Bad Performance with VBO and shaders

Started by tlm, February 09, 2014, 15:07:33

Previous topic - Next topic

tlm

I am trying to render 2048 quads in modern OpenGL with VBOs and shaders, but the performance is horrible: I get only about 50 FPS with the following code. I think I am calling something every frame that slows down the application, but I don't know what it is.


I also think that it is very slow to send the model (position), view and projection matrices to the shader every frame if only the object's position has changed.


What can I do to make this code faster?


Model Code:

   
import java.nio.ByteBuffer;
    import java.nio.FloatBuffer;
    import java.util.Arrays;
    
    import org.lwjgl.BufferUtils;
    import org.lwjgl.opengl.GL11;
    import org.lwjgl.opengl.GL15;
    import org.lwjgl.opengl.GL20;
    import org.lwjgl.opengl.GL30;
    import org.lwjgl.util.vector.Matrix4f;
    import org.lwjgl.util.vector.Vector3f;
    
    public abstract class Model
    {
    	private TexturedVertex[] vertices;
    	private byte[] indices;
    	
    	private FloatBuffer verticesBuffer;
    	private ByteBuffer indicesBuffer;
    
    	private Vector3f position;
    	private Vector3f angle;
    	private Vector3f scale;
    	
    	int shaderProgram;
    	int indicesCount;
    	
    	private int projectionMatrixLocation;
    	private int viewMatrixLocation;
    	private int modelMatrixLocation;
    	
    	int vaoID;
    	int vboID;
    	int vboiID;
    
    	
    	public void load (TexturedVertex[] vertices, byte[] indices, Shader shader)
    	{
    		this.position = new Vector3f(0, 0, -1);
    		this.angle    = new Vector3f(0, 0, 0);
    		this.scale    = new Vector3f(1, 1, 1);
    		
    		this.vertices = vertices;
    		this.indices = indices;
    		this.shaderProgram = shader.getProgramID();
    		
    		this.generateBuffer();
    		this.generateArrayBufferObjects();
    		this.configureShader();
    	}
    	
    
    	private void generateBuffer()
    	{
    		// === Vertices Buffer === //
    			this.verticesBuffer = BufferUtils.createFloatBuffer(vertices.length * TexturedVertex.elementCount);
    			   
    			for (int i = 0; i < vertices.length; i++)
    			{
    				verticesBuffer.put(vertices[i].getElements());
    				System.out.print   ("XYZW: " + Arrays.toString(vertices[i].getXYZW()));
    				System.out.print   (" RGBA: " + Arrays.toString(vertices[i].getRGBA()));
    				System.out.println (" ST: " + Arrays.toString(vertices[i].getST()));
    			}
    			verticesBuffer.flip();
    
    		// === Generate Indices Buffer === //
    			this.indicesCount = indices.length;			
    			this.indicesBuffer = BufferUtils.createByteBuffer(indicesCount);
    			indicesBuffer.put(indices);
    			indicesBuffer.flip();
    	}
    	
    	private void generateArrayBufferObjects()
    	{
    		// === Generate VAO & VBO === //
    			this.vaoID = GL30.glGenVertexArrays();
    			GL30.glBindVertexArray(vaoID);
    			
    			this.vboID = GL15.glGenBuffers();
    			GL15.glBindBuffer(GL15.GL_ARRAY_BUFFER, vboID);
    			GL15.glBufferData(GL15.GL_ARRAY_BUFFER, verticesBuffer, GL15.GL_STATIC_DRAW);
    		
    			this.vboiID = GL15.glGenBuffers();				
    			GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, vboiID);
    			GL15.glBufferData(GL15.GL_ELEMENT_ARRAY_BUFFER, indicesBuffer, GL15.GL_STATIC_DRAW);
    
    			System.out.println ();
    			System.out.println ("VAO-ID: #" + vaoID);
    			System.out.println ("VBO-ID: #" + vboID);	
    			System.out.println ("VBOI-ID: #" + vboiID);	
    					
    		// === Put informations to shader === //
    			GL20.glVertexAttribPointer(0, TexturedVertex.positionElementCount,	GL11.GL_FLOAT, false, TexturedVertex.stride, TexturedVertex.positionByteOffset);
    			GL20.glVertexAttribPointer(1, TexturedVertex.colorElementCount,		GL11.GL_FLOAT, false, TexturedVertex.stride, TexturedVertex.colorByteOffset);
    			GL20.glVertexAttribPointer(2, TexturedVertex.textureElementCount,	GL11.GL_FLOAT, false, TexturedVertex.stride, TexturedVertex.textureByteOffset);
    			
    		// === Unbind Buffers === //
    			GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, 0);	
    			GL15.glBindBuffer(GL15.GL_ARRAY_BUFFER, 0);
    			GL30.glBindVertexArray(0);
    			
    	}
    	
    
    	private void configureShader()
    	{
    		// === Bind shader === //
    			GL20.glUseProgram(shaderProgram);
    		
    		// === Get matrix location === //
    			this.projectionMatrixLocation = GL20.glGetUniformLocation(shaderProgram, "projectionMatrix");
    			this.viewMatrixLocation 	  = GL20.glGetUniformLocation(shaderProgram, "viewMatrix");
    			this.modelMatrixLocation 	  = GL20.glGetUniformLocation(shaderProgram, "modelMatrix");		
    		
    		// === Update the matrix === //
    			this.updateMatrix();
    			
    		// === Unbind shader === //
    			GL20.glUseProgram(0);
    	}	
    	
    	void updateMatrix()
    	{
    		// === Bind shader === //
    			GL20.glUseProgram(shaderProgram);
    		
    		// === Load matrix === //
    			Matrix4f projectionMatrix = GameManager.camera.getProjectionMatrix();
    			Matrix4f viewMatrix = GameManager.camera.getViewMatrix();
    			Matrix4f modelMatrix = new Matrix4f();
    	
    		// === Scale, translate and rotate matrix === //
    			Matrix4f.scale(scale, modelMatrix, modelMatrix);
    			Matrix4f.translate(position, modelMatrix, modelMatrix);
    			Matrix4f.rotate(Util.degreesToRadians(angle.z), new Vector3f(0, 0, 1), modelMatrix, modelMatrix);
    			Matrix4f.rotate(Util.degreesToRadians(angle.y), new Vector3f(0, 1, 0), modelMatrix, modelMatrix);
    			Matrix4f.rotate(Util.degreesToRadians(angle.x), new Vector3f(1, 0, 0), modelMatrix, modelMatrix);
    			
    		// === Apply uniform matrix to shader  === //
    			FloatBuffer matrixBuffer = BufferUtils.createFloatBuffer(16);
    			
    			projectionMatrix.store(matrixBuffer);
    			matrixBuffer.flip();
    			GL20.glUniformMatrix4(projectionMatrixLocation,	false, matrixBuffer);
    			
    			viewMatrix.store(matrixBuffer);
    			matrixBuffer.flip();
    			GL20.glUniformMatrix4(viewMatrixLocation,		false, matrixBuffer);
    			
    			modelMatrix.store(matrixBuffer);
    			matrixBuffer.flip();
    			GL20.glUniformMatrix4(modelMatrixLocation,		false, matrixBuffer);
    			
    		// === Unbind shader === //
    			GL20.glUseProgram(0);
    	}
    
    	public void setPosition (Vector3f newPosition)
    	{
    		this.position = newPosition;
    		this.updateMatrix();
    	}
    	
    	public void setAngle (Vector3f newAngle)
    	{
    		this.angle = newAngle;
    		this.updateMatrix();
    	}
    	
    	public void setScale (Vector3f newAngle)
    	{
    		this.scale = newAngle;
    		this.updateMatrix();
    	}
    }


Render Code:

   
import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.ByteBuffer;
    
    import org.lwjgl.opengl.Display;
    import org.lwjgl.opengl.GL11;
    import org.lwjgl.opengl.GL13;
    import org.lwjgl.opengl.GL15;
    import org.lwjgl.opengl.GL20;
    import org.lwjgl.opengl.GL30;
    import org.lwjgl.util.vector.Vector3f;
    
    import de.matthiasmann.twl.utils.PNGDecoder;
    import de.matthiasmann.twl.utils.PNGDecoder.Format;
    
    public class GameManager
    {
    	private Logger logger;
    	private Profiler profiler;
    	private Window window;
    	public static Camera camera;
    	
    	public GameManager()
    	{
    		
    		init();		
    
    		ModelChest[] chests = new ModelChest[2048];
    		for (int i = 0; i < 2048; i++)
    		{
    			chests[i] = new ModelChest();	
    		}
    		
    		
    		ModelChest chest = new ModelChest();		
    		ModelChest chestB = new ModelChest();		
    		
    		int textureID = loadPNGTexture ("Getigerte-Katze-Baby-Decke.png", GL13.GL_TEXTURE0);
    		
    		while (!window.isCloseRequested())
    		{			
    			GL11.glEnable(GL11.GL_DEPTH_TEST);
    			
    			GL11.glClear(GL11.GL_COLOR_BUFFER_BIT | GL11.GL_DEPTH_BUFFER_BIT);
    			{				
    				chest.setPosition(new Vector3f(1, 0, -0.5F));				
    				GL20.glUseProgram(chest.shaderProgram);
    					
    					GL13.glActiveTexture(GL13.GL_TEXTURE0);
    					GL11.glBindTexture(GL11.GL_TEXTURE_2D, textureID);
    					
    					GL30.glBindVertexArray(chest.vaoID);
    					GL20.glEnableVertexAttribArray(0);
    					GL20.glEnableVertexAttribArray(1);
    					GL20.glEnableVertexAttribArray(2);
    					
    					GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, chest.vboiID);
    					{
    						GL11.glDrawElements(GL11.GL_TRIANGLES, chest.indicesCount, GL11.GL_UNSIGNED_BYTE, 0);		
    					}
    					GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, 0);
    		
    					GL20.glDisableVertexAttribArray(0);
    					GL20.glDisableVertexAttribArray(1);
    					GL20.glDisableVertexAttribArray(2);
    					GL30.glBindVertexArray(0);
    							
    				//GL20.glUseProgram(0);
    							
    				
    				for (float i = 0; i < 2048; i++)
    				{			
    					chests[(int)i].setPosition(new Vector3f(-1F + ((float)i / 30F), 0, -0.5F));	
    				GL20.glUseProgram(chests[(int)i].shaderProgram);
    										
    					GL30.glBindVertexArray(chests[(int)i].vaoID);
    					GL20.glEnableVertexAttribArray(0);
    					GL20.glEnableVertexAttribArray(1);
    					GL20.glEnableVertexAttribArray(2);
    					
    					GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, chests[(int)i].vboiID);
    					{
    						GL11.glDrawElements(GL11.GL_TRIANGLE_STRIP , chests[(int)i].indicesCount, GL11.GL_UNSIGNED_BYTE, 0);		
    					
    					}
    					GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, 0);
    		
    					GL20.glDisableVertexAttribArray(0);
    					GL20.glDisableVertexAttribArray(1);
    					GL20.glDisableVertexAttribArray(2);
    					GL30.glBindVertexArray(0);
    							
    					GL20.glUseProgram(0);
    				}
    			}
    			
    
    			Display.update();
    		}
    		
    		destroy();
    	}
    
    	public void init()
    	{		
    		logger = new Logger();
    		logger.init();
    		
    		profiler = new Profiler();
    		profiler.init();
    		
    		window = new Window();
    		window.init();
    		
    		camera = new Camera();
    		camera.init();
    	}
    	
    	public void destroy()
    	{
    		logger.destroy();		
    		profiler.destroy();	
    		window.destroy();
    		camera.destroy();
    		
    		System.exit(0);
    	}
    
    	private int loadPNGTexture(String filename, int textureUnit) {
    		ByteBuffer buf = null;
    		int tWidth = 0;
    		int tHeight = 0;
    		
    		try {
    			// Open the PNG file as an InputStream
    			InputStream in = new FileInputStream(filename);
    			// Link the PNG decoder to this stream
    			PNGDecoder decoder = new PNGDecoder(in);
    			
    			// Get the width and height of the texture
    			tWidth = decoder.getWidth();
    			tHeight = decoder.getHeight();
    			
    			
    			// Decode the PNG file in a ByteBuffer
    			buf = ByteBuffer.allocateDirect(
    					4 * decoder.getWidth() * decoder.getHeight());
    			decoder.decode(buf, decoder.getWidth() * 4, Format.RGBA);
    			buf.flip();
    			
    			in.close();
    		} catch (IOException e) {
    			e.printStackTrace();
    			System.exit(-1);
    		}
    		
    		// Create a new texture object in memory and bind it
    		int texId = GL11.glGenTextures();
    		GL13.glActiveTexture(textureUnit);
    		GL11.glBindTexture(GL11.GL_TEXTURE_2D, texId);
    		
    		// All RGB bytes are aligned to each other and each component is 1 byte
    		GL11.glPixelStorei(GL11.GL_UNPACK_ALIGNMENT, 1);
    		
    		// Upload the texture data and generate mip maps (for scaling)
    		GL11.glTexImage2D(GL11.GL_TEXTURE_2D, 0, GL11.GL_RGB, tWidth, tHeight, 0, 
    				GL11.GL_RGBA, GL11.GL_UNSIGNED_BYTE, buf);
    		GL30.glGenerateMipmap(GL11.GL_TEXTURE_2D);
    		
    		// Setup the ST coordinate system
    		GL11.glTexParameteri(GL11.GL_TEXTURE_2D, GL11.GL_TEXTURE_WRAP_S, GL11.GL_REPEAT);
    		GL11.glTexParameteri(GL11.GL_TEXTURE_2D, GL11.GL_TEXTURE_WRAP_T, GL11.GL_REPEAT);
    		
    		// Setup what to do when the texture has to be scaled
    		GL11.glTexParameteri(GL11.GL_TEXTURE_2D, GL11.GL_TEXTURE_MAG_FILTER, 
    				GL11.GL_NEAREST);
    		GL11.glTexParameteri(GL11.GL_TEXTURE_2D, GL11.GL_TEXTURE_MIN_FILTER, 
    				GL11.GL_LINEAR_MIPMAP_LINEAR);
    		
    		
    		return texId;
    	}
    	
    }


Camera:

   
import org.lwjgl.opengl.Display;
    import org.lwjgl.util.vector.Matrix4f;
    import org.lwjgl.util.vector.Vector3f;
    
    public class Camera extends Worker
    {
    	private Matrix4f projectionMatrix;
    	private Matrix4f viewMatrix;
    	
    	private Vector3f position;
    	private Vector3f angle;
    	
    	private float fieldOfView;
    	private float aspectRatio;
    	private float nearPlane;
    	private float farPlane;
    	
    	private float xScale;
    	private float yScale;
    	private float frustumLength;
    	
    	@Override
    	protected void onInitialize()
    	{		
    		// Apply default settings		
    		this.fieldOfView = 60f;
    		this.aspectRatio = (float) Display.getWidth() / (float) Display.getHeight();
    		this.nearPlane = 0.1f;
    		this.farPlane = 100f;
    
    		this.position = new Vector3f(0, 0, -1);
    		this.angle = new Vector3f(0, 0, 0);
    		
    		// Calculate scale and furstum length
    		this.yScale = Util.coTangent(Util.degreesToRadians(fieldOfView / 2f));
    		this.xScale = yScale / aspectRatio;
    		this.frustumLength = farPlane - nearPlane;
    
    		// Projection Matrix
    		projectionMatrix = new Matrix4f();
    		
    		projectionMatrix.m00 = xScale;
    		projectionMatrix.m11 = yScale;
    		projectionMatrix.m22 = -((farPlane + nearPlane) / frustumLength);
    		projectionMatrix.m23 = -1;
    		projectionMatrix.m32 = -((2 * nearPlane * farPlane) / frustumLength);
    		projectionMatrix.m33 = 0;
    		
    		// View Matrix
    		viewMatrix = new Matrix4f();
    		
    		Matrix4f.translate(position, viewMatrix, viewMatrix);
    		Matrix4f.rotate(Util.degreesToRadians(angle.z), new Vector3f(0, 0, 1), 
    				viewMatrix, viewMatrix);
    		Matrix4f.rotate(Util.degreesToRadians(angle.y), new Vector3f(0, 1, 0), 
    				viewMatrix, viewMatrix);
    		Matrix4f.rotate(Util.degreesToRadians(angle.x), new Vector3f(1, 0, 0), 
    				viewMatrix, viewMatrix);
    	}
    
    	public Matrix4f getProjectionMatrix()
    	{
    		return this.projectionMatrix;
    	}
    
    	public Matrix4f getViewMatrix()
    	{
    		return this.viewMatrix;
    	}
    
    	public void setPosition (Vector3f newPosition)
    	{		
    		Matrix4f.translate(newPosition, viewMatrix, viewMatrix);
    	}
    
    	public void setPosition (float x, float y, float z)
    	{		
    		setPosition(new Vector3f (x, y, z));
    	}
    
    	public void setAngle (Vector3f newAngle)
    	{		
    		Matrix4f.rotate(Util.degreesToRadians(newAngle.z), new Vector3f(0, 0, 1), 
    				viewMatrix, viewMatrix);
    		Matrix4f.rotate(Util.degreesToRadians(newAngle.y), new Vector3f(0, 1, 0), 
    				viewMatrix, viewMatrix);
    		Matrix4f.rotate(Util.degreesToRadians(newAngle.x), new Vector3f(1, 0, 0), 
    				viewMatrix, viewMatrix);
    	}
    
    	public void setAngle (float x, float y, float z)
    	{		
    		setAngle(new Vector3f (x, y, z));
    	}
    
    	@Override
    	protected void onDestroy()
    	{
    		;
    
    	}
    
    	@Override
    	protected void onTick()
    	{
    		;
    	}
    }



Shader:

   
#version 150 core
    
    // Transformation matrices supplied by the application.
    uniform mat4 projectionMatrix;
    uniform mat4 viewMatrix;
    uniform mat4 modelMatrix;
    
    // Per-vertex inputs.
    in vec4 in_Position;
    in vec4 in_Color;
    in vec2 in_TextureCoord;
    
    // Values forwarded unchanged to the next shader stage.
    out vec4 pass_Color;
    out vec2 pass_TextureCoord;
    
    // Transforms the vertex into clip space and passes color and texture
    // coordinate through.
    void main(void) {
    	mat4 modelViewProjection = projectionMatrix * viewMatrix * modelMatrix;
    	gl_Position = modelViewProjection * in_Position;
    	
    	pass_TextureCoord = in_TextureCoord;
    	pass_Color = in_Color;
    }

Cornix

Just look at this:
    				for (float i = 0; i < 2048; i++)
    				{			
    					chests[(int)i].setPosition(new Vector3f(-1F + ((float)i / 30F), 0, -0.5F));	
    				GL20.glUseProgram(chests[(int)i].shaderProgram);
    										
    					GL30.glBindVertexArray(chests[(int)i].vaoID);
    					GL20.glEnableVertexAttribArray(0);
    					GL20.glEnableVertexAttribArray(1);
    					GL20.glEnableVertexAttribArray(2);
    					
    					GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, chests[(int)i].vboiID);
    					{
    						GL11.glDrawElements(GL11.GL_TRIANGLE_STRIP , chests[(int)i].indicesCount, GL11.GL_UNSIGNED_BYTE, 0);		
    					
    					}
    					GL15.glBindBuffer(GL15.GL_ELEMENT_ARRAY_BUFFER, 0);
    		
    					GL20.glDisableVertexAttribArray(0);
    					GL20.glDisableVertexAttribArray(1);
    					GL20.glDisableVertexAttribArray(2);
    					GL30.glBindVertexArray(0);
    							
    					GL20.glUseProgram(0);
    				}

You are binding 2048 shaders.
You are binding 2048 VAOs.
You are binding 2048 VBOs.
And you are calling glDrawElements 2048 times.

This is probably WAY worse than immediate mode.

The idea is to bind GL objects as rarely as possible. Every time the state has to change, things get slow.
And when working with VBOs, the idea is to batch rendering commands. Try to put all of them into one VBO. There is no size limit on those things.
Then batch them all, sorted by shader and texture, and try to minimize the number of draw calls you make.

osa1

Hi tlm,

Could you please share your whole program as a zip file? I'm very interested in this program.

Thanks.