我一直在构建一个OpenGL计算着色器来实现光线跟踪。目前它只是通过对一个三角形数组投射光线来计算像素颜色。
#version 430 core
struct Triangle {
vec3 vertex1;
vec3 vertex2;
vec3 vertex3;
vec3 color1;
vec3 color2;
vec3 color3;
vec3 normal1;
vec3 normal2;
vec3 normal3;
vec3 edge1;
vec3 edge2;
};
layout (std430, binding = 0) readonly buffer TriangleBuffer {
int numTriangles;
Triangle triangles[];
};
layout (std430, binding = 1, column_major) buffer CameraBuffer {
vec3 cameraPosition;
mat4 view;
mat4 projection;
mat4 inverseViewProjection;
};
layout (rgba8, binding = 2) writeonly uniform image2D outputImage;
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
vec3 getBarycentricCoords(int triangleIndex, vec3 closestIntersectionPoint) {
vec3 v0 = triangles[triangleIndex].vertex2 - triangles[triangleIndex].vertex1;
vec3 v1 = triangles[triangleIndex].vertex3 - triangles[triangleIndex].vertex1;
vec3 v2 = closestIntersectionPoint - triangles[triangleIndex].vertex1;
float d00 = dot(v0, v0);
float d01 = dot(v0, v1);
float d11 = dot(v1, v1);
float d20 = dot(v2, v0);
float d21 = dot(v2, v1);
float denom = d00 * d11 - d01 * d01;
float b1 = (d11 * d20 - d01 * d21) / denom;
float b2 = (d00 * d21 - d01 * d20) / denom;
float b0 = 1.0f - b1 - b2;
return vec3(b0, b1, b2);
}
vec3 getTriangleColor(int triangleIndex, vec3 closestIntersectionPoint) {
vec3 barycentric = getBarycentricCoords(triangleIndex, closestIntersectionPoint);
vec3 triangleColor = barycentric.x * triangles[triangleIndex].color1 + barycentric.y * triangles[triangleIndex].color2 + barycentric.z * triangles[triangleIndex].color3;
return triangleColor;
}
bool rayTriangleIntersection(vec3 rayOrigin, vec3 rayDirection, int triangleIndex, out vec3 intersectionPoint) {
vec3 h = cross(rayDirection, triangles[triangleIndex].edge2);
float a = dot(triangles[triangleIndex].edge1, h);
if (a > -0.00001 && a < 0.00001) {
return false;
}
float f = 1.0 / a;
vec3 s = rayOrigin - triangles[triangleIndex].vertex1;
float u = f * dot(s, h);
if (u < 0.0 || u > 1.0) {
return false;
}
vec3 q = cross(s, triangles[triangleIndex].edge1);
float v = f * dot(rayDirection, q);
if (v < 0.0 || u + v > 1.0) {
return false;
}
float t = f * dot(triangles[triangleIndex].edge2, q);
if (t > 0.00001) {
intersectionPoint = rayOrigin + rayDirection * t;
return true;
}
return false;
}
vec3 unProject(vec3 win, mat4 model, mat4 proj, vec4 viewport) {
vec4 tmp = vec4(win, 1);
tmp.x = (tmp.x - viewport[0]) / viewport[2];
tmp.y = (tmp.y - viewport[1]) / viewport[3];
tmp.x = tmp.x * 2 - 1;
tmp.y = tmp.y * 2 - 1;
vec4 obj = inverseViewProjection * tmp;
obj /= obj.w;
return obj.xyz;
}
void main() {
ivec2 pixelCoord = ivec2(gl_GlobalInvocationID.xy);
vec4 viewport = vec4(0, 0, vec2(imageSize(outputImage)).xy);
vec3 near = vec3(pixelCoord.x, pixelCoord.y, -1);
vec3 far = vec3(pixelCoord.x, pixelCoord.y, 0.9518f);
vec3 rayOrigin = unProject(near, view, projection, viewport);
vec3 rayWorldFar = unProject(far, view, projection, viewport);
vec3 rayDirection = normalize(rayWorldFar - rayOrigin);
vec3 intersectionPoint;
vec3 closestIntersectionPoint = vec3(0,0,0);
float closestIntersectionDistance = 999999999.0f;
vec3 finalColor = vec3(0,0,0);
bool intersectionFound = false;
for (int triangleIndex = 0; triangleIndex < numTriangles; triangleIndex++) {
if (rayTriangleIntersection(rayOrigin, rayDirection, triangleIndex, intersectionPoint)) {
float intersectionDistance = distance(intersectionPoint, rayOrigin);
if (intersectionDistance < closestIntersectionDistance) {
closestIntersectionDistance = intersectionDistance;
closestIntersectionPoint = intersectionPoint;
finalColor = getTriangleColor(triangleIndex, closestIntersectionPoint);
intersectionFound = true;
}
}
}
if (intersectionFound) {
imageStore(outputImage, pixelCoord, vec4(finalColor, 1.0f));
}
else
imageStore(outputImage, pixelCoord, vec4(0));
}
然而当运行着色器时,我只得到30fps。代码中有一个明显的瓶颈。这是在只有20个三角形的情况下运行的。
我可以做什么优化来提高代码的性能?为什么会有瓶颈?
1条答案
按热度按时间3qpi33ja1#
我设法使我的帧率增加了一倍以上,做了以下修改:
将布局更改为更高的值
为此,我使用了
GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS
并更新布局大小:
根据group_id和local_id获取像素坐标
更新glDispatchCompute