�?.3章:Surface Shader执行流程详解

理解Surface Shader的执行流程对于优化性能和调试问题至关重要。本章将详细解析Surface Shader在渲染管线中的执行过程,包括前向渲染和延迟渲染的差异,以及各个阶段的具体工作原理。

🎯 学习目标

通过本章学习,你将掌握:

  • Surface Shader在渲染管线中的完整执行流程
  • 前向渲染和延迟渲染的执行差异
  • 光照计算的时机和方式
  • 多Pass渲染的执行顺序
  • 性能优化的关键点

🔄 渲染管线概览

Cocos Creator渲染管线架构

1
2
3
4
5
6
7
8
9
graph TD
A[Scene Objects] --> B[Culling]
B --> C[Sorting]
C --> D[Batching]
D --> E[Vertex Processing]
E --> F[Rasterization]
F --> G[Fragment Processing]
G --> H[Output Merger]
H --> I[Frame Buffer]

Surface Shader在管线中的位置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Where the Surface Shader is invoked inside the render pipeline
// (illustrative pseudo-code; the helper methods are not defined here).
class RenderPipeline {
    /**
     * Runs one render pass over the given objects, stage by stage.
     *
     * @param objects Objects to draw in this pass.
     */
    executeRenderPass(objects: RenderObject[]) {
        // 1. Geometry stage: the vertex shader runs once per object.
        for (let obj of objects) {
            this.executeVertexShader(obj);
        }

        // 2. Rasterization stage.
        // NOTE(review): assumes rasterizeTriangles() returns the generated
        // fragments; the original snippet never bound `fragments` — confirm.
        const fragments = this.rasterizeTriangles();

        // 3. Shading stage: the fragment shader runs once per fragment.
        for (let fragment of fragments) {
            this.executeFragmentShader(fragment);
        }

        // 4. Output merging.
        this.blendAndOutput();
    }
}

🏃‍♂️ 顶点处理阶段

顶点着色器执行流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Detailed vertex-processing flow of a Surface Shader
CCProgram surface-vs %{
  precision highp float;

  // === Step 1: declare the vertex inputs ===
  #include <builtin/inputs/cc-position>
  #include <builtin/inputs/cc-normal>
  #include <builtin/inputs/cc-texcoord>

  // Vertex entry point; returns the clip-space position of the vertex.
  vec4 vert() {
    // Step 2: initialise the input struct.
    // The variable is called `In` because `input` is a reserved word in GLSL.
    StandardVertInput In;
    CCVertInput(In); // read the data from the vertex buffer

    // Step 3: vertex transform (local -> world -> view -> clip)
    vec4 localPos = In.position;
    vec4 worldPos = cc_matWorld * localPos;
    vec4 viewPos = cc_matView * worldPos;
    vec4 clipPos = cc_matProj * viewPos;

    // Step 4: normal transform (w = 0 so translation does not apply)
    vec3 worldNormal = normalize((cc_matWorldIT * vec4(In.normal, 0)).xyz);

    // Step 5: hand the data over to the fragment shader
    CC_TRANSFER_WORLDPOS(worldPos);
    CC_TRANSFER_WORLDNORMAL(worldNormal);
    CC_TRANSFER_UV(In.texCoord);

    // Step 6: return the clip-space position
    return clipPos;
  }
}%

顶点变换详解

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// Detailed application of the transform matrices.
// The parameter is called `In` because `input` is a reserved word in GLSL.
void performVertexTransforms(inout StandardVertInput In) {
  // 1. Local space -> world space
  vec4 worldPosition = cc_matWorld * In.position;
  vec3 worldNormal = normalize((cc_matWorldIT * vec4(In.normal, 0)).xyz);

  // 2. World space -> view space
  vec4 viewPosition = cc_matView * worldPosition;
  vec3 viewNormal = normalize((cc_matViewInvTrans * vec4(worldNormal, 0)).xyz);

  // 3. View space -> clip space
  vec4 clipPosition = cc_matProj * viewPosition;

  // 4. Tangent-space handling (only when required)
  #if CC_USE_TANGENT
    vec3 worldTangent = normalize((cc_matWorld * vec4(In.tangent.xyz, 0)).xyz);
    // tangent.w scales (and can flip) the bitangent
    vec3 worldBitangent = cross(worldNormal, worldTangent) * In.tangent.w;
    mat3 tbnMatrix = mat3(worldTangent, worldBitangent, worldNormal);
    CC_TRANSFER_TBN(tbnMatrix);
  #endif
}

🎨 片元处理阶段

片元着色器执行流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
CCProgram surface-fs %{
  precision mediump float;

  // === Step 1: receive the interpolated data ===
  #include <builtin/inputs/cc-worldpos>
  #include <builtin/inputs/cc-worldnormal>
  #include <builtin/inputs/cc-uv>

  // Forward declaration: frag() calls surf() before its definition, and
  // GLSL requires a function to be declared before its first use.
  void surf(out StandardSurface s);

  // Fragment entry point; returns the final colour of the fragment.
  vec4 frag() {
    // Step 2: sample and process the material data
    StandardSurface surface;
    surf(surface); // invoke the Surface function

    // Step 3: lighting
    vec4 finalColor = calculateLighting(surface);

    // Step 4: post-processing effects
    finalColor = applyPostProcessing(finalColor);

    // Step 5: return the final colour
    return finalColor;
  }

  // Execution details of the Surface function: fills `s` from the
  // material textures and parameters.
  void surf(out StandardSurface s) {
    // 2.1: texture sampling
    vec2 uv = CC_GET_UV();
    vec4 albedoTex = texture(mainTexture, uv);
    // remap the sampled value from [0, 1] to [-1, 1]
    vec3 normalTex = texture(normalMap, uv).rgb * 2.0 - 1.0;

    // 2.2: material properties
    s.albedo = albedoTex.rgb * mainColor.rgb;
    s.normal = normalize(CC_GET_WORLDNORMAL() + normalTex);
    s.roughness = roughness;
    s.metallic = metallic;
    s.alpha = albedoTex.a * mainColor.a;
  }
}%

光照计算流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
// Detailed implementation of the lighting computation.
// Sums ambient, main-light, optional additional-light and emissive terms
// and returns them with the surface alpha.
vec4 calculateLighting(StandardSurface surface) {
  vec3 worldPos = CC_GET_WORLDPOS();
  vec3 normal = surface.normal;
  vec3 viewDir = normalize(cc_cameraPos.xyz - worldPos);

  vec3 finalColor = vec3(0.0);

  // 1. Ambient lighting
  vec3 ambient = calculateAmbientLighting(surface);
  finalColor += ambient;

  // 2. Main light
  vec3 mainLight = calculateMainLighting(surface, normal, viewDir, worldPos);
  finalColor += mainLight;

  // 3. Additional lights
  #if CC_ENABLE_ADDITIONAL_LIGHTS
    for (int i = 0; i < cc_additionalLightCount; i++) {
      vec3 additionalLight = calculateAdditionalLight(i, surface, normal, viewDir, worldPos);
      finalColor += additionalLight;
    }
  #endif

  // 4. Emissive
  finalColor += surface.emissive;

  return vec4(finalColor, surface.alpha);
}

// Main-light contribution: Lambert diffuse plus PBR specular,
// both attenuated by the shadow term when shadows are received.
vec3 calculateMainLighting(StandardSurface surface, vec3 normal, vec3 viewDir, vec3 worldPos) {
  // cc_mainLitDir is negated to get the surface-to-light direction
  vec3 lightDir = normalize(-cc_mainLitDir.xyz);
  vec3 lightColor = cc_mainLitColor.rgb * cc_mainLitColor.w;

  // Lambert diffuse
  float NdotL = max(dot(normal, lightDir), 0.0);
  vec3 diffuse = surface.albedo * lightColor * NdotL;

  // PBR specular
  vec3 halfDir = normalize(lightDir + viewDir);
  float NdotH = max(dot(normal, halfDir), 0.0);
  float NdotV = max(dot(normal, viewDir), 0.0);

  vec3 specular = calculatePBRSpecular(surface.roughness, surface.metallic,
                                       surface.albedo, NdotL, NdotV, NdotH);

  // Shadow attenuation
  #if CC_RECEIVE_SHADOW
    float shadowAtten = CC_SHADOW_ATTEN();
    diffuse *= shadowAtten;
    specular *= shadowAtten;
  #endif

  return diffuse + specular;
}

🔀 前向渲染 vs 延迟渲染

前向渲染执行流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
graph TD
A[Object 1] --> B[Vertex Shader]
B --> C[Rasterization]
C --> D[Fragment Shader + Lighting]
D --> E[Blend to Frame Buffer]

F[Object 2] --> G[Vertex Shader]
G --> H[Rasterization]
H --> I[Fragment Shader + Lighting]
I --> J[Blend to Frame Buffer]

K[Object N] --> L[Vertex Shader]
L --> M[Rasterization]
M --> N[Fragment Shader + Lighting]
N --> O[Blend to Frame Buffer]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Lighting computation in forward rendering
CCProgram forward-fs %{
  // Fragment entry point: shades the surface against every light in one pass.
  vec4 frag() {
    StandardSurface surface;
    surf(surface);

    // All lights are evaluated directly in the fragment shader
    vec3 color = vec3(0.0);

    // Accumulate the contribution of each light
    for (int i = 0; i < lightCount; i++) {
      color += calculateLightContribution(surface, lights[i]);
    }

    return vec4(color, surface.alpha);
  }
}%

延迟渲染执行流程

1
2
3
4
5
6
7
8
9
10
11
graph TD
A[All Objects] --> B[Geometry Pass]
B --> C[G-Buffer]
C --> D[Lighting Pass]
D --> E[Final Frame Buffer]

B1[Vertex Shader] --> B
B2[Fragment Shader<br/>Output to G-Buffer] --> B

D1[Screen Quad] --> D
D2[Lighting Shader<br/>Read from G-Buffer] --> D
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Geometry pass of deferred rendering
CCProgram deferred-geometry-fs %{
  // Layout of the data written to the G-Buffer
  struct GBufferOutput {
    vec4 albedoAO;        // RGB: albedo, A: AO
    vec4 normalRoughness; // RGB: normal, A: roughness
    vec4 motionMetallic;  // RG: motion vector, B: metallic, A: unused
  };

  // Evaluates the surface and packs it into the G-Buffer layout.
  GBufferOutput frag() {
    StandardSurface surface;
    surf(surface);

    // The local is called `gbuffer` because `output` is a reserved word in GLSL.
    GBufferOutput gbuffer;
    gbuffer.albedoAO = vec4(surface.albedo, surface.ao);
    // remap the normal from [-1, 1] to [0, 1] for storage
    gbuffer.normalRoughness = vec4(surface.normal * 0.5 + 0.5, surface.roughness);
    gbuffer.motionMetallic = vec4(calculateMotionVector(), surface.metallic, 0.0);

    return gbuffer;
  }
}%

// Lighting pass of deferred rendering
CCProgram deferred-lighting-fs %{
  // Reads the G-Buffer at this pixel, rebuilds the surface and lights it.
  vec4 frag() {
    // Fetch the three G-Buffer attachments
    vec4 gbuf0 = texture(gBufferAlbedoAO, screenUV);
    vec4 gbuf1 = texture(gBufferNormalRoughness, screenUV);
    vec4 gbuf2 = texture(gBufferMotionMetallic, screenUV);

    // Rebuild the surface description from the packed channels
    StandardSurface decoded;
    decoded.albedo = gbuf0.rgb;
    decoded.ao = gbuf0.a;
    decoded.normal = gbuf1.rgb * 2.0 - 1.0; // back from [0, 1] to [-1, 1]
    decoded.roughness = gbuf1.a;
    decoded.metallic = gbuf2.b;

    // Recover the world-space position from the depth texture
    vec3 positionWS = reconstructWorldPosition(screenUV, depthTexture);

    // Light the surface; the result is written with alpha 1.0
    vec3 lit = calculateAllLighting(decoded, positionWS);
    return vec4(lit, 1.0);
  }
}%

🔄 多Pass渲染执行顺序

Pass执行顺序控制

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
CCEffect %{
  techniques:
  - name: multi-pass-demo
    passes:
    # Pass 0: depth pre-pass (Queue: 1000)
    - vert: depth-vs:vert
      frag: depth-fs:frag
      phase: depth-prepass
      rasterizerState:
        cullMode: back
      depthStencilState:
        depthTest: true
        depthWrite: true
      blendState:
        targets:
        - blend: false
          colorMask: 0 # write depth only, no colour

    # Pass 1: main render pass (Queue: 2000)
    - vert: main-vs:vert
      frag: main-fs:frag
      phase: forward
      rasterizerState:
        cullMode: back
      depthStencilState:
        depthTest: true
        depthWrite: false
        depthFunc: equal
      blendState:
        targets:
        - blend: false

    # Pass 2: post-effect pass (Queue: 3000)
    - vert: effect-vs:vert
      frag: effect-fs:frag
      phase: forward-add
      rasterizerState:
        cullMode: none
      depthStencilState:
        depthTest: true
        depthWrite: false
      blendState:
        targets:
        - blend: true
          blendSrc: one
          blendDst: one
}%

Pass间数据传递

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Data is handed from one pass to the next through a texture
CCProgram pass1-fs %{
  // The first pass writes its data out to a texture
  void main() {
    vec4 data = calculateSomeData();
    gl_FragColor = data;
  }
}%

CCProgram pass2-fs %{
  // Texture holding the output of the first pass
  uniform sampler2D pass1Result;

  // The second pass reads the result of the first pass
  void main() {
    vec4 previousData = texture(pass1Result, uv);
    vec4 finalResult = processData(previousData);
    gl_FragColor = finalResult;
  }
}%

⚡ 性能优化关键点

顶点着色器优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Before optimisation: heavy per-vertex computation
vec4 vert_slow() {
  // Expensive math
  float complexValue = pow(sin(a_position.x), 3.0) * exp(a_position.y);

  // Several separate matrix multiplications
  vec4 pos1 = cc_matWorld * a_position;
  vec4 pos2 = cc_matView * pos1;
  vec4 pos3 = cc_matProj * pos2;

  return pos3;
}

// After optimisation: simplified per-vertex computation
vec4 vert_fast() {
  // Read the pre-computed value from a lookup table instead of evaluating it
  float lutValue = texture(lookupTable, a_position.xy).r;

  // World transform first, then one combined view-projection multiply
  vec4 worldPosition = cc_matWorld * a_position;
  return cc_matViewProj * worldPosition;
}

片元着色器优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Before optimisation: every fragment samples the texture several times
vec4 frag_slow() {
  // Four texture fetches
  vec4 color1 = texture(tex1, uv);
  vec4 color2 = texture(tex1, uv + offset1);
  vec4 color3 = texture(tex1, uv + offset2);
  vec4 color4 = texture(tex1, uv + offset3);

  // Average of the four samples
  return (color1 + color2 + color3 + color4) * 0.25;
}

// After optimisation: fewer texture fetches, cheaper algorithm
vec4 frag_fast() {
  // One fetch plus arithmetic replaces the repeated sampling
  vec4 baseColor = texture(tex1, uv);
  vec4 result = baseColor * somePrecomputedValue;

  return result;
}

光照计算优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Performance-oriented strategies for the lighting computation
vec3 optimizedLighting(StandardSurface surface) {
  // 1. Early out: drop fragments whose alpha is below 0.01
  if (surface.alpha < 0.01) discard;

  // 2. Distance culling: no contribution beyond the light's range
  float lightDistance = length(lightPos - worldPos);
  if (lightDistance > lightRange) return vec3(0.0);

  // 3. LOD system: level 0 takes the simple path, any other level full PBR
  int lightingLOD = determineLightingLOD(cameraDistance);
  if (lightingLOD == 0) {
    return calculateSimpleLighting(surface);
  } else {
    return calculateFullPBRLighting(surface);
  }
}

📊 性能分析和调试

渲染统计收集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Collects rendering statistics (illustrative; `gl` and
 * `executeRenderPass` are expected to be provided by the surrounding code).
 */
class RenderStats {
    vertexShaderTime: number = 0;
    fragmentShaderTime: number = 0;
    drawCalls: number = 0;
    trianglesRendered: number = 0;

    /** GPU time of the most recently resolved render pass, in milliseconds. */
    lastPassGpuTimeMs: number = 0;

    /** Timer query that has been issued but whose result is not yet read. */
    private pendingQuery: WebGLQuery | null = null;

    /**
     * Renders one pass and times it on the GPU.
     *
     * Timer queries are asynchronous: the result of a query only becomes
     * readable some time after it is issued, so the value measured by one
     * call is picked up by a later call and stored in `lastPassGpuTimeMs`.
     * When the timer extension is missing the pass is rendered untimed.
     */
    measureShaderPerformance() {
        // TIME_ELAPSED is not a core WebGL2 enum; it comes from this extension.
        const ext = gl.getExtension('EXT_disjoint_timer_query_webgl2');
        if (!ext) {
            this.executeRenderPass();
            return;
        }

        // Pick up the result of the previous measurement once it is ready.
        if (this.pendingQuery) {
            const ready = gl.getQueryParameter(this.pendingQuery, gl.QUERY_RESULT_AVAILABLE);
            const disjoint = gl.getParameter(ext.GPU_DISJOINT_EXT);
            if (ready && !disjoint) {
                // QUERY_RESULT is reported in nanoseconds.
                this.lastPassGpuTimeMs = gl.getQueryParameter(this.pendingQuery, gl.QUERY_RESULT) / 1e6;
            }
            if (ready || disjoint) {
                gl.deleteQuery(this.pendingQuery);
                this.pendingQuery = null;
            }
        }

        // A query is still in flight: render without starting another one.
        if (this.pendingQuery) {
            this.executeRenderPass();
            return;
        }

        // GPU time query around the render pass
        const query = gl.createQuery();
        gl.beginQuery(ext.TIME_ELAPSED_EXT, query);

        // Run the rendering
        this.executeRenderPass();

        gl.endQuery(ext.TIME_ELAPSED_EXT);
        this.pendingQuery = query;
    }
}

瓶颈识别

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Identify performance bottlenecks through conditional compilation:
// change PERFORMANCE_TEST to select which frag() variant is compiled.
#define PERFORMANCE_TEST 1

#if PERFORMANCE_TEST == 1
  // Simplified version - tests whether complex computation is the bottleneck
  vec4 frag() {
    return vec4(1.0, 0.0, 0.0, 1.0); // solid red
  }
#elif PERFORMANCE_TEST == 2
  // Texture sampling only - tests whether texture bandwidth is the bottleneck
  vec4 frag() {
    return texture(mainTexture, uv);
  }
#elif PERFORMANCE_TEST == 3
  // Lighting only - tests whether the lighting computation is the bottleneck
  vec4 frag() {
    return vec4(calculateLighting(), 1.0);
  }
#else
  // Full version
  vec4 frag() {
    return fullRenderingPipeline();
  }
#endif

💡 最佳实践

执行流程优化

  1. 合理安排Pass顺序: 深度预Pass → 不透明物体 → 天空盒 → 透明物体
  2. 批处理优化: 相同材质的物体合并渲染
  3. 状态变更最小化: 减少渲染状态切换次数
  4. early-Z优化: 利用深度测试减少overdraw

内存访问优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
// Optimising the memory-access pattern
// Wrong: scattered memory access
vec4 badMemoryAccess() {
  vec4 c1 = texture(tex, uv);
  vec4 c2 = texture(otherTex, uv);     // cache miss
  vec4 c3 = texture(tex, uv + offset); // back to the first texture again
  return c1 + c2 + c3;
}

// Correct: consecutive memory access
// Both fetches from `tex` are issued back to back before `otherTex` is read.
vec4 goodMemoryAccess() {
vec4 c1 = texture(tex, uv);
vec4 c3 = texture(tex, uv + offset); // consecutive access to the same texture
vec4 c2 = texture(otherTex, uv);
return c1 + c2 + c3;
}

理解Surface Shader的执行流程是优化渲染性能的基础。通过掌握这些知识,你可以更好地设计着色器,避免性能瓶颈,创建高效的渲染效果。

下一步学习