第11.3章:着色器优化技术详解
掌握着色器优化技术是提升游戏性能的关键。本教程将深入介绍各种着色器优化策略,帮助你编写高效的GPU代码。
🎯 学习目标
- 掌握GPU架构和着色器执行原理
- 学会分析和优化着色器性能瓶颈
- 了解各种着色器优化技术
- 掌握移动端特有的优化策略
📋 前置知识
- 熟悉着色器编程基础
- 理解GPU渲染管线
- 了解基本的计算机图形学概念
🔧 GPU架构基础
GPU执行模型
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
// GPU parallel-execution example
CCProgram gpu_execution_model %{
  // The GPU schedules work in warps/wavefronts: typically 32 threads
  // execute the same instruction at the same time.
  void main() {
    // Good practice: every thread follows the same code path.
    vec4 sampled = texture(mainTexture, v_uv);
    sampled.rgb = sampled.rgb * 2.0;

    // Bad practice: the branch makes execution diverge.
    bool onRightHalf = v_uv.x > 0.5;
    if (onRightHalf) {
      sampled.rgb = sampled.rgb * 2.0;  // half of the threads run this
    } else {
      sampled.rgb = sampled.rgb * 0.5;  // the other half run this
    }

    fragColor = sampled;
  }
}%
|
内存层次结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/**
 * Descriptive summary of the GPU memory tiers discussed in this chapter,
 * ordered from fastest/smallest to slowest/largest.
 * Every field is a string-literal type, so this interface documents the
 * figures rather than carrying runtime data.
 */
interface GPUMemoryHierarchy {
    /** Per-thread storage used for local variables. */
    registers: {
        latency: '0 cycles';
        bandwidth: 'Very High';
        size: 'Very Small';
        usage: '局部变量';
    };

    /** Cached constant storage used for uniform variables. */
    constantMemory: {
        latency: '1-2 cycles (cached)';
        bandwidth: 'High';
        size: 'Medium';
        usage: 'Uniform变量';
    };

    /** Memory accessed through texture sampling. */
    textureMemory: {
        latency: '100-200 cycles';
        bandwidth: 'Medium';
        size: 'Large';
        usage: '纹理采样';
    };

    /** General device memory holding vertex buffers and frame buffers. */
    globalMemory: {
        latency: '200-400 cycles';
        bandwidth: 'Low';
        size: 'Very Large';
        usage: '顶点缓冲、帧缓冲';
    };
}
|
计算优化技术
1. 减少复杂数学运算
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
// Math optimization comparison
CCProgram math_optimization %{
  // Lookup table holding one full period of sine along U.
  // NOTE(review): the value is read from .r unchanged, so this assumes a
  // float/signed texture format. With an unsigned-normalized format, store
  // sin * 0.5 + 0.5 and decode after sampling -- confirm against the asset.
  uniform sampler2D sinTable;

  // Approximates sin(x) with a texture lookup.
  // Declared before its callers because GLSL requires declaration before
  // use, and named differently from the sampler so the two do not clash.
  float sinLUT(float x) {
    // Map one period [0, 2*PI) onto [0, 1). fract() wraps negative and
    // out-of-range angles, so the result does not depend on the wrap mode.
    float normalized = fract(x / (2.0 * 3.14159));
    return texture(sinTable, vec2(normalized, 0.5)).r;
  }

  // Unoptimized version.
  // The parameter is named `v` because `input` is a reserved word in GLSL.
  vec3 slowVersion(vec3 v) {
    float result = pow(v.x, 2.0);     // expensive power function
    result += sqrt(v.y);              // square root
    result *= sin(v.z * 3.14159);     // expensive trigonometric function
    return vec3(result);
  }

  // Optimized version; computes the same quantity as slowVersion.
  vec3 fastVersion(vec3 v) {
    float result = v.x * v.x;         // multiply instead of pow(x, 2.0)
    // Keep sqrt(): pow(y, 0.5) is a general power function and is not
    // cheaper than the dedicated square root.
    result += sqrt(v.y);
    // Same argument as slowVersion (z * PI), served from the lookup table.
    // NOTE(review): a texture fetch is not always faster than sin();
    // profile on the target GPU.
    result *= sinLUT(v.z * 3.14159);
    return vec3(result);
  }
}%
|
2. 向量化操作
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
// Vectorization
CCProgram vectorization %{
  // BAD: scalar operations (slow)
  void scalarVersion() {
    float red   = texture(tex, uv).r * color.r;
    float green = texture(tex, uv).g * color.g;
    float blue  = texture(tex, uv).b * color.b;
    float alpha = texture(tex, uv).a * color.a;
    fragColor = vec4(red, green, blue, alpha);
  }

  // GOOD: vector operations (fast)
  void vectorVersion() {
    vec4 sampled = texture(tex, uv);
    // A single vector operation covers all four channels.
    fragColor = sampled * color;
  }

  // GOOD: SIMD-friendly operations
  void simdFriendly() {
    vec4 texelA = texture(texA, uv);
    vec4 texelB = texture(texB, uv);
    vec4 texelC = texture(texC, uv);

    // Several vectors are combined at once (fused multiply-add).
    vec4 combined = texelA * texelB + texelC;
    fragColor = combined;
  }
}%
|
🖼️ 纹理优化技术
1. 纹理采样优化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
// Texture sampling optimization
CCProgram texture_optimization %{
  // BAD: many separate samples of the same texture
  void redundantSampling() {
    vec4 color1 = texture(mainTex, uv);
    vec4 color2 = texture(mainTex, uv + offset1);  // extra sample
    vec4 color3 = texture(mainTex, uv + offset2);  // extra sample
    fragColor = (color1 + color2 + color3) / 3.0;
  }

  // GOOD: fewer samples
  void optimizedSampling() {
    // Two samples blended with mix() instead of three averaged samples.
    vec4 color = texture(mainTex, uv);
    vec4 neighbor = texture(mainTex, uv + offset);
    fragColor = mix(color, neighbor, blendFactor);
  }

  // GOOD: merged texture samples
  void packedTextures() {
    // Several single-channel maps are packed into one RGBA texture, so a
    // single fetch returns all four values.
    // The local is named `channels` because `packed` is a reserved word in
    // GLSL ES and cannot be used as an identifier.
    vec4 channels = texture(packedTex, uv);
    float roughness = channels.r;
    float metallic  = channels.g;
    float ao        = channels.b;
    float height    = channels.a;
  }
}%
|
2. 纹理压缩和格式选择
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
/**
 * Chooses texture formats and size limits for a given texture usage.
 */
class TextureOptimizer {
    /** Upper bound on texture size per category. */
    private static readonly MAX_SIZES: Record<string, number> = {
        'ui': 2048,
        'character': 1024,
        'environment': 512,
        'effects': 256
    };

    /** Limit applied when the usage has no entry in MAX_SIZES. */
    private static readonly DEFAULT_MAX_SIZE = 512;

    /**
     * Returns the preferred texture format for `usage`.
     *
     * Albedo and normal maps use a compressed format that depends on the
     * device class (ETC2 on mobile, BC on other platforms); the other usages
     * use a fixed uncompressed format.
     *
     * @param usage What the texture is used for.
     * @returns The format identifier to use.
     */
    public selectOptimalFormat(usage: TextureUsage): TextureFormat {
        // `sys.isMobile` is the engine's mobile check. The previous
        // comparison against `sys.Platform.MOBILE` used an enum member that
        // does not exist, so it was always false and mobile devices were
        // given the desktop formats.
        const isMobile = sys.isMobile;

        switch (usage) {
            case 'albedo':
                return isMobile ? 'ETC2_RGB' : 'BC1_RGB';
            case 'normal':
                return isMobile ? 'ETC2_RG11' : 'BC5_RG';
            case 'roughnessMetallicAO':
                return 'RGB8';
            case 'heightmap':
                return 'R8';
            default:
                return 'RGBA8';
        }
    }

    /**
     * Clamps a texture size to the limit configured for `usage`.
     *
     * @param originalSize Size of the source texture.
     * @param usage Category used to look up the limit.
     * @returns `originalSize`, or the category limit if that is smaller.
     */
    public optimizeTextureSize(originalSize: number, usage: TextureUsage): number {
        // NOTE(review): the table is keyed by 'ui' / 'character' /
        // 'environment' / 'effects', while selectOptimalFormat switches on
        // 'albedo' / 'normal' / ...; confirm that TextureUsage covers both.
        const limit = TextureOptimizer.MAX_SIZES[usage] || TextureOptimizer.DEFAULT_MAX_SIZE;
        return Math.min(originalSize, limit);
    }
}
|
3. Mipmap优化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
// Mipmap optimization
CCProgram mipmap_optimization %{
  // Manual mip level selection from the screen-space derivatives of the
  // texel-space coordinate.
  // The size parameter is named `texSize` so it does not hide the built-in
  // textureSize() function.
  float calculateMipmapLevel(vec2 uv, vec2 texSize) {
    vec2 dx = dFdx(uv * texSize);
    vec2 dy = dFdy(uv * texSize);
    float maxDelta = max(dot(dx, dx), dot(dy, dy));
    // 0.5 * log2(d^2) == log2(d). The lower bound keeps log2() away from 0,
    // where its result is undefined (constant UVs give zero derivatives).
    return 0.5 * log2(max(maxDelta, 1e-8));
  }

  // Texture sample with an explicitly computed level of detail.
  vec4 optimizedTextureSample(sampler2D tex, vec2 uv) {
    // Query the level-0 size from the sampler; there is no `textureSize`
    // variable in scope, only the built-in function of that name.
    vec2 texSize = vec2(textureSize(tex, 0));
    float level = calculateMipmapLevel(uv, texSize);
    return textureLod(tex, uv, level);
  }

  // Anisotropic filtering
  vec4 anisotropicSample(sampler2D tex, vec2 uv) {
    vec2 texSize = vec2(textureSize(tex, 0));

    // Anisotropy ratio: longer footprint axis over the shorter one.
    vec2 dx = dFdx(uv * texSize);
    vec2 dy = dFdy(uv * texSize);
    float maxAniso = max(length(dx), length(dy));
    float minAniso = min(length(dx), length(dy));
    // Guard against a zero-length axis to avoid dividing by zero.
    float ratio = maxAniso / max(minAniso, 1e-6);

    // Limit the anisotropy level to improve performance.
    // NOTE(review): `ratio` is not consumed by the sample below; the actual
    // limit has to be configured on the sampler.
    ratio = min(ratio, 4.0);

    return texture(tex, uv);  // the GPU handles anisotropy automatically
  }
}%
|
🔀 分支优化技术
1. 避免动态分支
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
// Branch optimization comparison
CCProgram branch_optimization %{
  // BAD: dynamic branch (executes inefficiently on the GPU)
  vec3 dynamicBranch(vec3 color, float condition) {
    if (condition > 0.5) {
      return color * 2.0;  // branch A
    } else {
      return color * 0.5;  // branch B
    }
  }

  // GOOD: eliminate the branch with step()
  vec3 eliminateBranch(vec3 color, float condition) {
    // step(edge, x) yields 1.0 when x >= edge, so step(0.5, condition) would
    // pick branch A at exactly 0.5 while dynamicBranch picks branch B there.
    // 1.0 - step(condition, 0.5) is 1.0 only when condition > 0.5, which
    // matches dynamicBranch for every input.
    float takeA = 1.0 - step(condition, 0.5);
    float factor = mix(0.5, 2.0, takeA);
    return color * factor;
  }

  // GOOD: eliminate the branch by blending both results
  vec3 lerpBranch(vec3 color, float condition) {
    vec3 resultA = color * 2.0;
    vec3 resultB = color * 0.5;
    // Same strict "> 0.5" selector as in eliminateBranch.
    float takeA = 1.0 - step(condition, 0.5);
    return mix(resultB, resultA, takeA);
  }
}%
|
2. 静态分支优化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
// Static branches and macro definitions
CCProgram static_branches %{
  // Static branch created with a macro.
  // The macro is tested by value: the Cocos effect compiler gives every
  // custom macro a default value, so `#if defined(...)` would always be true
  // and the fallback below could never be selected.
  #if ENABLE_NORMAL_MAPPING
    vec3 calculateNormal() {
      vec3 normal = texture(normalTexture, v_uv).xyz * 2.0 - 1.0;
      return normalize(normal);
    }
  #else
    vec3 calculateNormal() {
      return normalize(v_worldNormal);
    }
  #endif

  // Static branch by feature level.
  // NOTE(review): a numeric macro such as FEATURE_LEVEL may need its value
  // range declared to the effect compiler -- confirm against the engine docs.
  #if FEATURE_LEVEL >= 3
    // High-end devices: full PBR
    vec3 pbrLighting() {
      return calculateFullPBR();
    }
  #elif FEATURE_LEVEL >= 2
    // Mid-range devices: simplified PBR
    vec3 pbrLighting() {
      return calculateSimplifiedPBR();
    }
  #else
    // Low-end devices: Blinn-Phong
    vec3 pbrLighting() {
      return calculateBlinnPhong();
    }
  #endif
}%
|
3. 分支预测优化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/**
 * Caches compiled shader variants, keyed by the set of features a render
 * context enables.
 */
class ShaderVariantManager {
    private variants: Map<string, Shader> = new Map();

    /**
     * Returns the shader variant for `context`, compiling and caching it the
     * first time its feature key is seen.
     */
    public getOptimalShader(context: RenderContext): Shader {
        const variantKey = this.generateVariantKey(context);

        const alreadyCompiled = this.variants.has(variantKey);
        if (!alreadyCompiled) {
            const compiled = this.compileVariant(context);
            this.variants.set(variantKey, compiled);
        }

        return this.variants.get(variantKey)!;
    }

    /**
     * Builds the cache key: the names of the enabled features, in a fixed
     * order, joined with '|'. An empty string means no feature is enabled.
     */
    private generateVariantKey(context: RenderContext): string {
        // [enabled?, feature name] pairs, listed in the order they appear in the key.
        const candidates: Array<[unknown, string]> = [
            [context.hasNormalMap, 'NORMAL_MAP'],
            [context.lightCount > 4, 'MANY_LIGHTS'],
            [context.enableShadows, 'SHADOWS'],
            [context.enableSSAO, 'SSAO'],
        ];

        return candidates
            .filter(([enabled]) => enabled)
            .map(([, featureName]) => featureName)
            .join('|');
    }

    /** Compiles the base shader with the defines generated for `context`. */
    private compileVariant(context: RenderContext): Shader {
        const variantDefines = this.generateDefines(context);
        return this.shaderCompiler.compile(this.baseShader, variantDefines);
    }
}
|
📊 内存带宽优化
1. 减少内存访问
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
// Memory access optimization
CCProgram memory_optimization %{
  // BAD: repeated memory access
  void redundantAccess() {
    vec3 normal = normalize(v_worldNormal);
    vec3 lightDir = normalize(lightPosition - v_worldPos);
    vec3 viewDir = normalize(cameraPosition - v_worldPos);

    // v_worldPos is read again for each distance.
    float dist1 = distance(lightPosition, v_worldPos);
    float dist2 = distance(cameraPosition, v_worldPos);
  }

  // GOOD: cache frequently accessed values
  void cachedAccess() {
    vec3 cachedPos = v_worldPos;  // read once into a local
    vec3 normal = normalize(v_worldNormal);

    vec3 toLight = lightPosition - cachedPos;
    vec3 toCamera = cameraPosition - cachedPos;

    float lightDist = length(toLight);
    float viewDist = length(toCamera);

    // Reuse the lengths computed above to normalize the directions.
    toLight = toLight / lightDist;
    toCamera = toCamera / viewDist;
  }
}%
|
2. 数据打包技术
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
// Data packing optimization
CCProgram data_packing %{
  // BAD: unpacked data
  struct UnpackedData {
    float roughness;  // 4 bytes
    float metallic;   // 4 bytes
    float ao;         // 4 bytes
    float height;     // 4 bytes
    // total: 16 bytes
  };

  // GOOD: packed data.
  // A vec4 of floats is also 16 bytes, so the four values are quantized to
  // 8 bits each and stored in a single 32-bit word. The field cannot be
  // called `packed`, which is a reserved word in GLSL ES.
  struct PackedData {
    uint bits;  // 4 bytes: roughness << 24 | metallic << 16 | ao << 8 | height
  };

  // Quantizes the four [0, 1] values to 8 bits each.
  PackedData packMaterial(UnpackedData m) {
    vec4 v = clamp(vec4(m.roughness, m.metallic, m.ao, m.height), 0.0, 1.0);
    uvec4 q = uvec4(v * 255.0 + 0.5);  // +0.5 rounds to nearest
    PackedData p;
    p.bits = (q.x << 24) | (q.y << 16) | (q.z << 8) | q.w;
    return p;
  }

  // Inverse of packMaterial (up to 8-bit quantization error).
  UnpackedData unpackMaterial(PackedData p) {
    UnpackedData m;
    m.roughness = float((p.bits >> 24) & 0xFFu) / 255.0;
    m.metallic  = float((p.bits >> 16) & 0xFFu) / 255.0;
    m.ao        = float((p.bits >> 8)  & 0xFFu) / 255.0;
    m.height    = float( p.bits        & 0xFFu) / 255.0;
    return m;
  }

  // Normal vector packing: stereographic projection of a unit normal onto
  // two components, saving one component.
  vec2 packNormal(vec3 normal) {
    // The projection is singular at normal.z == -1; the lower bound avoids
    // dividing by zero there.
    return normal.xy / max(normal.z + 1.0, 1e-5);
  }

  // Inverse of packNormal. With f = n.xy / (1 + n.z) and |n| = 1:
  //   n.xy = 2 f / (1 + |f|^2),  n.z = (1 - |f|^2) / (1 + |f|^2).
  // The previous body inverted a different (equal-area) encoding and did not
  // round-trip values produced by packNormal.
  vec3 unpackNormal(vec2 enc) {
    float f2 = dot(enc, enc);
    return vec3(2.0 * enc, 1.0 - f2) / (1.0 + f2);
  }

  // Packs a color into fewer bits: RGB888 in the low 24 bits of a uint.
  uint packColor(vec3 color) {
    // Clamp so out-of-range channels cannot spill into neighbouring bits,
    // and round to nearest instead of truncating.
    uvec3 c = uvec3(clamp(color, 0.0, 1.0) * 255.0 + 0.5);
    return (c.r << 16) | (c.g << 8) | c.b;
  }
}%
|
🎯 LOD和可见性优化
1. 着色器LOD系统
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
// Shader LOD implementation
CCProgram shader_lod %{
  // Non-sampler uniforms are declared inside a uniform block, as Cocos
  // effects require; the member names are unchanged.
  uniform ShaderLodParams {
    float distanceToCamera;
    float lodBias;
  };

  // Computes the LOD level in [0, 3] from the camera distance.
  float calculateShaderLOD() {
    // log2() is undefined for non-positive values, so keep the argument
    // strictly positive. The local is not named `distance`, to avoid hiding
    // the built-in distance() function.
    float camDist = max(distanceToCamera, 1e-4);
    float lod = log2(camDist) + lodBias;
    return clamp(lod, 0.0, 3.0);
  }

  // Selects the lighting model from the LOD level.
  vec3 calculateLighting() {
    float lod = calculateShaderLOD();

    if (lod < 1.0) {
      // LOD 0: full PBR lighting
      return calculateFullPBR();
    } else if (lod < 2.0) {
      // LOD 1: simplified PBR
      return calculateSimplifiedPBR();
    } else if (lod < 3.0) {
      // LOD 2: Blinn-Phong
      return calculateBlinnPhong();
    } else {
      // LOD 3: ambient light only
      return calculateAmbientOnly();
    }
  }
}%
|
2. 动态质量调整
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
/**
 * Adjusts the global rendering quality level so that the average frame time
 * stays close to `targetFrameTime`.
 */
@ccclass('AdaptiveQualityManager')
export class AdaptiveQualityManager extends Component {
    /** Frame time to aim for; the default 16.67 corresponds to 60 FPS in milliseconds. */
    @property
    targetFrameTime: number = 16.67;

    /** Current quality level, pushed to the renderer by applyQualitySettings(). */
    @property
    qualityLevel: number = 2;

    // NOTE(review): the valid range 0..3 and "higher number = higher quality"
    // are assumptions taken from the property name -- confirm against
    // getQualitySettings().
    private static readonly MIN_QUALITY_LEVEL = 0;
    private static readonly MAX_QUALITY_LEVEL = 3;

    private frameTimeHistory: number[] = [];

    /** Per-frame hook: records the frame time, then re-evaluates quality. */
    public update() {
        this.updateFrameTimeHistory();
        this.adjustQuality();
    }

    /**
     * Lowers the quality level when frames run more than 20% over the target
     * and raises it when they run more than 20% under it.
     *
     * Previously both branches only re-applied the current settings, so the
     * level never changed.
     */
    private adjustQuality() {
        const avgFrameTime = this.getAverageFrameTime();
        let nextLevel = this.qualityLevel;

        if (avgFrameTime > this.targetFrameTime * 1.2) {
            // Too slow: step quality down.
            nextLevel = Math.max(AdaptiveQualityManager.MIN_QUALITY_LEVEL, this.qualityLevel - 1);
        } else if (avgFrameTime < this.targetFrameTime * 0.8) {
            // Headroom available: step quality up.
            nextLevel = Math.min(AdaptiveQualityManager.MAX_QUALITY_LEVEL, this.qualityLevel + 1);
        }

        // Only touch the renderer when the level actually changed.
        if (nextLevel !== this.qualityLevel) {
            this.qualityLevel = nextLevel;
            this.applyQualitySettings();
        }
    }

    /** Pushes the settings for the current quality level to the renderer. */
    private applyQualitySettings() {
        const settings = this.getQualitySettings(this.qualityLevel);

        rendering.setGlobalMacro('SHADER_LOD', this.qualityLevel);
        rendering.setGlobalInt('MAX_LIGHTS', settings.maxLights);
        rendering.setGlobalFloat('SHADOW_DISTANCE', settings.shadowDistance);
    }
}
|
📝 本章小结
通过本教程,你应该掌握了:
- GPU架构理解: 了解GPU的执行模型和内存层次
- 计算优化: 掌握数学运算和向量化优化技术
- 内存优化: 学会减少内存访问和数据打包
- LOD优化: 理解着色器级别的细节控制
🚀 下一步学习
继续学习移动端特有的优化策略!🎮✨