Rewrite the ARB lanczos shader to work with the Radeon R300 limitations.

svn path=/branches/KDE/4.6/kdebase/workspace/; revision=1215505
2011-01-18 22:43:08 +00:00 · 2011-01-18 22:43:08 +00:00 · bfa2fc68e6
parent 1a93cc40d2
commit bfa2fc68e6
3 changed files with 44 additions and 29 deletions
--- a/lanczos-fragment.glsl
+++ b/lanczos-fragment.glsl
@ -1,13 +1,13 @@
 uniform sampler2D texUnit;
-uniform vec2 offsets[25];
-uniform vec4 kernel[25];
+uniform vec2 offsets[16];
+uniform vec4 kernel[16];

 varying vec2 varyingTexCoords;

 void main(void)
 {
    vec4 sum = texture2D(texUnit, varyingTexCoords.st) * kernel[0];
-    for (int i = 1; i < 25; i++) {
+    for (int i = 1; i < 16; i++) {
        sum += texture2D(texUnit, varyingTexCoords.st - offsets[i]) * kernel[i];
        sum += texture2D(texUnit, varyingTexCoords.st + offsets[i]) * kernel[i];
    }
--- a/lanczosfilter.cpp
+++ b/lanczosfilter.cpp
@ -138,7 +138,7 @@ void LanczosShader::createKernel( float delta, int *size )

    // The two outermost samples always fall at points where the lanczos
    // function returns 0, so we'll skip them.
-    const int sampleCount = qBound( 3, qCeil(delta * a) * 2 + 1 - 2, 49 );
+    const int sampleCount = qBound( 3, qCeil(delta * a) * 2 + 1 - 2, 29 );
    const int center = sampleCount / 2;
    const int kernelSize = center + 1;
    const float factor = 1.0 / delta;
@ -152,7 +152,7 @@ void LanczosShader::createKernel( float delta, int *size )
        values[i] = val;
    }

-    memset(m_kernel, 0, 25 * sizeof(QVector4D));
+    memset(m_kernel, 0, 16 * sizeof(QVector4D));

    // Normalize the kernel
    for ( int i = 0; i < kernelSize; i++ ) {
@ -165,7 +165,7 @@ void LanczosShader::createKernel( float delta, int *size )

 void LanczosShader::createOffsets( int count, float width, Qt::Orientation direction )
    {
-    memset(m_offsets, 0, 25 * sizeof(QVector2D));
+    memset(m_offsets, 0, 16 * sizeof(QVector2D));
    for ( int i = 0; i < count; i++ ) {
        m_offsets[i] = ( direction == Qt::Horizontal ) ?
                QVector2D( i / width, 0 ) : QVector2D( 0, i / width );
@ -612,19 +612,19 @@ void LanczosShader::setUniforms()
    if( m_shader )
        {
        glUniform1i( m_uTexUnit, 0 );
-        glUniform2fv( m_uOffsets, 25, (const GLfloat*)m_offsets );
-        glUniform4fv( m_uKernel, 25, (const GLfloat*)m_kernel );
+        glUniform2fv( m_uOffsets, 16, (const GLfloat*)m_offsets );
+        glUniform4fv( m_uKernel, 16, (const GLfloat*)m_kernel );
        }
 #ifndef KWIN_HAVE_OPENGLES
    else
        {
-        for( int i=0; i<25; ++i )
+        for( int i=0; i<16; ++i )
            {
            glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i, m_offsets[i].x(), m_offsets[i].y(), 0, 0 );
            }
-        for( int i=0; i<25; ++i )
+        for( int i=0; i<16; ++i )
            {
-            glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i+25, m_kernel[i].x(), m_kernel[i].y(), m_kernel[i].z(), m_kernel[i].w() );
+            glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i+16, m_kernel[i].x(), m_kernel[i].y(), m_kernel[i].z(), m_kernel[i].w() );
            }
        }
 #endif
@ -632,9 +632,11 @@ void LanczosShader::setUniforms()

 bool LanczosShader::init()
    {
+    GLPlatform *gl = GLPlatform::instance();
    if ( GLShader::fragmentShaderSupported() &&
         GLShader::vertexShaderSupported() &&
-         GLRenderTarget::supported() )
+         GLRenderTarget::supported() &&
+         !(gl->isRadeon() && gl->chipClass() < R600))
        {
        m_shader = ShaderManager::instance()->loadFragmentShader(ShaderManager::SimpleShader, ":/resources/lanczos-fragment.glsl");
        if (m_shader->isValid())
@ -665,22 +667,35 @@ bool LanczosShader::init()
    QByteArray text;
    QTextStream stream(&text);

+    // Note: This program uses 31 temporaries, 62 ALU instructions, 31 texture
+    //       fetches, 3 texture indirections and 93 instructions.
+    //       The R300 limitations are 32, 64, 32, 4 and 96 respectively.
    stream << "!!ARBfp1.0\n";
-    stream << "TEMP coord;\n"; // temporary variable to store texcoord
-    stream << "TEMP color;\n"; // temporary variable to store fetched texture colors
-    stream << "TEMP sum;\n"; // variable to render the final result
-    stream << "TEX sum, fragment.texcoord, texture[0], 2D;\n"; // sum = texture2D(texUnit, gl_TexCoord[0].st)
-    stream << "MUL sum, sum, program.local[25];\n"; // sum = sum * kernel[0]
-    for( int i=1; i<25; ++i )
-        {
-        stream << "ADD coord, fragment.texcoord, program.local[" << i << "];\n"; // coord = gl_TexCoord[0] + offset[i]
-        stream << "TEX color, coord, texture[0], 2D;\n"; // color = texture2D(texUnit, coord)
-        stream << "MAD sum, color, program.local[" << (25+i) << "], sum;\n"; // sum += color * kernel[i]
-        stream << "SUB coord, fragment.texcoord, program.local[" << i << "];\n"; // coord = gl_TexCoord[0] - offset[i]
-        stream << "TEX color, coord, texture[0], 2D;\n"; // color = texture2D(texUnit, coord)
-        stream << "MAD sum, color, program.local[" << (25+i) << "], sum;\n"; // sum += color * kernel[i]
-        }
-    stream << "MOV result.color, sum;\n";  // gl_FragColor = sum
+    stream << "TEMP sum;\n";
+
+    // Declare 30 temporaries for holding texcoords and TEX results
+    for (int i = 0; i < 30; i++)
+        stream << "TEMP temp" << i << ";\n";
+
+    // Compute the texture coordinates
+    for (int i = 0, j = 0; i < 30 / 2; i++) {
+        stream << "ADD temp" << j++ << ", fragment.texcoord, program.local[" << i+1 << "];\n";
+        stream << "SUB temp" << j++ << ", fragment.texcoord, program.local[" << i+1 << "];\n";
+    }
+
+    // Sample the texture coordinates
+    stream << "TEX sum, fragment.texcoord, texture[0], 2D;\n";
+    for (int i = 0; i < 30; i++)
+        stream << "TEX temp" << i << ", temp" << i << ", texture[0], 2D;\n";
+
+    // Process the results
+    stream << "MUL sum, sum, program.local[16];\n"; // sum = sum * kernel[0]
+    for (int i = 0, j = 0; i < 30 / 2; i++) {
+        stream << "MAD sum, temp" << j++ << ", program.local[" << (17+i) << "], sum;\n";
+        stream << "MAD sum, temp" << j++ << ", program.local[" << (17+i) << "], sum;\n";
+    }
+
+    stream << "MOV result.color, sum;\n";
    stream << "END\n";
    stream.flush();

--- a/lanczosfilter.h
+++ b/lanczosfilter.h
@ -90,8 +90,8 @@ class LanczosShader
        int m_uTexUnit;
        int m_uOffsets;
        int m_uKernel;
-        QVector2D m_offsets[25];
-        QVector4D m_kernel[25];
+        QVector2D m_offsets[16];
+        QVector4D m_kernel[16];
        uint m_arbProgram; // TODO: GLuint
 };
 #endif