c++ 使用alpha优化两种颜色的alpha混合

pn9klfpd 于 2023-11-19 发布在其他

关注(0)|答案(1)|浏览(146)

我正在寻找一种优化alpha混合的方法，但是对于alpha的两种颜色（与问题How to alpha blend RGBA unsigned byte color fast?有什么不同）
最初我使用了一个浮点数的解决方案（RGB范围从0.0f到255.0f，A范围从0.0f到1.0f）：

/// Blends `targetColor` on top of `baseColor` in place; both colors may be
/// translucent.
///
/// Assumes (per the surrounding description) that R, G, B are floats in
/// [0.0f, 255.0f] and A is a float in [0.0f, 1.0f] -- confirm against the
/// definition of `Color`.
///
/// @param baseColor   Background color; overwritten with the blended result,
///                    including the resulting alpha.
/// @param targetColor Foreground color drawn over the background.
inline void alphaBlend(Color& baseColor, Color targetColor)
{
    // Contribution of the background that remains visible through the foreground.
    const float baseWeight = (1 - targetColor.A) * baseColor.A;
    const float newAlpha = baseWeight + targetColor.A;

    if (newAlpha > 0)
    {
        baseColor.R = (baseWeight * baseColor.R + targetColor.A * targetColor.R) / newAlpha;
        baseColor.G = (baseWeight * baseColor.G + targetColor.A * targetColor.G) / newAlpha;
        baseColor.B = (baseWeight * baseColor.B + targetColor.A * targetColor.B) / newAlpha;
    }
    else
    {
        // Both inputs are fully transparent: dividing by newAlpha would be
        // 0/0 (NaN), so produce transparent black like the integer variant.
        baseColor.R = 0;
        baseColor.G = 0;
        baseColor.B = 0;
    }

    // The color channels above are normalized by newAlpha, so the stored alpha
    // must be updated as well or the result is inconsistent.
    baseColor.A = newAlpha;
}

字符串
我改变了算法，使其适用于unsigned int RGBA颜色。我将所有对alpha的引用替换为（alpha / 255），然后更正了公式，使值仍然在正确的范围内。

baseColor.R = ((1 - targetColor.A) * baseColor.A * baseColor.R + targetColor.A * targetColor.R) / newAlpha;

Shorthand (targetColor.A -> tA etc.):

R = ((1 - tA) * bA * bR + tA * tR) / newAlpha

(introducing 255-based alpha requires replacing all A instances with A/255)

  = ((1 - (tA / 255)) * (bA / 255) * bR + (tA / 255) * tR) / (newAlpha / 255)

(remove 255 from the denominator's denominator)

  = (((1 - (tA / 255)) * (bA / 255) * bR + (tA / 255) * tR) * 255) / newAlpha

(get rid of direct alpha divisions by 255 by multiplying parentheses by 255/255)

  = (( ((255 - tA) * bA * bR) / 255^2 + (tA * tR) / 255) * 255) / newAlpha

(multiplying by the last 255 causes denominators to reduce)

  = ( ((255 - tA) * bA * bR) / 255 + (tA * tR * 255) / 255 ) / newAlpha
  
(Pushing numerator's denominator (255) to the denominator)

  = ( ((255 - tA) * bA * bR) + (tA * tR * 255) ) / (255 * newAlpha)

(Expanding first multiplication in numerator)

  = ( 255 * bA * bR - tA * bA * bR + tA * tR * 255) / (255 * newAlpha)
                      ^^^^^^^^^^^^   ^^^^^^^^^^^^^
(reordering not to fall below 0 during calculations)

  = ( 255 * bA * bR + tA * tR * 255 - tA * bA * bR ) / (255 * newAlpha)

(grouping to minimize multiplications)

 = ( (bA * bR + tA * tR) * 255 - tA * bA * bR ) / (255 * newAlpha)

(introducing bit shifting - losing precision, but in an acceptable range)

 ~= ( ((bA * bR + tA * tR) << 8) - tA * bA * bR) / (newAlpha << 8)

型
我设法编写了以下代码：

/// Integer alpha blend of `targetColor` on top of `baseColor`, in place; both
/// colors may be translucent.
///
/// Assumes every channel (R, G, B, A) holds a value in [0, 255] -- confirm
/// against the definition of `IntColor`. With that range, the largest
/// intermediate value is (255*255 + 255*255) * 255 = 33,162,750, which fits
/// in a 32-bit unsigned int.
///
/// The color channels are divided by the *untruncated* alpha term (scaled by
/// 255). Dividing by an alpha that has already been rounded down to 8 bits
/// makes the divisor too small and can push a channel past 255 (e.g.
/// bA = tA = 1, bR = tR = 255), which wraps around when stored in 8 bits.
///
/// @param baseColor   Background color; overwritten with the blended result.
/// @param targetColor Foreground color drawn over the background.
inline void alphaBlend(IntColor& baseColor, IntColor targetColor)
{
    const unsigned int baseA = baseColor.A;
    const unsigned int targetA = targetColor.A;

    // Resulting alpha multiplied by 255: 255 * (bA + tA) - bA * tA.
    // It is zero only when both input alphas are zero.
    const unsigned int scaledAlpha = (baseA + targetA) * 255 - targetA * baseA;

    if (scaledAlpha > 0)
    {
        // channel = (255 * (tA * tC + bA * bC) - bA * bC * tA) / scaledAlpha
        // The numerator never exceeds 255 * scaledAlpha, so the quotient
        // stays within [0, 255].
        const unsigned int baseAR = baseA * baseColor.R;
        baseColor.R = ((targetA * targetColor.R + baseAR) * 255 - baseAR * targetA) / scaledAlpha;

        const unsigned int baseAG = baseA * baseColor.G;
        baseColor.G = ((targetA * targetColor.G + baseAG) * 255 - baseAG * targetA) / scaledAlpha;

        const unsigned int baseAB = baseA * baseColor.B;
        baseColor.B = ((targetA * targetColor.B + baseAB) * 255 - baseAB * targetA) / scaledAlpha;

        baseColor.A = scaledAlpha / 255;
    }
    else
    {
        // Both inputs fully transparent: the result is transparent black.
        baseColor.R = 0;
        baseColor.G = 0;
        baseColor.B = 0;
        baseColor.A = 0;
    }
}

型
这一更改将样本数据的渲染从27559 ms减少到17751 ms。由于alpha混合似乎是渲染工作流程中最常见的操作，我很好奇是否有方法进一步优化它。
我想同时对R和B进行计算，但不幸的是，在某些情况下，计算将超过两个字节（例如，如果bA = bR = tA = tR = 255，减法的左部分将等于33162750 = 0x01FA05FE）。
有没有其他的优化方法可以让这段代码更快？

**编辑：**回复评论：

目标体系结构为x64，目标处理器可能为Intel Core系列
输入类型保证为32位RGBA
内存布局为BGRA（8888）
关于SIMD，我的应用程序是一个矢量动画渲染器。每个对象都在一个单独的位图上渲染，然后阿尔法混合到结果中，因为每个对象都可能应用了阿尔法/遮罩/变换/效果，或者可能由多个子对象组成，每个对象也可能应用了这些。
编译器是来自Microsoft Visual Studio 2022的编译器。应用程序仅适用于Windows。

c++

来源：https://stackoverflow.com/questions/77006591/optimizing-alpha-blending-for-two-colors-with-alpha

1条答案

按热度按时间

l7mqbcuq1#

我把这个答案留给那些也在寻找基于整数计算的alpha混合两种颜色的alpha（也就是说，允许“背景”或“基础”颜色也是半透明的）的人。它不是 * 非常 * 快，但肯定比它的浮点等价物快。
不幸的是，我问题中的代码存在缺陷，有时会给出256的结果，这在某些情况下会导致丑陋的黑色像素（(unsigned char)256 == 0）。
下面的代码提供了解决方案，也可以作为正确性检查。它验证：

结果alpha和结果颜色与浮点解的差异都不超过一个单位（1）。差异来自整数除法时的精度不足
结果alpha和结果颜色都不超过[0..255]的边界

从信息上来说，浮点alpha通常会保留在[0.0f..1.0f]范围内，现在它被规范化为[0.0f..255.0f]，这样我就可以将它与它的int对应项进行比较。
验证代码/解决方案如下。

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>

// Exhaustively compares integer alpha blending against a floating-point
// reference for every combination of 8-bit base alpha, base color, target
// alpha and target color (256^4 = 2^32 combinations).
//
// For each combination it reports:
//   * integer color results that fall outside [0, 255];
//   * "critical" cases where the truncated float and integer results differ
//     by more than one unit in alpha or color.
// At the end it prints how many combinations differed at all, and that count
// as an integer percentage of all combinations.
//
// Returns 0 (implicitly) after waiting for a key press.
int main()
{
    unsigned long long diffs = 0;

    for (unsigned int baseAlpha = 0; baseAlpha < 256; baseAlpha++)
    {
        std::printf("Processing a1 = %u\n", baseAlpha);

        // Float reference uses alpha in [0.0f, 1.0f].
        const float floatBaseAlpha = baseAlpha / 255.0f;

        for (unsigned int baseColor = 0; baseColor < 256; baseColor++)
        {
            const unsigned int baseAR = baseAlpha * baseColor;

            for (unsigned int targetAlpha = 0; targetAlpha < 256; targetAlpha++)
            {
                // Everything in this scope depends only on the two alphas, so
                // it is computed once per targetAlpha instead of once per
                // targetColor.

                // FLOAT ALPHA BLENDING: resulting alpha in [0.0f, 1.0f].
                const float floatTargetAlpha = targetAlpha / 255.0f;
                const float floatResultAlpha = (1 - floatTargetAlpha) * floatBaseAlpha + floatTargetAlpha;
                // Below one 8-bit alpha step the result is treated as fully
                // transparent, which also avoids dividing by (nearly) zero.
                const bool floatIsVisible = floatResultAlpha >= 1.0f / 255.0f;

                // INT ALPHA BLENDING: resulting alpha multiplied by 255.
                const unsigned int scaledIntAlpha = (baseAlpha + targetAlpha) * 255 - targetAlpha * baseAlpha;

                // Float alpha is rescaled to [0, 255] so it can be compared
                // with its integer counterpart.
                const int alphaFromFloat = static_cast<int>(floatResultAlpha * 255.0f);
                const int alphaFromInt = static_cast<int>(scaledIntAlpha / 255);
                const int aDiff = std::abs(alphaFromFloat - alphaFromInt);

                for (unsigned int targetColor = 0; targetColor < 256; targetColor++)
                {
                    // Float result: color channel in [0.0f, 255.0f].
                    float floatResultColor = 0.0f;
                    if (floatIsVisible)
                    {
                        floatResultColor = ((1 - floatTargetAlpha) * floatBaseAlpha * baseColor + floatTargetAlpha * targetColor) / floatResultAlpha;
                    }

                    // Int result: divide by the untruncated, 255-scaled alpha.
                    unsigned int intResultColor = 0;
                    if (scaledIntAlpha > 0)
                    {
                        intResultColor = ((targetAlpha * targetColor + baseAR) * 255 - baseAR * targetAlpha) / scaledIntAlpha;
                    }

                    // Compare (float results are truncated, not rounded).
                    const int colorFromFloat = static_cast<int>(floatResultColor);
                    const int colorFromInt = static_cast<int>(intResultColor);
                    const int cDiff = std::abs(colorFromFloat - colorFromInt);

                    if (colorFromInt > 255 || colorFromInt < 0)
                    {
                        std::printf("Int color outside range!");
                    }

                    if (aDiff > 1 || cDiff > 1)
                    {
                        std::printf("Critical difference: bA: %u, bC: %u, tA: %u, tC: %u\n", baseAlpha, baseColor, targetAlpha, targetColor);
                        std::printf("Float result: A: %d, C: %d\n", alphaFromFloat, colorFromFloat);
                        std::printf("Int result: A: %d, C: %d\n", alphaFromInt, colorFromInt);
                        std::printf("Alpha difference: %d\n", aDiff);
                        std::printf("Color difference: %d\n", cDiff);
                    }

                    if (aDiff > 0 || cDiff > 0)
                        diffs++;
                }
            }
        }
    }

    // 2^32 combinations were tested; standard ULL literals replace the
    // MSVC-only Ui64 suffix, and %llu matches the unsigned 64-bit arguments.
    std::printf("Total differences: %llu (%llu%%)\n", diffs, 100ULL * diffs / (1ULL << 32));

    // Keep the console window open until a key is pressed.
    std::getchar();
}

字符串
结果令人惊讶地好-应用程序测试了所有可能的颜色和阿尔法组合，并且与浮点计算不同的那些组合（我认为是有效的）的百分比低于1%（组合总数为4 294 967 296）：

Total differences: 4959508 (0%)

型
阿尔法混合的流行优化是通过替换（* 255, / 255）操作，分别（<< 8, >> 8），等于（* 256, / 256）。由于我们需要乘和除以255而不是256，优化的代价是精度下降。坏消息是错误结果的数量急剧增加，但好消息是，误差仍然不超过alpha和颜色的单位值：

// Bit-shift variant of the integer blend: `<< 8` multiplies by 256 where the
// exact formula multiplies by 255, and `>> 8` divides by 256 where it divides
// by 255. This trades precision for cheaper operations.
// Uses baseAlpha, baseColor, targetAlpha and targetColor from the enclosing
// verification loops.

// Resulting alpha, scaled by (approximately) 256.
int intResultAlpha = (((baseAlpha + targetAlpha) << 8) - targetAlpha * baseAlpha);
int intResultColor;
if (intResultAlpha > 0)
{
    // Divide by the scaled alpha before it is shifted back down to 8 bits.
    unsigned int divisor = intResultAlpha;

    unsigned int baseAR = baseAlpha * baseColor;
    intResultColor = (((targetAlpha * targetColor + baseAR) << 8) - (baseAR * targetAlpha)) / divisor;
}
else
{
    // Both alphas are zero: the result is fully transparent.
    intResultColor = 0;
}

// Scale the alpha back down to 8 bits.
intResultAlpha = intResultAlpha >> 8;

Total differences: 1218912093 (28%)

的字符串
所以现在：

如果您关心精确的结果，请选择缓慢但精确的浮点解决方案（记住将alpha值保持在[0.0f..1.0f]范围内，而不是[0.0f..255.0f]！）
如果你想要更快，但（可以忽略）不太准确的解决方案，选择整数解决方案与255的除法和乘法。
如果你想要更快、但精度更低的解决方案（尽管误差仍然是肉眼无法察觉的），请选择位移解决方案。
如果你想要更快的解决方案，选择其中一个并进一步优化它们（想法包括尝试将两个通道合并到一个64位int中，并尝试一次处理两个通道-改进33%或使用各种扩展（如MMX）在汇编程序中实现代码）。

如果你决定选择最后一个解决方案，不要忘记在这里发布另一个答案-我相信每个人都会从快速阿尔法混合算法中受益。

赞(0）回复(0）举报 2023-11-19

我来回答

c++ 使用alpha优化两种颜色的alpha混合

1条答案

相关问题

热门标签

最新问答