C语言 5位精度互补误差函数的快速实现

t1qtbnec 于 2024-01-06 发布在其他

关注(0)|答案(2)|浏览(183)

[虽然这是一个自问自答的问题，但我很乐意投票并接受任何替代答案，无论是在相同的计算工作量下提供更高的准确度，还是在保持相同准确度的情况下减少计算工作量。]
我以前演示过如何计算互补误差函数erfcf()，其最大误差小于3个ulp。这可以作为其他函数的构建块，例如标准正态分布的CDF $\Phi(x) = \frac{1}{2}\,\mathrm{erfc}(-\sqrt{1/2}\,x)$，或高斯Q函数 $Q(x) = 1 - \Phi(x) = \frac{1}{2}\,\mathrm{erfc}(\sqrt{1/2}\,x)$。然而，对于某些使用情况，不需要完全精确到单精度的计算，而erfc()求值对总运行时间的贡献不可忽略。
文献提供了对互补误差函数的各种低精度近似，但它们要么限于完整输入域的子集，要么针对绝对误差进行了优化，要么计算过于复杂，例如，需要多次调用超越函数。如何才能实现一个高性能的erfcf()，使其在*整个输入域*上具有大约5位十进制数字的相对精度？

c

来源：https://stackoverflow.com/questions/77741402/fast-implementation-of-complementary-error-function-with-5-digit-accuracy

2条答案

按热度按时间

6yt4nkrj1#

以下假设平台符合IEEE-754（2008）浮点标准，在该平台上float被映射到IEEE-754 binary32，并且32位整数和float使用相同的字节序。还假设C/C++编译器保留IEEE-754语义（必要时通过设置适当的命令行开关）。我使用的是Intel C/C++编译器，开关为-march=skylake-avx512 -O3 -fp-model=precise。
由于互补误差函数关于点（0，1）对称，因此可以只关注正半平面中的函数输入。这里函数大致像 $\exp(-x^2)$ 一样衰减，并且对于x > 10.5的参数，float计算会下溢到零。如果在区间[0，10.5]上绘制 $\mathrm{erfc}(x)/\exp(-x^2)$，其形状表明用多项式近似有点困难，但应该很容易用有理函数（即两个多项式之比）近似。一些初始实验表明，两个多项式的次数各为3应该足以达到五位数的精度。
虽然有许多工具可以生成多项式近似，但不幸的是，有理近似并非如此。我使用Remez算法的一个修改版本来生成对 $\mathrm{erfc}(x)/\exp(-x^2)$ 的初始极大极小近似 $R(x) = P(x)/Q(x)$，但必须进行相当广泛的启发式搜索，才能获得一个误差峰值*接近*等振荡的近似，它实现了 $10^{-5}$ 的相对误差，其余的差异对于我的需要来说可以忽略不计。
通过计算 $\mathrm{erfc}(x) = \exp(-x^2)\,R(x)$，所获得的精度显然取决于平台expf()实现的精度。该函数的忠实舍入实现（最大误差 <= 1 ulp）很常见。虽然英特尔编译器附带了一个高度精确的数学库，可以提供近乎正确舍入的实现（最大误差非常接近0.5 ulp），但我也尝试了我自己的忠实舍入的替代实现my_expf()，其误差更大，而只观察到它对fast_erfcf()精度的影响非常小。

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#define USE_FMA          (1)
#define USE_BUILTIN_EXP  (0)
#if !USE_BUILTIN_EXP
float my_expf (float a);
#endif // USE_BUILTIN_EXP
/* Low-accuracy, high-speed complementary error function for float arguments.

   On the non-negative half-line the result is formed as
       erfc(x) = exp(-x*x) * P(x) / Q(x)
   with P and Q cubic polynomials evaluated by Horner's scheme. Negative
   arguments are handled through the symmetry erfc(-x) = 2 - erfc(x), and a
   NaN argument yields a NaN result.

   Error bounds stated for a faithfully rounded expf():
   maximum relative error < 1.065e-5, maximum absolute error < 9.50e-6,
   maximum ulp error < 176.5.

   USE_BUILTIN_EXP selects expf() versus my_expf(); USE_FMA selects the
   fmaf()-based coefficient set versus the multiply-add coefficient set.
*/
float fast_erfcf (float x)
{
    float mag, expo, num, den, res;

    /* Work on |x|, clamped to 10.5; fminf() also maps a NaN magnitude to
       10.5, so the arithmetic below never sees a NaN. */
    mag = fminf (fabsf (x), 10.5f);

#if USE_BUILTIN_EXP
    expo = expf (-mag * mag);
#else // USE_BUILTIN_EXP
    expo = my_expf (-mag * mag);
#endif // USE_BUILTIN_EXP

#if USE_FMA
    /* denominator Q(mag) */
    den =               3.82346243e-1f;    //  0x1.8785c6p-2
    den = fmaf (den, mag, 1.30382288e+0f); //  0x1.4dc756p+0
    den = fmaf (den, mag, 1.85278833e+0f); //  0x1.da5056p+0
    den = fmaf (den, mag, 9.99991655e-1f); //  0x1.fffee8p-1
    /* numerator P(mag) */
    num =              -4.38094139e-5f;    // -0x1.6f8000p-15
    num = fmaf (num, mag, 2.16852024e-1f); //  0x1.bc1ceap-3
    num = fmaf (num, mag, 7.23953605e-1f); //  0x1.72aa0cp-1
    num = fmaf (num, mag, 1.00000000e+0f); //  0x1.000000p+0
#else // USE_FMA
    /* denominator Q(mag) */
    den =             3.82346272e-1f; //  0x1.8785c8p-2
    den = den * mag + 1.30382371e+0f; //  0x1.4dc764p+0
    den = den * mag + 1.85278797e+0f; //  0x1.da5050p+0
    den = den * mag + 9.99991596e-1f; //  0x1.fffee6p-1
    /* numerator P(mag) */
    num =            -4.38243151e-5f; // -0x1.6fa000p-15
    num = num * mag + 2.16852218e-1f; //  0x1.bc1d04p-3
    num = num * mag + 7.23953605e-1f; //  0x1.72aa0cp-1
    num = num * mag + 1.00000000e+0f; //  0x1.000000p+0
#endif // USE_FMA

    /* exp(-x*x) / Q first, then scale by P */
    res = expo / den;
    res = res * num;

    /* NaN in, NaN out */
    if (isnan (x)) {
        return x + x;
    }
    /* reflect for negative arguments */
    return (x < 0.0f) ? (2.0f - res) : res;
}
/* Reinterpret the 32 bits of `a` as a float (bit copy, no numeric
   conversion). */
float uint32_as_float (uint32_t a)
{
    float value;

    memcpy (&value, &a, sizeof value);
    return value;
}
/* Exponential function exp(a) for float arguments.
   Maximum error 0.86565 ulps (figure stated by the original author).

   Method: exp(a) = 2**i * exp(f), with i = rintf (a / log(2)) and
   f = a - i * log(2), so f lies in [-log(2)/2, +log(2)/2]; exp(f) is
   approximated by a polynomial and the result is scaled by 2**i.

   Special cases: returns 0 for a <= -104, INFINITY for a >= 104, and NaN
   for a NaN argument. These are dispatched before the float-to-int
   conversion below, because converting an out-of-range or NaN value to int
   is undefined behavior.
*/
float my_expf (float a)
{
    float f, r, j, s, t;
    int i;
    uint32_t ia;

    // handle special cases: severe overflow / underflow, and NaN
    if (!(fabsf (a) < 104.0f)) {
        if (isnan (a)) return a + a;
        return (a < 0) ? 0.0f : INFINITY;
    }
    // exp(a) = 2**i * exp(f); i = rintf (a / log(2))
    // adding and subtracting 2**23+2**22 rounds the product to an integer
    j = fmaf (1.442695f, a, 12582912.f); // 0x1.715476p0 // log2(e)
    j = j - 12582912.f; // 0x1.8p23 // 2**23+2**22
    // f = a - j * log(2), with log(2) split into high and low parts
    f = fmaf (j, -6.93145752e-1f, a); // -0x1.62e400p-1  // log_2_hi 
    f = fmaf (j, -1.42860677e-6f, f); // -0x1.7f7d1cp-20 // log_2_lo 
    i = (int)j; // |a| < 104 here, so j is a small integer that fits in int
    // approximate r = exp(f) on interval [-log(2)/2, +log(2)/2]
    r =             1.37805939e-3f;  // 0x1.694000p-10
    r = fmaf (r, f, 8.37312452e-3f); // 0x1.125edcp-7
    r = fmaf (r, f, 4.16695364e-2f); // 0x1.555b5ap-5
    r = fmaf (r, f, 1.66664720e-1f); // 0x1.555450p-3
    r = fmaf (r, f, 4.99999851e-1f); // 0x1.fffff6p-2
    r = fmaf (r, f, 1.00000000e+0f); // 0x1.000000p+0
    r = fmaf (r, f, 1.00000000e+0f); // 0x1.000000p+0
    // exp(a) = 2**i * r, applied as two factors s * t = 2**i whose bit
    // patterns are built directly. All bit arithmetic is done in uint32_t
    // (wrapping is intended); shifting a negative int left is undefined.
    ia = (i > 0) ? 0u : 0x83000000u;
    s = uint32_as_float (0x7f000000u + ia);
    t = uint32_as_float (((uint32_t)i << 23) - ia);
    r = r * s;
    r = r * t;
    return r;
}
/* Return the 32-bit pattern that encodes the float `a` (bit copy, no numeric
   conversion). */
uint32_t float_as_uint32 (float a)
{
    uint32_t bits;

    memcpy (&bits, &a, sizeof bits);
    return bits;
}
/* Return the 64-bit pattern that encodes the double `a` (bit copy, no
   numeric conversion). */
uint64_t double_as_uint64 (double a)
{
    uint64_t bits;

    memcpy (&bits, &a, sizeof bits);
    return bits;
}
/* Error of the float result `res` against the double reference `ref`,
   measured in float ulps.

   Both values are turned into 64-bit images laid out like a float with 32
   extra fraction bits: `i` is the bit pattern of `res` shifted left by 32,
   `j` is `ref` re-encoded with the float exponent bias and its mantissa
   aligned to the same position. The absolute difference of the images,
   divided by 2**32, is returned.

   Returns 0.0 when either operand is NaN, infinite, or zero.
*/
double floatUlpErr (float res, double ref)
{
    uint64_t refi, i, j, err;
    int expoRef;
    
    /* ulp error cannot be computed if either operand is NaN, infinity, zero */
    if (isnan (res) || isnan (ref) || isinf(res) || isinf (ref) ||
        (res == 0.0f) || (ref == 0.0)) {
        return 0.0;
    }
    /* widen as unsigned: shifting a pattern with bit 31 set (negative res)
       through a signed 64-bit type would overflow */
    i = ((uint64_t)float_as_uint32 (res)) << 32;
    refi = double_as_uint64 (ref);
    /* unbiased binary exponent of ref */
    expoRef = (int)((refi >> 52) & 0x7ff) - 1023;
    if (expoRef >= 129) {
        /* beyond the float range: saturate the magnitude, keep the sign */
        j = (refi & 0x8000000000000000ULL) | 0x7fffffffffffffffULL;
    } else if (expoRef < -126) {
        /* below the float normal range: make the implicit bit explicit and
           shift the mantissa right as a float subnormal would */
        int shift = -(expoRef + 126);
        j = ((refi << 11) | 0x8000000000000000ULL) >> 8;
        /* a shift count of 64 or more is undefined; every bit is gone then */
        j = (shift < 64) ? (j >> shift) : 0;
        j = j | (refi & 0x8000000000000000ULL);
    } else {
        /* float normal range: mantissa, re-biased exponent, sign */
        j = ((refi << 11) & 0x7fffffffffffffffULL) >> 8;
        j = j | ((uint64_t)(expoRef + 127) << 55);
        j = j | (refi & 0x8000000000000000ULL);
    }
    err = (i < j) ? (j - i) : (i - j);
    return err / 4294967296.0;
}
/* Exhaustive accuracy check of fast_erfcf() against double-precision erfc().

   Every 32-bit pattern is interpreted as a float argument. The largest
   absolute, relative and ulp errors are tracked together with the argument
   at which each occurred, and printed at the end. The run stops early with
   EXIT_FAILURE as soon as the bit patterns of the result and of the
   float-rounded reference differ by more than 200.
*/
int main (void)
{
    float worstAbsArg = NAN, worstRelArg = NAN, worstUlpArg = NAN;
    double worstAbs = 0, worstRel = 0, worstUlp = 0;
    uint32_t bits = 0;

    do {
        const float x = uint32_as_float (bits);
        const double exact = erfc ((double)x);
        const float approx = fast_erfcf (x);
        const uint32_t approxBits = float_as_uint32 (approx);
        const uint32_t exactBits = float_as_uint32 ((float)exact);
        const double ulpErr = floatUlpErr (approx, exact);
        const double absErr = fabs (approx - exact);
        uint32_t gap;

        if (ulpErr > worstUlp) {
            worstUlp = ulpErr;
            worstUlpArg = x;
        }
        if (absErr > worstAbs) {
            worstAbs = absErr;
            worstAbsArg = x;
        }
        /* relative error is only tracked where the reference is at least
           0x1.0p-126 in magnitude */
        if (fabs (exact) >= 0x1.0p-126) {
            const double relErr = fabs ((approx - exact) / exact);
            if (relErr > worstRel) {
                worstRel = relErr;
                worstRelArg = x;
            }
        }
        /* distance between the two float bit patterns */
        gap = (approxBits < exactBits) ? (exactBits - approxBits)
                                       : (approxBits - exactBits);
        if (gap > 200) {
            printf ("diff=%u @ %15.8e : res=% 15.8e  ref=% 15.8e\n", 
                    gap, x, approx, exact);
            return EXIT_FAILURE;
        }
    } while (++bits != 0); /* wraps to 0 after the last pattern */

    printf ("max rel err = %.6e @ % 15.8e\n"
            "max abs err = %.6e @ % 15.8e\n"
            "max ulp err = %.6e @ % 15.8e\n",
            worstRel, worstRelArg, 
            worstAbs, worstAbsArg,
            worstUlp, worstUlpArg);
    return EXIT_SUCCESS;
}

字符串

展开查看全部

赞(0）回复(0）举报 2024-01-06

exdqitrt2#

OP在评论中给出了性能测试所用的范围：从0.0到10.5，步长为1 ULP。
由于所有float中约有40%落在[-1.6e-08...+1.6e-08]范围内，而这些输入的返回值约为1.0，代码可以使用以下内容。对我来说，整体性能至少*翻了一番*。

float fast_erfcf(float x) {
  if (fabsf(x) <= 1.60e-8) {  // or maybe a bit more.
    return 1.0;
  }
  ...
}

字符串
由于erfc(x)中float的大范围可以返回1.0，因此我建议进行这种预先测试。
优点包括：

对于所有float的40%来说，速度要快得多。
erfc(x_near_0)预期返回1.0。OP的fast_erfcf(0.0)返回1.00000834。考虑到可以容忍的大ULP，可以接受。1.0仍然很好。
OP的fast_erfcf(small negative value)返回0.999991536。可以接受，因为可以容忍较大的ULP，但在可能的情况下，希望有erfc(x) >= erfc(next x towards + infinity)。

此外：建议在[0.1.... 10.0]范围内对最感兴趣的值进行性能测试。
这个建议对OP没有多大帮助，所以把它移到这个答案的底部。
与其用my_expf(float a)计算 $e^x$，不如用my_exp2f(float a)计算 $2^x$（这会影响有理函数的常数）。
j = fmaf(1.442695f, a, 12582912.f);被删除。
使用base 2比base e 执行取幂稍微容易一些。
我估计代码会快几个百分点（~3%），而ULP结果不会有太大变化。
确实，这是一个微优化，但它实际上是免费的，因为它不会给链接答案的方法增加任何真实的复杂性。

展开查看全部

赞(0）回复(0）举报 2024-01-06

我来回答

C语言 5位精度互补误差函数的快速实现

2条答案

相关问题

热门标签

最新问答