8
\$\begingroup\$

If this code is written correctly, it will block undefined behavior from happening in the case of signed overflow (guaranteeing that the program halts instead), and also guarantee that the program halts in the case of unsigned overflow/wraparound.

//overflow.h
#define err exit(1)
//overflow blocking code based on details at https://cplusplus.com/articles/DE18T05o/
#include <inttypes.h>

#define adr size_t
#define long int64_t
#define ulong uint64_t
const long LZ = ((long)0);
long divL(long v, long b)
{
    if (b == LZ)
        err;
    if ((v == INT64_MIN) && (b == ((long)(-1))))
        err;
    return (long)(v / b);
}
ulong usdivL(ulong v, ulong b)
{
    if (b == ((ulong)0))
        err; 

    return (ulong)(v / b);
}
adr u_as_adr(ulong n)
{
    if (n > SIZE_MAX)
        err;
    return (adr)n;
}

#ifdef __GNUC__
long add(long a, long b)
{
    long r = 0;
    if (__builtin_add_overflow(a, b, &r))
        err;
    return r;
}
long sub(long a, long b)
{
    long r = 0;
    if (__builtin_sub_overflow(a, b, &r))
        err;
    return r;
}
long mul(long a, long b)
{
    long r = 0;
    if (__builtin_mul_overflow(a, b, &r))
        err;
    return r;
}

adr uadd(adr a, adr b)
{
    adr r = 0;
    if (__builtin_add_overflow(a, b, &r))
        err;
    return r;
}
adr usub(adr a, adr b)
{
    adr r = 0;
    if (__builtin_sub_overflow(a, b, &r))
        err;
    return r;
}
adr umul(adr a, adr b)
{
    adr r = 0;
    if (__builtin_mul_overflow(a, b, &r))
        err;
    return r;
}

ulong usadd(ulong a, ulong b)
{
    ulong r = 0;
    if (__builtin_add_overflow(a, b, &r))
        err;
    return r;
}
ulong ussub(ulong a, ulong b)
{
    ulong r = 0;
    if (__builtin_sub_overflow(a, b, &r))
        err;
    return r;
}
ulong usmul(ulong a, ulong b)
{
    ulong r = 0;
    if (__builtin_mul_overflow(a, b, &r))
        err;
    return r;
}

#else

ulong usadd(ulong a, ulong b)
{
    ulong n = a + b;// C standard guarantees wraparound here
    if (n < a)
        err;
    return n;
}
ulong ussub(ulong a, ulong b)
{
    if (b > a)
        err;
    return a - b;
}

ulong usmul(ulong a, ulong b)
{if(b==0)return 0;
    if (a > (usdivL(UINT64_MAX, b)))
        err;
    return a * b;
}

long inv(long n)
{//inverting a number means subtracting it from zero
    if (n == LZ)
        return n;
    if (n == INT64_MIN)
        err;
    return LZ - n;
}
long clamp(ulong n)
{
    if (n > ((ulong)(INT64_MAX)))
        err;
    return (long)n;
}

long mul(long a, long b)
{ //signed multiplication
    if ((b == LZ) || (a == LZ))
        return LZ;
    if (b == ((long)1))
        return a;
    if (a == ((long)1))
        return b;
    long an = 0;
    long bn = 0;
    if (a < LZ)
        an = inv(a);
    else
        an = a;
    if (b < LZ)
        bn = inv(b);
    else
        bn = b;
    ulong r = usmul((ulong)an, (ulong)bn);
    if ((a < LZ) != (b < LZ))
    {
        if (r == (((ulong)(INT64_MAX)) + ((ulong)1)))
            return INT64_MIN;
        return inv(clamp(r));
    }
    return clamp(r);
    
} 

adr uadd(adr a, adr b) { return u_as_adr(usadd((ulong)a, (ulong)b)); }
adr usub(adr a, adr b) { return u_as_adr(ussub((ulong)a, (ulong)b)); }
adr umul(adr a, adr b) { return u_as_adr(usmul((ulong)a, (ulong)b)); }

long sub(long a, long b)
{ //signed subtraction
    if (b == LZ)
        return a;
    if (a == LZ)
        return inv(b);
    if (a > LZ)
    {
        if (b > LZ)
        {
            return a - b;
        }
        else
        {//(a)-(-b)=a+b
            return clamp(usadd((ulong)(a), (ulong)(inv(b))));
        }
    }
    else
    {
        if (b > LZ)
        {//(-a)-(b)=-(a+b)
            return inv(clamp(usadd((ulong)(inv(a)), (ulong)(b))));
        }
        else
        {//(-a)-(-b)=b-a=b+(-a)
            return a - b;
        }
    }
}
long add(long a, long b)
{ 
    if (a == LZ)
        return b;
    if (b == LZ)
        return a;
    if ((a > LZ) != (b > LZ))//overflow can't happen if signs are opposite
        return a + b;
    if (a == INT64_MIN)//can't invert the minimum
        return sub(a, inv(b));//a+b=b+a=a-(-b)
    return sub(b, inv(a));
}
#endif
\$\endgroup\$
4
  • 1
    \$\begingroup\$ Learning exercises aside, don't reinvent the wheel; use the SafeInt library that already exists ("it's nearly 7300 LOC"). (Yes, the library was originally/primarily designed for C++, but there is a C variant. Read the readme carefully.) \$\endgroup\$
    – Cody Gray
    Commented Oct 13, 2022 at 7:35
  • \$\begingroup\$ hahaha #define err exit(1) haven't seen something like that. do people really do it? \$\endgroup\$ Commented Oct 13, 2022 at 11:30
  • 2
    \$\begingroup\$ Never use mysterious crap types over standardized integer types. Every single #define in this code needs to go. \$\endgroup\$
    – Lundin
    Commented Oct 13, 2022 at 14:15
  • 1
    \$\begingroup\$ Never use a macro where a proper type alias (with typedef) will do. (These macros are even worse because they intend to replace keywords) \$\endgroup\$
    – Ben Voigt
    Commented Oct 13, 2022 at 16:55

4 Answers 4

21
\$\begingroup\$

This code has a lot of problems.

#define err exit(1)

exit(1) is not a good way to terminate the program when trapping a bug. abort or an assert would be better. However, terminating the program is not an appropriate action for library code anyway. A good overflow-catching function set would be setup for the caller to handle the case, since normally the possibility of integer overflow is something that arises naturally out of the ability to provide arbitrary inputs, and a reasonable program should treat this as a reportable error condition, not terminate unexpectedly.

const long LZ = ((long)0);

This creates a variable with external linkage named LZ which could have any value assigned to it, and results in code which loads the value of that variable to compare against it each time. This is gratuitously dangerous and costly at runtime, and confusing to the reader. There is no reason not to just write 0 or 0L if you want to force it to have type long (not necessary in your usage).

#define adr size_t
#define long int64_t
#define ulong uint64_t

Defining long as a macro results in undefined behavior and is very confusing. There is no legitimate reason to do this.

#else

...

The portable (non-__GNUC__) versions of these functions are overcomplicated and not obviously correct, and it should be possible to express them a lot more simply.

Overall (and this is my subjective opinion) this code suffers from a common thing I see with new C programmers: wanting to design non-idiomatic programming frameworks rather than actually get stuff done in the language. This in turn makes your code error-prone, hard to read, and non-productive. Trying to redefine standard types contrary to their normal meaning and use weird custom-named types (is adr and attempt to pretend size_t values are addresses? they're not...) is another manifestation of this.

\$\endgroup\$
1
  • 2
    \$\begingroup\$ Good review, I strongly agree with everything said. \$\endgroup\$
    – Lundin
    Commented Oct 13, 2022 at 14:17
17
\$\begingroup\$

I'm pretty sure this causes undefined behaviour:

#define long int64_t

Even if I'm wrong, it's extremely confusing to readers, and will break any subsequently-included headers.

It should be possible (e.g. by repeated inclusion) to make a set of these functions for the full range of arithmetic types.

Simple exit(1) on error isn't very informative (for example, doesn't cause a core dump that can be examined). I recommend producing some output to stderr and/or raising a signal (probably by using abort()).

\$\endgroup\$
3
  • 5
    \$\begingroup\$ I'd even recommend using err() from <err.h>, and if it doesn't exist, provide a drop-in replacement for it. \$\endgroup\$
    – G. Sliepen
    Commented Oct 11, 2022 at 14:37
  • 3
    \$\begingroup\$ For completeness, the reason why this is bad is because int64_t has a fixed size of 64 bits while long is defined as a range [-(2^31-1), 2^31-1] which requires at least 32 bits (bonus: the C standard doesn't even mandate using two's complement) \$\endgroup\$
    – qwr
    Commented Oct 13, 2022 at 6:00
  • \$\begingroup\$ *at least holding that range \$\endgroup\$
    – qwr
    Commented Oct 13, 2022 at 16:37
3
\$\begingroup\$

Document your functions! (Other answers already went through the issues of #define long int64_t)

For example for divL, even minimal documentation makes the library much easier to use. It's not the usual C naming style to use camel case, so maybe the function is better named div_long or divl (in C++ you can have overloaded div functions based on type). By the way, the technical term for the quantities involved in division are dividend, divisor, and quotient, although numerator and denominator are also common.

Here's some very basic documentation:

/* Returns integer division of v / b. 
 * Throws an error for unsafe cases: division by zero OR if (v is INT64_MIN and b is -1).  
 */
long divL(long v, long b)
{
    if (b == LZ)
        err;
    if ((v == INT64_MIN) && (b == ((long)(-1))))
        err;
    return (long)(v / b);
}

Test your code! Since you start with "If this code is written correctly", you should ensure that by writing many test cases, especially for a library designed to have safe functions.

\$\endgroup\$
1
\$\begingroup\$

This is a good and interesting idea, and the actual integer math part of the implementation looks good. (And use of GNU C builtins should help efficiency.)

But the type name #defines are crazy, cluttering the global namespace and not matching what the types you picked are for. size_t is for object sizes, not addresses. int64_t is long long on some systems. It's completely insane to #define an existing primitive type like long to something else.

#define adr size_t
#define long int64_t
#define ulong uint64_t

Don't make the common case run even more code to check for special-case fast paths unless they're much faster, and those special values actually happen very often.

long inv(long n)
{
    //if (n == LZ)        // -n  is already safe
    //    return n;
    if (INT64_MIN != -INT64_MAX && n == INT64_MIN)   // only 2's complement can overflow
        err;
    return LZ - n;        // 0UL - n  uses unsigned wrapping, but we don't need that because we already checked for wrapping.
}

Don't cast when there's no conversion:

signed div_signed(long v, long b)
{
    if (b == LZ)
        err;
    if ((v == INT64_MIN) && (b == ((long)(-1))))
        err;
    return (long)(v / b);      // v/b already has type 
}

(I was going to write more, but got side-tracked and have had this tab sitting around for a month. I'll post this for now.)

\$\endgroup\$

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.