CUDA Support

Description

The types of this library support compilation with NVCC. To get the safety guarantees, there are some small modifications to the way that CUDA code is written. Normally you would have something like this:

using test_type = boost::safe_numbers::u128;

__global__ void cuda_test(const test_type *in, test_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::safe_numbers::bit_ceil(in[i]);
    }
}

int main()
{
    // Setup: generate inputs, allocate space for the output, etc.

    // Launch the CUDA kernel, and then synchronize the results and get errors (if they exist)
    cudaError_t err = cudaSuccess;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();
    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        // Handle error
    }
}

For the on-device computation behavior to match the CPU computation behavior, we have our own error context class This reduces our above example to the following:

using test_type = boost::safe_numbers::u128;

__global__ void cuda_test(const test_type *in, test_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::safe_numbers::bit_ceil(in[i]);
    }
}

int main()
{
    // Setup: generate inputs, allocate space for the output, etc.

    // First, initialize the device error context
    // Then, launch the CUDA kernel, and then synchronize the results and get errors (if they exist)
    // Upon synchronization, the device_error_context will throw the same exception type as the operation would have on the host
    boost::safe_numbers::device_error_context ctx;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);

    try
    {
        // ctx.synchronize() will internally call BOOST_THROW_EXCEPTION if an error occured on device
        ctx.synchronize();
    }
    catch (const /*exception type*/& e)
    {
        // Perform error handling
    }
}

An exception thrown by ctx.synchronize() will have an e.what() that looks something like:

Device error on thread 256 at /home/runner/work/safe_numbers/boost-root/libs/safe_numbers/include/boost/safe_numbers/detail/unsigned_integer_basis.hpp:1067: Underflow detected in u16 subtraction

The device_error_context will also attempt to printf the error into the terminal. This works when compiling with verbose mode -V. printf error messages will look the same as the message displayed by