Floating Point Constant Values In C++, CUDA, and Python

Posted 08-22-2025 | Updated 08-22-2025 | blog | 6 minutes read (About 889 words) | visits

Introduction

Floating-point constants, such as infinity, maximum, minimum, and zero, are essential for many numerical algorithms and scientific computations.

In this blog post, I would like to discuss how to use floating-point constant values in C++, CUDA, and Python.

Floating Point Constant Values In C++, CUDA, and Python

The following examples demonstrate how to obtain floating-point constants in C++, CUDA, and Python, and use assertions to verify their basic properties.

C++ Example

floating_point.cpp

#include <limits>
#include <stdfloat>

#include <cassert>
#include <cmath>

// Reports whether the std::numeric_limits specialization for T advertises
// both an infinity and a quiet NaN.
template <typename T>
constexpr bool stdfloat_has_special_values()
{
    using limits = std::numeric_limits<T>;
    return limits::has_infinity ? limits::has_quiet_NaN : false;
}

// Constexpr equality operator based on bitwise identity: two operands are
// equal when neither has a bit set that the other one lacks. T must support
// the & and ~ operators.
// NOTE(review): & and ~ are not defined for built-in floating-point types, and
// comparisons between arithmetic operands use the built-in operators only, so
// this template does not appear to take part in the floating-point
// comparisons in this file -- confirm the intent.
template <typename T>
constexpr bool operator==(T const& lhs, T const& rhs)
{
    // Only when lhs has no bit missing from rhs is the reverse check needed.
    if ((lhs & ~rhs) == 0)
    {
        return static_cast<bool>((rhs & ~lhs) == 0);
    }
    return false;
}

// Constexpr inequality operator: yields the logical complement of comparing
// the same two operands with ==.
template <typename T>
constexpr bool operator!=(T const& lhs, T const& rhs)
{
    return (lhs == rhs) ? false : true;
}

int main()
{
    // Shorthand for the numeric_limits specialization of each fixed-width type.
    using F16 = std::numeric_limits<std::float16_t>;
    using F32 = std::numeric_limits<std::float32_t>;
    using F64 = std::numeric_limits<std::float64_t>;
    using F128 = std::numeric_limits<std::float128_t>;
    using BF16 = std::numeric_limits<std::bfloat16_t>;

    // Every fixed-width floating-point type must be announced by its macro.
    static_assert(__STDCPP_FLOAT16_T__ == 1, "No float16 support");
    static_assert(__STDCPP_FLOAT32_T__ == 1, "No float32 support");
    static_assert(__STDCPP_FLOAT64_T__ == 1, "No float64 support");
    static_assert(__STDCPP_FLOAT128_T__ == 1, "No float128 support");
    static_assert(__STDCPP_BFLOAT16_T__ == 1, "No bfloat16 support");

    // Each type must provide both an infinity and a quiet NaN.
    static_assert(stdfloat_has_special_values<std::float16_t>(),
                  "No special values for float16");
    static_assert(stdfloat_has_special_values<std::float32_t>(),
                  "No special values for float32");
    static_assert(stdfloat_has_special_values<std::float64_t>(),
                  "No special values for float64");
    static_assert(stdfloat_has_special_values<std::float128_t>(),
                  "No special values for float128");
    static_assert(stdfloat_has_special_values<std::bfloat16_t>(),
                  "No special values for bfloat16");

    // The largest finite value lies strictly below positive infinity.
    static_assert(F16::max() < F16::infinity(), "No infinity for float16");
    static_assert(F32::max() < F32::infinity(), "No infinity for float32");
    static_assert(F64::max() < F64::infinity(), "No infinity for float64");
    static_assert(F128::max() < F128::infinity(), "No infinity for float128");
    static_assert(BF16::max() < BF16::infinity(), "No infinity for bfloat16");

    // The lowest finite value lies strictly above negative infinity.
    static_assert(F16::lowest() > -F16::infinity(),
                  "No negative infinity for float16");
    static_assert(F32::lowest() > -F32::infinity(),
                  "No negative infinity for float32");
    static_assert(F64::lowest() > -F64::infinity(),
                  "No negative infinity for float64");
    static_assert(F128::lowest() > -F128::infinity(),
                  "No negative infinity for float128");
    static_assert(BF16::lowest() > -BF16::infinity(),
                  "No negative infinity for bfloat16");

    // std::exp is not a constexpr function, so exp(-inf) == 0 is verified at
    // run time with assert instead of static_assert.
    assert(0.0f16 == std::exp(-F16::infinity()));
    assert(0.0f32 == std::exp(-F32::infinity()));
    assert(0.0f64 == std::exp(-F64::infinity()));
    assert(0.0f128 == std::exp(-F128::infinity()));
    assert(0.0bf16 == std::exp(-BF16::infinity()));
}

$ g++ floating_point.cpp -o floating_point_cpp -std=c++23
$ ./floating_point_cpp

CUDA Example

floating_point.cu

#include <cassert>

#include <cuda_bf16.h>
#include <cuda_fp16.h>

// Raw 16-bit patterns that main() compares against the FP16 constants.
// Reference:
// https://docs.nvidia.com/cuda/archive/12.8.0/cuda-math-api/cuda_math_api/group__CUDA__MATH__INTRINSIC__HALF__CONSTANTS.html
constexpr unsigned short CUDART_ZERO_FP16_BITS{0x0000U};
constexpr unsigned short CUDART_MIN_DENORM_FP16_BITS{0x0001U};
constexpr unsigned short CUDART_MAX_NORMAL_FP16_BITS{0x7BFFU};
constexpr unsigned short CUDART_INF_FP16_BITS{0x7C00U};

// Raw 16-bit patterns that main() compares against the BF16 constants.
// Reference:
// https://docs.nvidia.com/cuda/archive/12.8.0/cuda-math-api/cuda_math_api/group__CUDA__MATH__INTRINSIC__BFLOAT16__CONSTANTS.html
constexpr unsigned short CUDART_ZERO_BF16_BITS{0x0000U};
constexpr unsigned short CUDART_MIN_DENORM_BF16_BITS{0x0001U};
constexpr unsigned short CUDART_MAX_NORMAL_BF16_BITS{0x7F7FU};
constexpr unsigned short CUDART_INF_BF16_BITS{0x7F80U};

// Checks on the host that the CUDART_* half and bfloat16 constants from the
// CUDA headers carry the bit patterns spelled out in the *_BITS constants, and
// that infinity orders strictly outside the largest normal value.
int main()
{
    // __ushort_as_half reinterprets 16 raw bits as a __half. Using the
    // intrinsic avoids reading an unsigned short object through a __half
    // reference, which a reinterpret_cast to __half const& would do.
    assert(CUDART_INF_FP16 == __ushort_as_half(CUDART_INF_FP16_BITS));
    assert(CUDART_MAX_NORMAL_FP16 ==
           __ushort_as_half(CUDART_MAX_NORMAL_FP16_BITS));
    assert(CUDART_MIN_DENORM_FP16 ==
           __ushort_as_half(CUDART_MIN_DENORM_FP16_BITS));
    assert(CUDART_ZERO_FP16 == __ushort_as_half(CUDART_ZERO_FP16_BITS));
    assert(CUDART_INF_FP16 > CUDART_MAX_NORMAL_FP16);
    assert(-CUDART_INF_FP16 < -CUDART_MAX_NORMAL_FP16);

    // Same checks for bfloat16, using the matching __ushort_as_bfloat16
    // intrinsic for the bit reinterpretation.
    assert(CUDART_INF_BF16 == __ushort_as_bfloat16(CUDART_INF_BF16_BITS));
    assert(CUDART_MAX_NORMAL_BF16 ==
           __ushort_as_bfloat16(CUDART_MAX_NORMAL_BF16_BITS));
    assert(CUDART_MIN_DENORM_BF16 ==
           __ushort_as_bfloat16(CUDART_MIN_DENORM_BF16_BITS));
    assert(CUDART_ZERO_BF16 == __ushort_as_bfloat16(CUDART_ZERO_BF16_BITS));
    assert(CUDART_INF_BF16 > CUDART_MAX_NORMAL_BF16);
    assert(-CUDART_INF_BF16 < -CUDART_MAX_NORMAL_BF16);
}

$ nvcc floating_point.cu -o floating_point_cuda
$ ./floating_point_cuda

Python Example

floating_point.py

import numpy as np

if __name__ == "__main__":

    # The NumPy floating-point types exercised below, in a fixed order.
    dtypes = (np.float16, np.float32, np.float64)

    # Positive infinity plus the finfo `max` and `min` values, keyed by type.
    infinities = {dtype: dtype("inf") for dtype in dtypes}
    maxima = {dtype: np.finfo(dtype).max for dtype in dtypes}
    minima = {dtype: np.finfo(dtype).min for dtype in dtypes}

    # +inf must compare greater than the `max` value reported by finfo.
    for dtype in dtypes:
        assert infinities[dtype] > maxima[dtype]

    # -inf must compare less than the `min` value reported by finfo.
    for dtype in dtypes:
        assert -infinities[dtype] < minima[dtype]

    # exp(-inf) must equal zero of the same type.
    for dtype in dtypes:
        assert np.exp(-infinities[dtype]) == dtype(0)

$ python floating_point.py

References

Floating Point Constant Values In C++, CUDA, and Python

https://leimao.github.io/blog/Floating-Point-Constant-Values-CPP-CUDA-Python/

Author

Lei Mao

Posted on

08-22-2025

Updated on

08-22-2025

Licensed under

CPP,

Python,

CUDA

Floating Point Constant Values In C++, CUDA, and Python

Introduction

Floating Point Constant Values In C++, CUDA, and Python

C++ Example

CUDA Example

Python Example

References

Author

Posted on

Updated on

Licensed under

Like this article? Support the author with

Comments

Advertisement

Catalogue