$darkmode
Suckless OGL 1.0.0
A lean, high-performance C11 PBR Renderer
simd_utils.c File Reference
#include "simd_utils.h"
#include "log.h"
#include <immintrin.h>
#include <stdbool.h>
#include <stdint.h>
Include dependency graph for simd_utils.c:

Functions

static uint16_t float_to_half_soft (float value)
 
void convert_float_to_half_simd (const float *src, uint16_t *dst, size_t count)
 Converts an array of 32-bit floats to 16-bit half-floats using SIMD (AVX2/F16C). More...
 

Variables

static const uint32_t SIGN_MASK = 0x80000000U
 
static const uint32_t EXP_MASK = 0x7F800000U
 
static const uint32_t MANT_MASK = 0x007FFFFFU
 
static const uint32_t EXP_SHIFT = 23U
 
static const int EXP_BIAS_F32 = 127
 
static const int EXP_BIAS_F16 = 15
 
static const int EXP_MAX_F16 = 31
 
static const uint32_t EXP_MAX_F32 = 255U
 
static const uint32_t MANT_SHIFT_DIFF = 13U
 
static const uint32_t HALF_MANT_SHIFT = 10U
 
static const uint16_t HALF_QNAN_BIT = 0x0200U
 
static const uint32_t HALF_SIGN_SHIFT = 16U
 

Function Documentation

◆ convert_float_to_half_simd()

void convert_float_to_half_simd ( const float *  src,
uint16_t *  dst,
size_t  count 
)

Converts an array of 32-bit floats to 16-bit half-floats using SIMD (AVX2/F16C).

This function uses hardware SIMD instructions to perform the conversion significantly faster than a scalar implementation. It falls back to a scalar loop for any remaining tail elements, or for the entire array if SIMD is unavailable at compile time.

Parameters
src: Pointer to the source array of floats.
dst: Pointer to the destination array of half-floats (uint16_t storage).
count: Number of elements to convert.
Here is the call graph for this function:

◆ float_to_half_soft()

static uint16_t float_to_half_soft ( float  value)
static

Variable Documentation

◆ EXP_BIAS_F16

const int EXP_BIAS_F16 = 15
static

◆ EXP_BIAS_F32

const int EXP_BIAS_F32 = 127
static

◆ EXP_MASK

const uint32_t EXP_MASK = 0x7F800000U
static

◆ EXP_MAX_F16

const int EXP_MAX_F16 = 31
static

◆ EXP_MAX_F32

const uint32_t EXP_MAX_F32 = 255U
static

◆ EXP_SHIFT

const uint32_t EXP_SHIFT = 23U
static

◆ HALF_MANT_SHIFT

const uint32_t HALF_MANT_SHIFT = 10U
static

◆ HALF_QNAN_BIT

const uint16_t HALF_QNAN_BIT = 0x0200U
static

◆ HALF_SIGN_SHIFT

const uint32_t HALF_SIGN_SHIFT = 16U
static

◆ MANT_MASK

const uint32_t MANT_MASK = 0x007FFFFFU
static

◆ MANT_SHIFT_DIFF

const uint32_t MANT_SHIFT_DIFF = 13U
static

◆ SIGN_MASK

const uint32_t SIGN_MASK = 0x80000000U
static