diff options
Diffstat (limited to 'lib/defs.h')
| -rw-r--r-- | lib/defs.h | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/lib/defs.h b/lib/defs.h new file mode 100644 index 0000000..e645532 --- /dev/null +++ b/lib/defs.h @@ -0,0 +1,119 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#pragma once + +#include "fresample.h" + +#define LFR_UNREACHABLE (void) 0 + +#if defined(__clang__) +# if __has_builtin(__builtin_unreachable) +# undef LFR_UNREACHABLE +# define LFR_UNREACHABLE __builtin_unreachable() +# endif +#elif defined(__GNUC__) +# if (__GNUC__ >= 4 && __GNU_MINOR__ >= 5) || __GNUC__ > 4 +# undef LFR_UNREACHABLE +# define LFR_UNREACHABLE __builtin_unreachable() +# endif +#endif + +#define INLINE static inline + +/* + Constants used by dithering algorithm. We usa a simple linear + congruential generator to generate a uniform signal for dithering, + taking the high order bits: + + x_{n+1} = (A * x_n + C) mod 2^32 + + The derived constants, AN/CN, are used for stepping the LCG forward + by N steps. AI is the inverse of A. +*/ + +constexpr auto LCG_A = 1103515245u; +constexpr auto LCG_A2 = 3265436265u; +constexpr auto LCG_A4 = 3993403153u; + +constexpr auto LCG_C = 12345u; +constexpr auto LCG_C2 = 3554416254u; +constexpr auto LCG_C4 = 3596950572u; + +constexpr auto LCG_AI = 4005161829u; +constexpr auto LCG_CI = 4235699843u; + +/* ==================== + Utility functions + ==================== */ + +#if defined(LFR_SSE2) && defined(LFR_CPU_X86) +#include <emmintrin.h> + +/* + Store 16-bit words [i0,i1) in the given location. +*/ +INLINE void lfr_storepartial_epi16(__m128i *dest, __m128i x, int i0, int i1) +{ + union { + unsigned short h[8]; + __m128i x; + } u; + u.x = x; + for (int i = i0; i < i1; ++i) + ((unsigned short *) dest)[i] = u.h[i]; +} + +/* + Advance four linear congruential generators. The four generators + should use the same A and C constants. + + The 32-bit multiply we want requires SSE 4.1. We construct it out of + two 32 to 64 bit multiply operations. +*/ +INLINE __m128i +lfr_rand_epu32(__m128i x, __m128i a, __m128i c) +{ + return _mm_add_epi32( + _mm_unpacklo_epi32( + _mm_shuffle_epi32( + _mm_mul_epu32(x, a), + _MM_SHUFFLE(0, 0, 2, 0)), + _mm_shuffle_epi32( + _mm_mul_epu32(_mm_srli_si128(x, 4), a), + _MM_SHUFFLE(0, 0, 2, 0))), + c); +} + +#endif + +#if defined(LFR_ALTIVEC) && defined(LFR_CPU_PPC) +#if !defined(__APPLE_ALTIVEC__) +#include <altivec.h> +#endif + +/* + Advance four linear congruential generators. The four generators + should use the same A and C constants. + + The 32-bit multiply we want does not exist. We construct it out of + 16-bit multiply operations. +*/ +INLINE vector unsigned int +lfr_vecrand(vector unsigned int x, vector unsigned int a, + vector unsigned int c) +{ + vector unsigned int s = vec_splat_u32(-16); + return vec_add( + vec_add( + vec_mulo( + (vector unsigned short) x, + (vector unsigned short) a), + c), + vec_sl( + vec_msum( + (vector unsigned short) x, + (vector unsigned short) vec_rl(a, s), + vec_splat_u32(0)), + s)); +} + +#endif
\ No newline at end of file |
