aboutsummaryrefslogtreecommitdiffstats
path: root/lib/defs.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/defs.h')
-rw-r--r--lib/defs.h119
1 files changed, 119 insertions, 0 deletions
diff --git a/lib/defs.h b/lib/defs.h
new file mode 100644
index 0000000..e645532
--- /dev/null
+++ b/lib/defs.h
@@ -0,0 +1,119 @@
+/* Copyright 2012 Dietrich Epp <depp@zdome.net> */
+#pragma once
+
+#include "fresample.h"
+
+/*
+  LFR_UNREACHABLE marks a point that control flow can never reach.
+  It expands to __builtin_unreachable() when the compiler provides
+  it, and to a no-op expression otherwise.
+*/
+#define LFR_UNREACHABLE ((void) 0)
+
+#if defined(__clang__)
+# if __has_builtin(__builtin_unreachable)
+#  undef LFR_UNREACHABLE
+#  define LFR_UNREACHABLE __builtin_unreachable()
+# endif
+#elif defined(__GNUC__)
+/* The version test must use __GNUC_MINOR__; a misspelled macro name
+   silently evaluates to 0 inside #if and disables the builtin on
+   GCC 4.5 through 4.9. */
+# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+#  undef LFR_UNREACHABLE
+#  define LFR_UNREACHABLE __builtin_unreachable()
+# endif
+#endif
+
+/* Storage-class prefix for the helper functions defined in this
+   header: `static inline`, so each including translation unit gets
+   its own internal-linkage copy. */
+#define INLINE static inline
+
+/*
+ Constants used by the dithering algorithm. We use a simple linear
+ congruential generator to generate a uniform signal for dithering,
+ taking the high order bits:
+
+ x_{n+1} = (A * x_n + C) mod 2^32
+
+ The derived constants, AN/CN, are used for stepping the LCG forward
+ by N steps. AI is the inverse of A.
+*/
+
+/* One step forward: x -> LCG_A * x + LCG_C (mod 2^32). */
+constexpr auto LCG_A = 1103515245u;
+constexpr auto LCG_C = 12345u;
+
+/* Two steps forward in one go:
+   LCG_A2 = LCG_A^2 and LCG_C2 = (LCG_A + 1) * LCG_C (mod 2^32). */
+constexpr auto LCG_A2 = 3265436265u;
+constexpr auto LCG_C2 = 3554416254u;
+
+/* Four steps forward in one go. */
+constexpr auto LCG_A4 = 3993403153u;
+constexpr auto LCG_C4 = 3596950572u;
+
+/* One step backward: LCG_AI is the inverse of LCG_A and
+   LCG_CI = -LCG_AI * LCG_C (mod 2^32). */
+constexpr auto LCG_AI = 4005161829u;
+constexpr auto LCG_CI = 4235699843u;
+
+/* ====================
+ Utility functions
+ ==================== */
+
+#if defined(LFR_SSE2) && defined(LFR_CPU_X86)
+#include <emmintrin.h>
+
+/*
+  Store the 16-bit lanes [i0,i1) of x into the same lane positions
+  of *dest. Lanes of *dest outside that range are not written, and an
+  empty range (i0 >= i1) stores nothing.
+
+  No bounds checking is done: the caller must keep 0 <= i0 and
+  i1 <= 8.
+*/
+INLINE void lfr_storepartial_epi16(__m128i *dest, __m128i x, int i0, int i1)
+{
+    /* Spill the vector with an unaligned store instead of reading it
+       back through a union: reading a union member other than the one
+       last written is undefined behavior in C++. */
+    unsigned short lanes[8];
+    unsigned short *out = (unsigned short *) dest;
+    _mm_storeu_si128((__m128i *) lanes, x);
+    for (int i = i0; i < i1; ++i)
+        out[i] = lanes[i];
+}
+
+/*
+  Step four 32-bit linear congruential generators at once: every
+  32-bit lane of the result is a * x + c, truncated to 32 bits.
+
+  All four generators should share the same A and C constants. Only
+  lanes 0 and 2 of a are actually read; each one multiplies a pair of
+  adjacent x lanes.
+
+  A lane-wise 32-bit multiply requires SSE 4.1, so the product is
+  assembled from two 32x32 -> 64 bit multiplies, keeping only the low
+  halves.
+*/
+INLINE __m128i
+lfr_rand_epu32(__m128i x, __m128i a, __m128i c)
+{
+    /* Products of x lanes 0 and 2; the shuffle packs their low 32 bits
+       into lanes 0 and 1. */
+    __m128i even = _mm_shuffle_epi32(
+        _mm_mul_epu32(x, a),
+        _MM_SHUFFLE(0, 0, 2, 0));
+    /* Shift x down by one lane so that lanes 1 and 3 take part in the
+       multiply, then pack the low halves the same way. */
+    __m128i odd = _mm_shuffle_epi32(
+        _mm_mul_epu32(_mm_srli_si128(x, 4), a),
+        _MM_SHUFFLE(0, 0, 2, 0));
+    /* Interleave back into lane order 0, 1, 2, 3 and add the increment. */
+    __m128i product = _mm_unpacklo_epi32(even, odd);
+    return _mm_add_epi32(product, c);
+}
+
+#endif
+
+#if defined(LFR_ALTIVEC) && defined(LFR_CPU_PPC)
+#if !defined(__APPLE_ALTIVEC__)
+#include <altivec.h>
+#endif
+
+/*
+  Step four linear congruential generators at once, returning
+  a * x + c for each 32-bit element. All four generators should share
+  the same A and C constants.
+
+  There is no 32-bit vector multiply, so the product is built from
+  16-bit multiplies: one product of matching halfwords, plus the sum
+  of the two cross products moved up by 16 bits.
+
+  NOTE(review): which halfword of each word vec_mulo selects depends
+  on element numbering; this looks written for big-endian element
+  order -- confirm before using on a little-endian target.
+*/
+INLINE vector unsigned int
+lfr_vecrand(vector unsigned int x, vector unsigned int a,
+            vector unsigned int c)
+{
+    /* Used as the rotate/shift count. Presumably only the low five
+       bits of the count matter, making -16 act as 16 -- TODO confirm
+       against the AltiVec reference. */
+    vector unsigned int half = vec_splat_u32(-16);
+    vector unsigned short x16 = (vector unsigned short) x;
+
+    /* Product of one pair of matching halfwords per 32-bit element. */
+    vector unsigned int low = vec_mulo(x16, (vector unsigned short) a);
+
+    /* Swap the halves of each element of a, so the multiply-sum pairs
+       each half of x with the opposite half of a. */
+    vector unsigned int a_swapped = vec_rl(a, half);
+    vector unsigned int cross = vec_msum(
+        x16,
+        (vector unsigned short) a_swapped,
+        vec_splat_u32(0));
+
+    return vec_add(vec_add(low, c), vec_sl(cross, half));
+}
+
+#endif \ No newline at end of file