1 files changed, 119 insertions, 0 deletions
diff --git a/lib/defs.h b/lib/defs.h
new file mode 100644
index 0000000..e645532
--- /dev/null
+++ b/lib/defs.h
@@ -0,0 +1,119 @@
+/* Copyright 2012 Dietrich Epp <depp@zdome.net> */
+#pragma once
+
+#include "fresample.h"
+
+#define LFR_UNREACHABLE ((void) 0) /* fallback: expands to a no-op */
+
+#if defined(__clang__) /* upgrade to the compiler's unreachable hint */
+# if __has_builtin(__builtin_unreachable)
+#  undef LFR_UNREACHABLE
+#  define LFR_UNREACHABLE __builtin_unreachable()
+# endif
+#elif defined(__GNUC__)
+# if (__GNUC__ >= 4 && __GNUC_MINOR__ >= 5) || __GNUC__ > 4 /* GCC 4.5+ */
+#  undef LFR_UNREACHABLE
+#  define LFR_UNREACHABLE __builtin_unreachable()
+# endif
+#endif
+
+#define INLINE static inline /* helpers get internal linkage in each includer */
+
+/*
+  Constants used by dithering algorithm.  We use a simple linear
+  congruential generator to generate a uniform signal for dithering,
+  taking the high order bits:
+
+  x_{n+1} = (A * x_n + C) mod 2^32
+
+  The derived constants, AN/CN, are used for stepping the LCG forward
+  by N steps.  AI is the inverse of A.
+*/
+
+constexpr auto LCG_A  = 1103515245u; /* A: multiplier for one step */
+constexpr auto LCG_A2 = 3265436265u; /* A^2 mod 2^32: two steps */
+constexpr auto LCG_A4 = 3993403153u; /* multiplier for four steps */
+
+constexpr auto LCG_C  =      12345u; /* C: increment for one step */
+constexpr auto LCG_C2 = 3554416254u; /* (A + 1) * C mod 2^32: two steps */
+constexpr auto LCG_C4 = 3596950572u; /* increment for four steps */
+
+constexpr auto LCG_AI = 4005161829u; /* inverse of A (mod 2^32) */
+constexpr auto LCG_CI = 4235699843u; /* NOTE(review): likely the increment paired with AI; confirm */
+
+/* ====================
+   Utility functions
+   ==================== */
+
+#if defined(LFR_SSE2) && defined(LFR_CPU_X86)
+#include <emmintrin.h>
+
+/*
+  Copy the 16-bit lanes of x with index in [i0,i1) to the same lane
+  positions of *dest; the remaining lanes of *dest are left untouched.
+  Requires 0 <= i0 and i1 <= 8.
+*/
+INLINE void lfr_storepartial_epi16(__m128i *dest, __m128i x, int i0, int i1)
+{
+    unsigned short lanes[8];
+    /* Spill via an unaligned store rather than union type punning. */
+    _mm_storeu_si128((__m128i *) lanes, x);
+    for (int i = i0; i < i1; ++i)
+        ((unsigned short *) dest)[i] = lanes[i];
+}
+
+/*
+  Step four 32-bit linear congruential generators at once, computing
+  x * a + c in each lane (mod 2^32).  All four lanes are expected to
+  hold the same A and the same C constant.
+
+  A packed 32-bit multiply needs SSE 4.1, so the low product halves
+  are assembled from two 32x32 -> 64 bit multiplies instead.
+*/
+INLINE __m128i
+lfr_rand_epu32(__m128i x, __m128i a, __m128i c)
+{
+    /* 64-bit products of lanes 0,2 and (after the byte shift) 1,3 */
+    __m128i even = _mm_mul_epu32(x, a);
+    __m128i odd = _mm_mul_epu32(_mm_srli_si128(x, 4), a);
+    /* move the low 32 bits of each product into lanes 0 and 1 */
+    even = _mm_shuffle_epi32(even, _MM_SHUFFLE(0, 0, 2, 0));
+    odd = _mm_shuffle_epi32(odd, _MM_SHUFFLE(0, 0, 2, 0));
+    /* interleave back to lane order 0,1,2,3, then add the increment */
+    return _mm_add_epi32(_mm_unpacklo_epi32(even, odd), c);
+}
+
+#endif
+
+#if defined(LFR_ALTIVEC) && defined(LFR_CPU_PPC)
+#if !defined(__APPLE_ALTIVEC__)
+#include <altivec.h>
+#endif
+
+/*
+  Step four 32-bit linear congruential generators at once, computing
+  x * a + c in each element (mod 2^32).  All four elements are
+  expected to share the same A and the same C constant.
+
+  There is no 32-bit vector multiply here, so the product is built
+  from 16-bit multiplies: with x = xh:xl and a = ah:al split into
+  halves, x * a = xl*al + ((xh*al + xl*ah) << 16)  (mod 2^32).
+*/
+INLINE vector unsigned int
+lfr_vecrand(vector unsigned int x, vector unsigned int a,
+            vector unsigned int c)
+{
+    /* shift count 16, written -16 to fit vec_splat_u32's immediate */
+    vector unsigned int sixteen = vec_splat_u32(-16);
+    vector unsigned short xs = (vector unsigned short) x;
+    /* a with the two halves of every word swapped */
+    vector unsigned short aswap = (vector unsigned short) vec_rl(a, sixteen);
+    /* odd-element 16-bit products, plus the increment */
+    vector unsigned int low = vec_add(
+        vec_mulo(xs, (vector unsigned short) a), c);
+    /* per-word sum of both cross products */
+    vector unsigned int cross = vec_msum(xs, aswap, vec_splat_u32(0));
+    return vec_add(low, vec_sl(cross, sixteen));
+}
+
+#endif
+\ No newline at end of file