diff options
| -rw-r--r-- | LICENSE.txt | 26 | ||||
| -rw-r--r-- | README | 3 | ||||
| -rw-r--r-- | include/.header | 4 | ||||
| -rw-r--r-- | include/fresample.h | 227 | ||||
| -rw-r--r-- | lib/.header | 3 | ||||
| -rw-r--r-- | lib/convert.h | 5 | ||||
| -rw-r--r-- | lib/defs.h | 119 | ||||
| -rw-r--r-- | lib/filter.cpp | 334 | ||||
| -rw-r--r-- | lib/filter.h | 181 | ||||
| -rw-r--r-- | lib/info_name.cpp | 39 | ||||
| -rw-r--r-- | lib/meson.build | 25 | ||||
| -rw-r--r-- | lib/param.cpp | 236 | ||||
| -rw-r--r-- | lib/param.h | 135 | ||||
| -rw-r--r-- | lib/param_name.cpp | 41 | ||||
| -rw-r--r-- | lib/resample.h | 84 | ||||
| -rw-r--r-- | lib/resample_func.h | 92 | ||||
| -rw-r--r-- | lib/resample_s16func.cpp | 51 | ||||
| -rw-r--r-- | lib/swap16.cpp | 65 | ||||
| -rw-r--r-- | lib/types.h | 44 | ||||
| -rw-r--r-- | meson.build | 1 |
20 files changed, 1715 insertions, 0 deletions
diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..24871ca --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,26 @@ +Copyright 2012 - 2013 Dietrich Epp <depp@zdome.net> +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -0,0 +1,3 @@ +Minimal C++ rewrite of https://github.com/depp/libfresample + +All credits to Dietrich Epp diff --git a/include/.header b/include/.header new file mode 100644 index 0000000..c35f326 --- /dev/null +++ b/include/.header @@ -0,0 +1,4 @@ +{ + "guard_prefix": "", + "extern_c": true +} diff --git a/include/fresample.h b/include/fresample.h new file mode 100644 index 0000000..1fc8824 --- /dev/null +++ b/include/fresample.h @@ -0,0 +1,227 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#pragma once +#include <stddef.h> +#include <string_view> +#include "../lib/param.h" +#include "../lib/filter.h" +#include "../lib/types.h" + +#define LFR_PUBLIC +#if defined(LFR_IMPLEMENTATION) +# define LFR_PRIVATE +#endif + +#if defined(_MSC_VER) +# define LFR_INT64 __int64 +#endif + +#if defined(__GNUC__) +# undef LFR_PRIVATE +# undef LFR_PUBLIC +# if defined(LFR_IMPLEMENTATION) +# define LFR_PRIVATE __attribute__((visibility("hidden"))) +# if defined(__ELF__) +# define LFR_PUBLIC __attribute__((visibility("protected"))) +# else +# define LFR_PUBLIC __attribute__((visibility("default"))) +# endif +# else +# define LFR_PUBLIC +# endif +# define LFR_INT64 long long +#endif + +#if defined(_M_X64) || defined(__x86_64__) +# define LFR_CPU_X64 1 +# define LFR_CPU_X86 1 +#elif defined(_M_IX86) || defined(__i386__) +# define LFR_CPU_X86 1 +#elif defined(__ppc64__) +# define LFR_CPU_PPC64 1 +# define LFR_CPU_PPC 1 +#elif defined(__ppc__) +# define LFR_CPU_PPC 1 +#endif + +#define LFR_LITTLE_ENDIAN 1234 +#define LFR_BIG_ENDIAN 4321 + +#if defined(__BYTE_ORDER__) +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define LFR_BYTE_ORDER LFR_BIG_ENDIAN +# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define LFR_BYTE_ORDER LFR_LITTLE_ENDIAN +# endif +#elif defined(__BIG_ENDIAN__) +# define LFR_BYTE_ORDER LFR_BIG_ENDIAN +#elif defined(__LITTLE_ENDIAN__) +# define LFR_BYTE_ORDER LFR_LITTLE_ENDIAN +#endif + +#if !defined(LFR_BYTE_ORDER) +# if defined(LFR_CPU_X86) +# define LFR_BYTE_ORDER LFR_LITTLE_ENDIAN +# elif defined(LFR_CPU_PPC) +# define LFR_BYTE_ORDER LFR_BIG_ENDIAN +# else +# error "cannot determine machine byte order" +# endif +#endif + +/* ======================================== + CPU features + ======================================== */ + +/* + CPU features to use or disable. +*/ +enum { +#if defined(LFR_CPU_X86) + LFR_CPUF_MMX = (1u << 0), + LFR_CPUF_SSE = (1u << 1), + LFR_CPUF_SSE2 = (1u << 2), + LFR_CPUF_SSE3 = (1u << 3), + LFR_CPUF_SSSE3 = (1u << 4), + LFR_CPUF_SSE4_1 = (1u << 5), + LFR_CPUF_SSE4_2 = (1u << 6), +#else + LFR_CPUF_MMX = 0u, + LFR_CPUF_SSE = 0u, + LFR_CPUF_SSE2 = 0u, + LFR_CPUF_SSE3 = 0u, + LFR_CPUF_SSSE3 = 0u, + LFR_CPUF_SSE4_1 = 0u, + LFR_CPUF_SSE4_2 = 0u, +#endif + +#if defined(LFR_CPU_PPC) + LFR_CPUF_ALTIVEC = (1u << 0), +#else + LFR_CPUF_ALTIVEC = 0u, +#endif + + LFR_CPUF_NONE = 0u, + LFR_CPUF_ALL = 0xffffffffu +}; + +/* + Information about a CPU flag. +*/ +struct lfr_cpuf { + char name[8]; + unsigned flag; +}; + +/* + Array of names for the CPU features this architecture supports. + Names are the lower case version of the flag names above, e.g., + LFR_CPUF_MMX becomes "mmx". Terminated by a zeroed entry. +*/ +LFR_PUBLIC extern const struct lfr_cpuf LFR_CPUF[]; + +/* + Set which CPU features are allowed or disallowed. This is primarily + used for comparing the performance and correctness of vector + implementations and scalar implementations. It can also be used to + prohibit features that your CPU supports but which your OS does not. + + Returns the CPU flags actually enabled, which will be the + intersection of the set of allowed flags (the argument) with the set + of features that the current CPU actually supports. +*/ +LFR_PUBLIC unsigned +lfr_setcpufeatures(unsigned flags); + +#if LFR_BYTE_ORDER == LFR_BIG_ENDIAN +# define LFR_FMT_S16_NATIVE LFR_FMT_S16BE +# define LFR_FMT_S16_SWAPPED LFR_FMT_S16LE +# define LFR_FMT_F32_NATIVE LFR_FMT_F32BE +# define LFR_FMT_F32_SWAPPED LFR_FMT_F32LE +#else +# define LFR_FMT_S16_NATIVE LFR_FMT_S16LE +# define LFR_FMT_S16_SWAPPED LFR_FMT_S16BE +# define LFR_FMT_F32_NATIVE LFR_FMT_F32LE +# define LFR_FMT_F32_SWAPPED LFR_FMT_F32BE +#endif + +/* + Swap the byte order on 16-bit data. The destination can either be + the same buffer as the source, or it can be a non-overlapping + buffer. Behavior is undefined if the two buffers partially overlap. +*/ +LFR_PUBLIC void +lfr_swap16(void *dest, const void *src, size_t count); + +/* ======================================== + Resampling parameters + ======================================== */ + +/* + Names for filter quality presets. +*/ +enum { + /* + Low quality: Currently, quality 0..3 are identical, since + further reductions in quality do not increase performance. + */ + LFR_QUALITY_LOW = 2, + + /* + Medium quality: Transition band of 23%, nominal attenuation of + 60 dB. Actual attenuation may be higher. + */ + LFR_QUALITY_MEDIUM = 5, + + /* + High quality: Transition band of 10%, nominal attenuation of 96 + dB. It is not normally reasonable to increase quality beyond + this level unless you are competing for the prettiest + spectrogram. + */ + LFR_QUALITY_HIGH = 8, + + /* + Ultra quality: Transition band of 3%, nominal attenuation of 120 + dB. Filter coefficients may not fit in L2 cache. Impulse + response may be several milliseconds long. + */ + LFR_QUALITY_ULTRA = 10 +}; + +/* + Get the name of a parameter, or return NULL if the paramater does + not exist. +*/ +LFR_PUBLIC std::string_view lfr_param_name(lfr_param_t pname); + +/* + Get the index of a parameter by name, or return -1 if the parameter + does not exist. +*/ +LFR_PUBLIC int lfr_param_lookup(const std::string_view& pname); + +/* ======================================== + Resampling + ======================================== */ + +/* + Get the name of a filter info query, or return NULL if the info query + does not exist. +*/ +LFR_PUBLIC std::string_view +lfr_info_name(int pname); + +/* + Get the index of a filter info query by name, or return -1 if the + info query does not exist. +*/ +LFR_PUBLIC int +lfr_info_lookup(const std::string_view& pname); + +/* + Get a function for resampling native 16-bit data with the given + number of channels. Returns NULL if no such function is available. + This will always return non-NULL for mono and stereo data. +*/ +LFR_PUBLIC lfr_resample_func_t +lfr_resample_s16func(int nchan, const struct lfr_filter *filter); diff --git a/lib/.header b/lib/.header new file mode 100644 index 0000000..522f27e --- /dev/null +++ b/lib/.header @@ -0,0 +1,3 @@ +{ + "guard_prefix": "" +} diff --git a/lib/convert.h b/lib/convert.h new file mode 100644 index 0000000..f869f2b --- /dev/null +++ b/lib/convert.h @@ -0,0 +1,5 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#pragma once +#include "defs.h" + +void lfr_swap16(void *dest, const void *src, size_t count); diff --git a/lib/defs.h b/lib/defs.h new file mode 100644 index 0000000..e645532 --- /dev/null +++ b/lib/defs.h @@ -0,0 +1,119 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#pragma once + +#include "fresample.h" + +#define LFR_UNREACHABLE (void) 0 + +#if defined(__clang__) +# if __has_builtin(__builtin_unreachable) +# undef LFR_UNREACHABLE +# define LFR_UNREACHABLE __builtin_unreachable() +# endif +#elif defined(__GNUC__) +# if (__GNUC__ >= 4 && __GNU_MINOR__ >= 5) || __GNUC__ > 4 +# undef LFR_UNREACHABLE +# define LFR_UNREACHABLE __builtin_unreachable() +# endif +#endif + +#define INLINE static inline + +/* + Constants used by dithering algorithm. We usa a simple linear + congruential generator to generate a uniform signal for dithering, + taking the high order bits: + + x_{n+1} = (A * x_n + C) mod 2^32 + + The derived constants, AN/CN, are used for stepping the LCG forward + by N steps. AI is the inverse of A. +*/ + +constexpr auto LCG_A = 1103515245u; +constexpr auto LCG_A2 = 3265436265u; +constexpr auto LCG_A4 = 3993403153u; + +constexpr auto LCG_C = 12345u; +constexpr auto LCG_C2 = 3554416254u; +constexpr auto LCG_C4 = 3596950572u; + +constexpr auto LCG_AI = 4005161829u; +constexpr auto LCG_CI = 4235699843u; + +/* ==================== + Utility functions + ==================== */ + +#if defined(LFR_SSE2) && defined(LFR_CPU_X86) +#include <emmintrin.h> + +/* + Store 16-bit words [i0,i1) in the given location. +*/ +INLINE void lfr_storepartial_epi16(__m128i *dest, __m128i x, int i0, int i1) +{ + union { + unsigned short h[8]; + __m128i x; + } u; + u.x = x; + for (int i = i0; i < i1; ++i) + ((unsigned short *) dest)[i] = u.h[i]; +} + +/* + Advance four linear congruential generators. The four generators + should use the same A and C constants. + + The 32-bit multiply we want requires SSE 4.1. We construct it out of + two 32 to 64 bit multiply operations. +*/ +INLINE __m128i +lfr_rand_epu32(__m128i x, __m128i a, __m128i c) +{ + return _mm_add_epi32( + _mm_unpacklo_epi32( + _mm_shuffle_epi32( + _mm_mul_epu32(x, a), + _MM_SHUFFLE(0, 0, 2, 0)), + _mm_shuffle_epi32( + _mm_mul_epu32(_mm_srli_si128(x, 4), a), + _MM_SHUFFLE(0, 0, 2, 0))), + c); +} + +#endif + +#if defined(LFR_ALTIVEC) && defined(LFR_CPU_PPC) +#if !defined(__APPLE_ALTIVEC__) +#include <altivec.h> +#endif + +/* + Advance four linear congruential generators. The four generators + should use the same A and C constants. + + The 32-bit multiply we want does not exist. We construct it out of + 16-bit multiply operations. +*/ +INLINE vector unsigned int +lfr_vecrand(vector unsigned int x, vector unsigned int a, + vector unsigned int c) +{ + vector unsigned int s = vec_splat_u32(-16); + return vec_add( + vec_add( + vec_mulo( + (vector unsigned short) x, + (vector unsigned short) a), + c), + vec_sl( + vec_msum( + (vector unsigned short) x, + (vector unsigned short) vec_rl(a, s), + vec_splat_u32(0)), + s)); +} + +#endif
\ No newline at end of file diff --git a/lib/filter.cpp b/lib/filter.cpp new file mode 100644 index 0000000..0ca829e --- /dev/null +++ b/lib/filter.cpp @@ -0,0 +1,334 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#include "filter.h" +#include "param.h" +#include "../include/fresample.h" +#include <cmath> +#include <memory> +#include <exception> +#include <numeric> +#include <algorithm> +#include <iostream> + +namespace { +/* + Simple sinc function implementation. Note that this is the + unnormalized sinc function, with zeroes at nonzero multiples of pi. +*/ +constexpr double sinc(double x) +{ + return std::abs(x) < 1e-8 ? 1.0 : std::sin(x) / x; +} + +/* + Modified Bessel function of the first kind of order 0, i0. Only + considered valid over the domain [0,18]. + + This is the naive algorithm. + + In "Computation of Special Functions" by Shanjie Zhang and Jianming + Jin, this technique is only used for 0<X<18. However, we only need + it for the Kaiser window, and a beta of 18 is fairly ridiculous -- + it gives a side lobe height of -180 dB, which is not only beyond the + actual SNR of high end recording systems, it is beyond the SNR of + theoretically perfect 24-bit systems. +*/ +constexpr double bessel_i0(double x) +{ + double a = 1.0, y = 1.0, x2 = x*x; + for (int i = 1; i <= 50; ++i) { + a *= 0.25 * x2 / (i * i); + y += a; + if (a < y * 1e-15) + break; + } + return y; +} + +std::vector<double> filter_calculate(std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + std::vector<double> data(nsamp * nfilt); + + const auto yscale = 2.0 * cutoff / bessel_i0(beta); + const auto xscale = (8.0 * std::atan(1.0)) * cutoff; + for (std::size_t i = 0; i < nfilt; ++i) { + const auto x0 = (nsamp - 1) / 2 + offset * i; + for (std::size_t j = 0; j < nsamp; ++j) { + const auto x = j - x0; + const auto t = x * (2.0 / (nsamp - 2)); + double y; + if (t <= -1.0 || t >= 1.0) { + y = 0.0; + } else { + y = yscale * bessel_i0(beta * sqrt(1.0 - t * t)) * sinc(xscale * x); + } + data[i * nsamp + j] = y; + } + } + + return data; +} + +template<typename type, type scale> +void filter_calculate_integral(type* data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + const auto& filter = filter_calculate(nsamp, nfilt, offset, cutoff, beta); + + for (std::size_t i = 0; i < nfilt; ++i) { + const auto sum = accumulate(begin(filter) + i * nsamp, begin(filter) + (i + 1) * nsamp, 0.0); + const auto fac = static_cast<double>(scale) / sum; + auto err = 0.0; + for (std::size_t j = 0; j < nsamp; ++j) { + const auto y = filter[i * nsamp + j] * fac + err; + const auto z = std::round(y); + err = z - y; + data[i * nsamp + j] = static_cast<type>(z); + } + } +} + +void lfr_filter_calculate_s16(std::int16_t* data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + if(nsamp <= 8) + { + filter_calculate_integral<std::int16_t, 31500>(data, nsamp, nfilt, offset, cutoff, beta); + } + else + { + filter_calculate_integral<std::int16_t, 32767>(data, nsamp, nfilt, offset, cutoff, beta); + } +} + +void lfr_filter_calculate_f32(float *data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + const auto& filter = filter_calculate(nsamp, nfilt, offset, cutoff, beta); + + transform(begin(filter), end(filter), data, [](const auto val){ return static_cast<float>(val); }); +} +} + +lfr_filter::lfr_filter(lfr_param& param) +{ + double dw, dw2, lenf, atten2, atten3, maxerror, beta; + double ierror, rerror, error; + double t, a, ulp; + int align=8, max_oversample; + + param.calculate(); + + f_pass = param.param[LFR_PARAM_FPASS]; + f_stop = param.param[LFR_PARAM_FSTOP]; + atten = param.param[LFR_PARAM_ATTEN]; + + /* atten2 is the attenuation of the ideal (un-rounded) filter, + maxerror is the ratio of a 0 dBFS signal to the permitted + roundoff and interpolation error. This divides the + "attenuation" into error budgets with the assumption that the + error from each source is uncorrelated. Note that atten2 is + measured in dB, whereas maxerror is a ratio to a full scale + signal. */ + atten2 = atten + 3.0; + maxerror = exp((atten + 3.0) * (-log(10.0) / 20.0)); + + /* Calculate the filter order from the attenuation. Round up to + the nearest multiple of the SIMD register alignment. */ + dw = (8.0 * std::atan(1.0)) * (f_stop - f_pass); + lenf = (atten2 - 8.0) / (4.57 * dw) + 1; + nsamp = static_cast<int>(std::ceil(lenf / align) * align); + if (nsamp < align) + nsamp = align; + + /* ======================================== + Determine filter coefficient size. + ======================================== */ + + /* We can calculate expected roundoff error from filter length. + We assume that the roundoff error at each coefficient is an + independent uniform variable with a range of 1 ULP, and that + there is an equal roundoff step during interpolation. This + gives roundoff error with a variance of N/6 ULP^2. Roundoff + error is a ratio relative to 1 ULP. */ + rerror = std::sqrt(nsamp * (1.0 / 6.0)); + + /* The interpolation error can be adjusted by choosing the amount + of oversampling. The curvature of the sinc function is bounded + by (2*pi*f)^2, so the interpolation error is bounded by + (pi*f/M)^2. + + We set a maximum oversampling of 2^8 for 16-bit, and 2^12 for + floating point. */ + t = f_pass * (4.0 * std::atan(1.0)); + a = 1.0 / 256; + ierror = (t * a) * (t * a); + ulp = 1.0 / 32768.0; + /* We assume, again, that interpolation and roundoff error are + uncorrelated and normal. If error at 16-bit exceeds our + budget, then use floating point. */ + error = (ierror * ierror + rerror * rerror) * (ulp * ulp); + if (error <= maxerror * maxerror) { + type = LFR_FTYPE_S16; + ulp = 1.0 / 32768.0; + max_oversample = 8; + } else { + type = LFR_FTYPE_F32; + ulp = 1.0 / 16777216.0; + max_oversample = 12; + } + + /* Calculate the interpolation error budget by subtracting + roundoff error from the total error budget, since roundoff + error is fixed. */ + ierror = maxerror * maxerror - (rerror * rerror) * (ulp * ulp); + ierror = (ierror > 0) ? sqrt(ierror) : 0; + if (ierror < ulp) + ierror = ulp; + + /* Calculate oversampling from the error budget. */ + a = (f_pass * (4.0 * std::atan(1.0))) / std::sqrt(ierror); + log2nfilt = static_cast<int>(std::ceil(std::log(a) * (1.0 / std::log(2.0)))); + if (log2nfilt < 0) + log2nfilt = 0; + else if (log2nfilt > max_oversample) + log2nfilt = max_oversample; + + /* ======================================== + Calculate window parameters + ======================================== */ + + /* Since we rounded up the filter order, we can increase the stop + band attenuation or bandwidth without making the transition + bandwidth exceed the design parameters. This gives a "free" + boost in quality. We choose to increase both. + + If we didn't increase these parameters, we would still incur + the additional computational cost but it would be spent + increasing the width of the stop band, which is not useful. */ + + /* atten3 is the free stopband attenuation */ + atten3 = (nsamp - 1) * 4.57 * dw + 8; + t = (-20.0 / std::log(10.0)) * std::log(rerror * ulp); + if (t < atten3) + atten3 = t; + if (atten2 > atten3) + atten3 = atten2; + else + atten3 = 0.5 * (atten2 + atten3); + + /* f_pass2 is the free pass band */ + dw2 = (atten3 - 8.0) / (4.57 * (nsamp - 1)); + auto f_pass2 = f_stop - dw2 * (1.0 / (8.0 * std::atan(1.0))); + + if (atten3 > 50) + beta = 0.1102 * (atten3 - 8.7); + else if (atten3 > 21) + beta = 0.5842 * std::pow(atten3 - 21, 0.4) + 0.07866 * (atten3 - 21); + else + beta = 0; + + setup_window(f_pass2, beta); +} + +void lfr_filter::setup_window(double cutoff, double beta) +{ + size_t esz = 1, align = 16, nfilt; + if (nsamp < 1 || log2nfilt < 0) + goto error; + + switch (type) { + case LFR_FTYPE_S16: + esz = sizeof(short); + break; + + case LFR_FTYPE_F32: + esz = sizeof(float); + break; + } + nfilt = (1u << log2nfilt) + 1; + if ((size_t) nsamp > (size_t) -1 / esz) + goto error; + if (((size_t) -1 - (align - 1) - sizeof(*this)) / nfilt < nsamp * esz) + goto error; + data.resize(nsamp * nfilt * esz + align - 1); + + delay = static_cast<lfr_fixed_t>((nsamp - 1) / 2) << 32; + + switch (type) { + case LFR_FTYPE_S16: + lfr_filter_calculate_s16( + reinterpret_cast<short int*>(data.data()), nsamp, nfilt, + 1.0 / static_cast<double>(1 << log2nfilt), cutoff, beta); + break; + + case LFR_FTYPE_F32: + lfr_filter_calculate_f32( + reinterpret_cast<float*>(data.data()), nsamp, nfilt, + 1.0 / static_cast<double>(1 << log2nfilt), cutoff, beta); + break; + } + return; + +error: + throw std::runtime_error{"Error in filter setup: lfr_filter::setup_window"}; +} + +int lfr_filter::geti(LFR_INFO_TYPE iname, bool convert) const +{ + switch(iname) + { + case LFR_INFO_DELAY: + return delay * (1.0 / 4294967296.0); + case LFR_INFO_FPASS: + return f_pass; + case LFR_INFO_FSTOP: + return f_stop; + case LFR_INFO_ATTEN: + return atten; + default: + if(convert) + { + return static_cast<int>(getf(iname, false)); + } + } + return {}; +} + +double lfr_filter::getf(LFR_INFO_TYPE iname, bool convert) const +{ + switch(iname) + { + case LFR_INFO_DELAY: + return static_cast<int>(delay >> 32); + case LFR_INFO_SIZE: + return nsamp; + case LFR_INFO_MEMSIZE: + { + int sz = nsamp * ((1 << log2nfilt) + 1); + switch (type) + { + case LFR_FTYPE_S16: return sz * 2; + case LFR_FTYPE_F32: return sz * 4; + default: return {}; + } + } + default: + if(convert) + { + return geti(iname, false); + } + } + return {}; +} + +void lfr_filter::resample( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, + unsigned *dither, int nchan, + void *out, lfr_fmt_t outfmt, int outlen, + const void *in, lfr_fmt_t infmt, int inlen) +{ + lfr_resample_func_t func; + if (outfmt == LFR_FMT_S16_NATIVE && infmt == LFR_FMT_S16_NATIVE) { + func = lfr_resample_s16func(nchan, this); + if (!func) + return; + func(pos, inv_ratio, dither, out, outlen, in, inlen, this); + } +}
\ No newline at end of file diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..b6ca66e --- /dev/null +++ b/lib/filter.h @@ -0,0 +1,181 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#pragma once +#include "types.h" +#include "param.h" + +#include <vector> + +/* Bits used for interpolating between two filters. The scalar + implementation supports up to 16 bits, but the SIMD implementations + generally only support 14 so the values can fit in a signed 16-bit + word. We bring all implementations to the same accuracy in order + to ensure that all implementations produce the same results. */ +#define INTERP_BITS 14 + +/* + Filter types +*/ +enum lfr_ftype_t { + LFR_FTYPE_S16, + LFR_FTYPE_F32 +}; + +/* + Array of FIR filters. + + Let N=2^log2nfilt, M=nsamp. + + This is an array of N+1 filters, 0..N, with each filter M samples + long. Filter number I will be centered on the sample with offset: + + floor(M/2) + I/N + + This means that filter N will be a copy of filter 0, except shifted + to the right by one sample. This extra filter makes the + interpolation code simpler. +*/ + + +/* + Information queries that a filter responds to. Each query gives an + integer or floating point result, automatically cast to the type + requested. +*/ +enum LFR_INFO_TYPE { + /* + The size of the filter, also known as the filter's order. This + does not include oversampling. The resampler will read this + many samples, starting with the current position (rounded down), + whenever it calculates a sample. + + In other words, this is the amount of overlap needed when + resampling consecutive buffers. + */ + LFR_INFO_SIZE, + + /* + Filter delay. Note that lfr_filter_delay returns a fixed point + number and is usually preferable. + */ + LFR_INFO_DELAY, + + /* + The is the number of bytes used by filter coefficients. + */ + LFR_INFO_MEMSIZE, + + /* + The normalized pass frequency the filter was designed with. + */ + LFR_INFO_FPASS, + + /* + The normalized stop frequency the filter was designed with. + */ + LFR_INFO_FSTOP, + + /* + The stopband attenuation, in dB, that the filter was designed + with. + */ + LFR_INFO_ATTEN, + + LFR_INFO_COUNT +}; + +/* + * A filter for resampling audio. + */ +struct lfr_filter { + /* + FIR filter coefficient data type. + */ + lfr_ftype_t type; + + /* + FIR filter data. + */ + std::vector<char> data; + + /* + Length of each filter, in samples. This is chosen to make each + filter size a multiple of the SIMD register size. Most SIMD + implementations today use 16 byte registers, so this will + usually be a multiple of 8 or 4, depending on the coefficient + type. + */ + int nsamp; + + /* + Base 2 logarithm of the number of different filters. There is + always an additional filter at the end of the filter array to + simplify interpolation, this extra filter is a copy of the first + filter shifted right by one sample. + */ + int log2nfilt; + + /* + Filter delay. Filters are causal, so this should be + non-negative. + */ + lfr_fixed_t delay; + + /* + Design parameters of the filter. + */ + double f_pass, f_stop, atten; + + lfr_filter(lfr_param& param); + int geti(LFR_INFO_TYPE iname, bool convert = true) const; + double getf(LFR_INFO_TYPE iname, bool convert = true) const; + + +/* + Resample an audio buffer. Note that this function may need to + create intermediate buffers if there is no function which can + directly operate on the input and output formats. No intermediate + buffers will be necessary if the following conditions are met: + + - Input and output formats are identical. + + - Sample format is either S16_NATIVE or F32_NATIVE. + + - The number of channels is either 1 (mono) or 2 (stereo). + + pos: Current position relative to the start of the input buffer, + expressed as a 32.32 fixed point number. On return, this will + contain the updated position. Positions outside the input buffer + are acceptable, it will be treated as if the input buffer were + padded with an unlimited number of zeroes on either side. + + inv_ratio: Inverse of the resampling ratio, expressed as a 32.32 + fixed point number. This number is equal to the input sample rate + divided by the output sample rate. + + dither: State of the PRNG used for dithering. + + nchan: Number of interleaved channels. + + out, in: Input and output buffers. The buffers are not permitted to + alias each other. + + outlen, inlen: Length of buffers, in frames. Note that the length + type is 'int' instead of 'size_t'; this matches the precision of + buffer positions. + + outfmt, infmt: Format of input and output buffers. + + filter: A suitable low-pass filter for resampling at the given + ratio. +*/ + void resample( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, + unsigned *dither, int nchan, + void *out, lfr_fmt_t outfmt, int outlen, + const void *in, lfr_fmt_t infmt, int inlen); +private: + /* + Create a Kaiser-windowed sinc filter. + */ + void setup_window(double cutoff, double beta); +};
\ No newline at end of file diff --git a/lib/info_name.cpp b/lib/info_name.cpp new file mode 100644 index 0000000..d0897d2 --- /dev/null +++ b/lib/info_name.cpp @@ -0,0 +1,39 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#include "defs.h" +#include "fresample.h" +#include <string.h> +#include <array> +#include <string_view> + +using namespace std::string_view_literals; + +constexpr std::array<std::string_view, LFR_INFO_COUNT> LFR_INFO_NAME = { + "size"sv, + "delay"sv, + "memsize"sv, + "fpass"sv, + "fstop"sv, + "atten"sv +}; + +std::string_view +lfr_info_name(int pname) +{ + if (pname < 0 || pname >= LFR_INFO_COUNT) + return {}; + return LFR_INFO_NAME[pname]; +} + +int lfr_info_lookup(const std::string_view& pname) +{ + int i = 0; + for(const auto& name : LFR_INFO_NAME) + { + if(name == pname) + { + return i; + } + ++i; + } + return -1; +} diff --git a/lib/meson.build b/lib/meson.build new file mode 100644 index 0000000..822473e --- /dev/null +++ b/lib/meson.build @@ -0,0 +1,25 @@ +fresample_sources = [ + 'filter.cpp', + 'info_name.cpp', + 'param.cpp', + 'param_name.cpp', + 'resample_s16func.cpp', + 'swap16.cpp', +] + +cc = meson.get_compiler('c') +m_dep = cc.find_library('m', required : false) + +fresample = static_library( + 'fresample', + fresample_sources, + include_directories: '../include', + dependencies: [ + m_dep, + ], + cpp_args: '-O0' +) + +fresample_dep = declare_dependency( + link_with: fresample, +) diff --git a/lib/param.cpp b/lib/param.cpp new file mode 100644 index 0000000..58bcf5b --- /dev/null +++ b/lib/param.cpp @@ -0,0 +1,236 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#include "param.h" + +void lfr_param::seti(lfr_param_t pname, int value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + set |= 1u << n; + current = 0; + param[n] = (double) value; +} + +void lfr_param::setf(lfr_param_t pname, double value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + set |= 1u << n; + current = 0; + param[n] = value; +} + +void lfr_param::geti(lfr_param_t pname, int *value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + if (!current) + calculate(); + *value = static_cast<int>(param[n] + 0.5); +} + +void lfr_param::getf(lfr_param_t pname, double *value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + if (!current) + calculate(); + *value = param[n]; +} + +constexpr auto MAX_QUALITY = 10; + +struct lfr_quality { + unsigned short atten; /* dB */ + unsigned short transition; /* out of 1000 */ + unsigned short kind; /* 0 = loose, 2 = maxfreq=0 */ + unsigned short minfpass; /* out of 1000 */ +}; + +constexpr lfr_quality LFR_QUALITY[MAX_QUALITY + 1] = { + { 35, 350, 0, 500 }, + { 35, 350, 0, 500 }, + { 35, 350, 0, 500 }, /* low */ + { 35, 350, 0, 500 }, + { 50, 290, 0, 600 }, + { 60, 230, 1, 700 }, /* medium */ + { 80, 180, 1, 800 }, + { 90, 140, 1, 850 }, + { 96, 100, 1, 900 }, /* high */ + { 108, 60, 2, 915 }, + { 120, 30, 2, 930 } /* ultra */ +}; + +constexpr auto MIN_OUTPUT = 1.0 / 128; +constexpr auto MIN_ATTEN = 13; +constexpr auto MAX_ATTEN = 144; +constexpr auto MIN_TRANSITION = 1.0 / 256.0; +constexpr auto MAX_MINPASS = 31.0 / 32.0; +constexpr auto MIN_MINPASS = 8.0 / 32.0; + +#define ISSET(n) ((set & (1u << (LFR_PARAM_ ## n))) != 0) +#define GETF(n) (param[LFR_PARAM_ ## n]) +#define GETI(n) (static_cast<int>(GETF(n) + 0.5)) + +void lfr_param::calculate() +{ + int qual; + double rate; + double f_output, f_transition, f_maxfreq, f_minpass; + double atten, f_stop, f_pass; + double t; + int loose; + + /* Quality */ + if (ISSET(QUALITY)) { + qual = GETI(QUALITY); + if (qual < 0) + qual = 0; + else if (qual > MAX_QUALITY) + qual = MAX_QUALITY; + } else { + qual = 8; + } + GETF(QUALITY) = qual; + + /* Input rate */ + if (ISSET(INRATE)) { + rate = GETF(INRATE); + if (rate <= 0) + rate = -1.0; + } else { + rate = -1.0; + } + GETF(INRATE) = rate; + + /* Output rate */ + if (ISSET(OUTRATE)) { + t = GETF(OUTRATE); + if (rate > 0) { + if (t > rate) { + f_output = 1.0; + GETF(OUTRATE) = rate; + } else { + f_output = t / rate; + if (f_output < MIN_OUTPUT) { + f_output = MIN_OUTPUT; + GETF(OUTRATE) = MIN_OUTPUT * rate; + } + } + } else { + if (t >= 1.0) { + f_output = 1.0; + GETF(OUTRATE) = 1.0; + } else { + f_output = t; + if (f_output < MIN_OUTPUT) { + f_output = MIN_OUTPUT; + GETF(OUTRATE) = MIN_OUTPUT; + } + } + } + } else { + f_output = 1.0; + if (rate > 0) + GETF(OUTRATE) = 1.0; + else + GETF(OUTRATE) = rate; + } + + /* Transition bandwidth */ + if (ISSET(FTRANSITION)) { + f_transition = GETF(FTRANSITION); + if (f_transition < MIN_TRANSITION) + f_transition = 1.0/32; + if (f_transition > 1.0) + f_transition = 1.0; + } else { + f_transition = (0.5/1000) * (double) LFR_QUALITY[qual].transition; + } + GETF(FTRANSITION) = f_transition; + + /* Maximum audible frequency */ + if (ISSET(MAXFREQ)) { + f_maxfreq = GETF(MAXFREQ); + } else { + if (LFR_QUALITY[qual].kind < 2) + f_maxfreq = 20000.0; + else + f_maxfreq = -1; + GETF(MAXFREQ) = f_maxfreq; + } + if (rate > 0) + f_maxfreq = f_maxfreq / rate; + else + f_maxfreq = 1.0; + + /* "Loose" flag */ + if (ISSET(LOOSE)) { + loose = GETI(LOOSE) > 0; + } else { + loose = LFR_QUALITY[qual].kind < 1; + } + GETF(LOOSE) = (double) loose; + + /* Minimum output bandwidth */ + if (ISSET(MINFPASS)) { + f_minpass = GETF(MINFPASS); + if (f_minpass < MIN_MINPASS) + f_minpass = MIN_MINPASS; + else if (f_minpass > MAX_MINPASS) + f_minpass = MAX_MINPASS; + } else { + f_minpass = 0.001 * LFR_QUALITY[qual].minfpass; + } + GETF(MINFPASS) = f_minpass; + + /* Stop band attenuation */ + if (ISSET(ATTEN)) { + atten = GETF(ATTEN); + if (atten < MIN_ATTEN) + atten = MIN_ATTEN; + else if (atten > MAX_ATTEN) + atten = MAX_ATTEN; + } else { + atten = (double) LFR_QUALITY[qual].atten; + } + GETF(ATTEN) = atten; + + /* Stop band frequency */ + if (ISSET(FSTOP)) { + f_stop = GETF(FSTOP); + if (f_stop > 1.0) + f_stop = 1.0; + else if (f_stop < MIN_TRANSITION) + f_stop = MIN_TRANSITION; + } else { + f_stop = 0.5 * f_output; + if (loose && f_maxfreq > 0) { + t = f_output - f_maxfreq; + if (t > f_stop) + f_stop = t; + } + } + GETF(FSTOP) = f_stop; + + /* Pass band frequency */ + if (ISSET(ATTEN)) { + f_pass = GETF(FPASS); + } else { + f_pass = f_stop - f_transition; + t = 0.5 * f_output * f_minpass; + if (t > f_pass) + f_pass = t; + if (f_maxfreq > 0 && f_pass > f_maxfreq) + f_pass = f_maxfreq; + } + t = f_stop - MIN_TRANSITION; + if (t < 0) + t = 0; + if (t < f_pass) + f_pass = t; + GETF(FPASS) = f_pass; +} diff --git a/lib/param.h b/lib/param.h new file mode 100644 index 0000000..ab65c63 --- /dev/null +++ b/lib/param.h @@ -0,0 +1,135 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#pragma once + +#include <array> + +/* + Parameters for the filter generator. + + Filter generation goes through two stages. + + 1. In the first stage, the resampling parameters are used to create + a filter specification. The filter specification consists of + normalized frequencies for the pass band, stop band, and stop band + attenuation. This stage uses simple logic to create a filter + specification that "makes sense" for any input. It relaxes the + filter specification for ultrasonic (inaudible) frequencies and + ensures that enough of the input signal passes through. + + 2. In the second stage, an FIR filter is generated that fits the + filter specified by the first stage. + + Normally, for resampling, you will specify QUALITY, INRATE, and + OUTRATE. A filter specification for the conversion will be + automatically generated with the given subjective quality level. + + If you are a signal-processing guru, you can create the filter + specification directly by setting FPASS, FSTOP, and ATTEN. + */ +enum lfr_param_t { + /* High level filter parameters. These are typi*cally the only + parameters you will need to set. */ + + /* Filter quality, default 8. An integer between 0 and 10 which + determines the default values for other para*meters. */ + LFR_PARAM_QUALITY, + + /* Input sample rate, in Hz. The default is -1, which creates a + generic filter. */ + LFR_PARAM_INRATE, + + /* Output sample rate. If the input sample rate is specified, + then this is measured in Hz. Otherwise, if *the input rate is + -1, then this value is relative to the input sample rate (so + you would use 0.5 for downsampling by a factor of two). + Defaults to the same value as the input sample rate, which + creates a filter which can be used for upsampling at any ratio. + Note that increasing the output rate above the input rate has + no effect, all upsampling filters for a given input frequency + are identical. */ + LFR_PARAM_OUTRATE, + + /* + Medium level filter parameters. These parame*ters affect how the + filter specification is generated from the input and output + sample rates. Most of these parameters have default values + which depend on the QUALITY setting. + */ + + /* The width of the filter transition band, as a fraction of the + input sample rate. This value will be enlar*ged to extend the + transition band to MAXFREQ and will be narrowed to extend the + pass band to MINBW. The default value depends on the QUALITY + setting, and gets narrower as QUALITY increases. */ + LFR_PARAM_FTRANSITION, + + /* Maximum audible frequency. The pass band will be narrowed to + fit within the range of audible frequencies.* Default value is + 20 kHz if the input frequency is set, otherwise this parameter + is unused. If you want to preserve ultrasonic frequencies, + disable this parameter by setting it to -1. This is disabled + by default at absurd quality settings (9 and 10). */ + LFR_PARAM_MAXFREQ, + + /* A flag which allows aliasing noise as long as it is above + MAXFREQ. This flag improves the subjective *quality of + low-quality filters by increasing their bandwidth, but causes + problems for high-quality filters by increasing noise. Default + value depends on QUALITY setting, and is set for low QUALITY + values. Has no effect if MAXFREQ is disabled. */ + LFR_PARAM_LOOSE, + + /* Minimum size of pass band, as a fraction of the output + bandwidth. This prevents the filter designe*r from filtering + out the entire signal, which can happen when downsampling by a + large enough ratio. The default value is 0.5 at low quality + settings, and higher at high quality settings. The filter size + can increase dramatically as this number approaches 1.0. */ + LFR_PARAM_MINFPASS, + + /* + Filter specification. These parameters are n*ormally generated + from the higher level parameters. If the filter specification + is set, then the higher level parameters will all be ignored. + */ + + /* The end of the pass band, as a fraction of the input sample + rate. Normally, the filter designer chooses* this value. */ + LFR_PARAM_FPASS, + + /* The start of the stop band, as a fraction of the input sample + rate. Normally, the filter designer chooses* this value. */ + LFR_PARAM_FSTOP, + + /* Desired stop band attenuation, in dB. Larger numbers are + increase filter quality. Default value depe*nds on the QUALITY + setting. */ + LFR_PARAM_ATTEN, + + LFR_PARAM_COUNT +}; + +class lfr_param { + /* + Bit set of which parameters were set by the user. + */ + unsigned set{0}; + + /* + Set to 1 if the derived parameters are up to date. + */ + int current{0}; + +public: + /* + Parameter values. + */ + std::array<double, LFR_PARAM_COUNT> param; + + void calculate(); + void seti(lfr_param_t pname, int value); + void setf(lfr_param_t pname, double value); + void geti(lfr_param_t pname, int* value); + void getf(lfr_param_t pname, double* value); +}; + diff --git a/lib/param_name.cpp b/lib/param_name.cpp new file mode 100644 index 0000000..b90fc1c --- /dev/null +++ b/lib/param_name.cpp @@ -0,0 +1,41 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#include "defs.h" +#include "fresample.h" +#include <array> +#include <string_view> +using namespace std::string_view_literals; + +constexpr std::array<std::string_view, LFR_PARAM_COUNT> LFR_PARAM_NAME { + "quality"sv, + "inrate"sv, + "outrate"sv, + "ftransition"sv, + "maxfreq"sv, + "loose"sv, + "minfpass"sv, + "fpass"sv, + "fstop"sv, + "atten"sv +}; + +std::string_view lfr_param_name(lfr_param_t pname) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return {}; + return LFR_PARAM_NAME[n]; +} + +int lfr_param_lookup(const std::string_view& pname) +{ + int i = 0; + for(const auto& name : LFR_PARAM_NAME) + { + if(name == pname) + { + return i; + } + ++i; + } + return -1; +} diff --git a/lib/resample.h b/lib/resample.h new file mode 100644 index 0000000..0ae8856 --- /dev/null +++ b/lib/resample.h @@ -0,0 +1,84 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#pragma once + +#include "defs.h" +#include "filter.h" + +/* + Resampling function naming convention: + + lfr_resample_<atype><nchan><ftype>_<feature> + + atype: audio data type, s16 or f32 + nchan: number of channels, n1 or n2 + ftype: filter coefficient type, s16 or f32 + + feature: CPU feature set required +*/ + +/* ======================================== */ + +void lfr_resample_s16n1s16_scalar( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1s16_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1s16_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +/* ======================================== */ + +void lfr_resample_s16n2s16_scalar( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2s16_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2s16_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +/* ======================================== */ + +void lfr_resample_s16n1f32_scalar( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1f32_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1f32_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +/* ======================================== */ + +void lfr_resample_s16n2f32_scalar( lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2f32_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2f32_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); diff --git a/lib/resample_func.h b/lib/resample_func.h new file mode 100644 index 0000000..9e894d4 --- /dev/null +++ b/lib/resample_func.h @@ -0,0 +1,92 @@ +#pragma once + +#include <array> +#include <algorithm> +#include <cmath> +#include <type_traits> +#include <limits> +#include "resample.h" +#include "filter.h" + +namespace { + template<typename result_type, typename type> + constexpr result_type dither_truncate(type value, unsigned dither) + { + using limits = std::numeric_limits<result_type>; + using dither_limits = std::numeric_limits<decltype(dither)>; + if constexpr(std::is_floating_point_v<type>) + { + value += dither * type{1} / type{1ul << dither_limits::digits}; + value = std::floor(value); + } + else + { + static_assert(std::is_integral_v<type>, "Unsupported acc_type"); + + value += dither >> (dither_limits::digits - limits::digits); // 17 for short + value >>= limits::digits; // 15 for short + } + + return std::clamp<type>(value, limits::min(), limits::max()); + } +} + +template<typename sample_type_in, typename sample_type_out, typename filter_type, typename acc_type, size_t channels = 1, size_t INTERP_BIT = 14> +void lfr_resample(lfr_fixed_t& pos, const lfr_fixed_t inv_ratio, unsigned& dither, void* out, std::size_t outlen, const void* in, std::size_t inlen, const lfr_filter& filter) +{ + static_assert(channels > 0, "0 channels do not make any sense"); + + auto inp = reinterpret_cast<const sample_type_in*>(in); + auto outp = reinterpret_cast<sample_type_out*>(out); + auto fd = reinterpret_cast<const filter_type*>(filter.data.data()); + + const auto flen = filter.nsamp; + const auto log2nfilt = filter.log2nfilt; + auto x = pos; + std::array<unsigned, channels> ds{dither}; + for(std::size_t i = 1; i < channels; ++i) + { + ds[i] = LCG_A * ds[i - 1] + LCG_C; + } + + for(std::size_t i = 0; i < outlen; ++i) + { + /* fn: filter number + ff0: filter factor for filter fn + ff1: filter factor for filter fn+1 */ + + const auto fn = (static_cast<unsigned>(x >> 1) >> (31 - log2nfilt)) & ((1u << log2nfilt) - 1); + int ff1 = (static_cast<unsigned>(x >> (32 - log2nfilt - INTERP_BIT))) & ((1u << INTERP_BIT) - 1); + int ff0 = (1u << INTERP_BIT) - ff1; + + /* off: offset in input corresponding to first sample in filter */ + auto off = static_cast<int>(x >> 32); + /* fidx0, fidx1: start, end indexes in FIR data */ + int fidx0 = std::max(-off, 0); + int fidx1 = std::min<int>(inlen - off, flen); + + /* acc: FIR accumulator */ + std::array<acc_type, channels> acc{0}; + + for(int j = fidx0; j < fidx1; ++j) + { + acc_type f = (fd[(fn + 0) * flen + j] * ff0 + fd[(fn + 1) * flen + j] * ff1) / (1 << INTERP_BIT); + for(std::size_t c = 0; c < channels; ++c) + { + acc[c] += inp[(j + off) * channels + c] * f; + } + } + + for(std::size_t c = 0; c < channels; ++c) + { + outp[i * channels + c] = dither_truncate<sample_type_out>(acc[c], ds[c]); + + ds[c] = LCG_A2 * ds[c] + LCG_C2; + } + + x += inv_ratio; + } + + pos = x; + dither = ds[0]; +} diff --git a/lib/resample_s16func.cpp b/lib/resample_s16func.cpp new file mode 100644 index 0000000..07c086e --- /dev/null +++ b/lib/resample_s16func.cpp @@ -0,0 +1,51 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#include "defs.h" +#include "filter.h" +#include "resample.h" +#include "resample_func.h" + +namespace { + template<auto func> + void helper(lfr_fixed_t* pos, lfr_fixed_t inv_ratio, unsigned* dither, void* out, int outlen, const void* in, int inlen, const lfr_filter* filter) + { + return func(*pos, inv_ratio, *dither, out, outlen, in, inlen, *filter); + } +} + +lfr_resample_func_t +lfr_resample_s16func(int nchan, const struct lfr_filter *filter) +{ + const auto ftype = filter->type; + switch (nchan) { + case 1: + switch (ftype) { + case LFR_FTYPE_S16: + return helper<lfr_resample<short, short, short, int, 1, 14>>; + + case LFR_FTYPE_F32: + return helper<lfr_resample<short, short, short, int, 1, 14>>; + + default: + break; + } + break; + + case 2: + switch (ftype) { + case LFR_FTYPE_S16: + return helper<lfr_resample<short, short, short, int, 2, 14>>; + + case LFR_FTYPE_F32: + return helper<lfr_resample<short, short, float, float, 2, 14>>; + + default: + break; + } + break; + + default: + break; + } + + return NULL; +} diff --git a/lib/swap16.cpp b/lib/swap16.cpp new file mode 100644 index 0000000..f20efc0 --- /dev/null +++ b/lib/swap16.cpp @@ -0,0 +1,65 @@ +/* Copyright 2012 Dietrich Epp <depp@zdome.net> */ +#include "convert.h" +#include <cstdint> + +void lfr_swap16(void *dest, const void *src, size_t count) +{ + size_t i, n; + unsigned x, y; + const unsigned char *s8; + unsigned char *d8; + const unsigned short *s16; + unsigned short *d16; + const unsigned *s32; + unsigned *d32; + + if (!count) + return; + + if ((((uintptr_t) dest | (uintptr_t) src) & 1u) != 0) { + /* completely unaligned */ + s8 = reinterpret_cast<const unsigned char*>(src); + d8 = reinterpret_cast<unsigned char*>(dest); + for (i = 0; i < count; ++i) { + x = s8[i*2+0]; + y = s8[i*2+1]; + d8[i*2+0] = static_cast<unsigned char>(y); + d8[i*2+1] = static_cast<unsigned char>(x); + } + } else if ((((uintptr_t) dest - (uintptr_t) src) & 3u) != 0) { + /* 16-bit aligned */ + s16 = reinterpret_cast<const unsigned short*>(src); + d16 = reinterpret_cast<unsigned short*>(dest);; + for (i = 0; i < count; ++i) { + x = s16[i]; + d16[i] = static_cast<unsigned short>((x >> 8) | (x << 8)); + } + } else { + /* 16-bit aligned, with 32-bit aligned delta */ + s16 = reinterpret_cast<const unsigned short*>(src); + d16 = reinterpret_cast<unsigned short*>(dest); + n = count; + if ((uintptr_t) dest & 3u) { + x = s16[0]; + d16[0] = static_cast<unsigned short>((x >> 8) | (x << 8)); + n -= 1; + s16 += 1; + d16 += 1; + } + s32 = reinterpret_cast<const unsigned*>(s16); + d32 = reinterpret_cast<unsigned*>(d16); + for (i = 0; i < n/2; ++i) { + x = s32[i]; + d32[i] = ((x >> 8) & 0xff00ffu) | ((x & 0xff00ffu) << 8); + } + d32 += n/2; + s32 += n/2; + n -= (n/2)*2; + if (n) { + s16 = reinterpret_cast<const unsigned short*>(s32); + d16 = reinterpret_cast<unsigned short*>(d32); + x = s16[0]; + d16[0] = static_cast<unsigned short>((x >> 8) | (x << 8)); + } + } +} diff --git a/lib/types.h b/lib/types.h new file mode 100644 index 0000000..c023a9a --- /dev/null +++ b/lib/types.h @@ -0,0 +1,44 @@ +#pragma once +#include <cstdint> + +/* + A 32.32 fixed point number. Th*is is used for expressing fractional + positions in a buffer. + + When used for converting from sample rate f_s to f_d, the timing + error at time t is bounded by t * 2^-33 * r_d / r_s. For a + pessimistic conversion ratio, 8 kHz -> 192 kHz, this means that it + will take at least five days to accumulate one millisecond of error. + */ +using lfr_fixed_t = std::int64_t; + + +/* ======================================== + Sample formats + ======================================== */ + +/* + Audio sample formats. + */ +enum lfr_fmt_t { + LFR_FMT_U8, + LFR_FMT_S16BE, + LFR_FMT_S16LE, + LFR_FMT_S24BE, + LFR_FMT_S24LE, + LFR_FMT_F32BE, + LFR_FMT_F32LE, + LFR_FMT_COUNT +}; + + +/* + Specialized resampling function, designed to work with a specific + sample format and filter type. Do not attempt to reuse a function + for a different filter. + + Note that lengths are still measured in frames, but since the + function is specialized for a given format and number of channels, + there is no need to specify the format or number of channels. +*/ +using lfr_resample_func_t = void (*)( lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, void *out, int outlen, const void *in, int inlen, const struct lfr_filter *filter);
\ No newline at end of file diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..c2f563b --- /dev/null +++ b/meson.build @@ -0,0 +1 @@ +subdir('lib') |
