From 4cb83acddbe154312b74d2d63985ac1100bad7c5 Mon Sep 17 00:00:00 2001 From: Markus Mittendrein Date: Mon, 28 Nov 2022 20:16:46 +0100 Subject: Initial --- lib/.header | 3 + lib/convert.h | 5 + lib/defs.h | 119 +++++++++++++++++ lib/filter.cpp | 334 +++++++++++++++++++++++++++++++++++++++++++++++ lib/filter.h | 181 +++++++++++++++++++++++++ lib/info_name.cpp | 39 ++++++ lib/meson.build | 25 ++++ lib/param.cpp | 236 +++++++++++++++++++++++++++++++++ lib/param.h | 135 +++++++++++++++++++ lib/param_name.cpp | 41 ++++++ lib/resample.h | 84 ++++++++++++ lib/resample_func.h | 92 +++++++++++++ lib/resample_s16func.cpp | 51 ++++++++ lib/swap16.cpp | 65 +++++++++ lib/types.h | 44 +++++++ 15 files changed, 1454 insertions(+) create mode 100644 lib/.header create mode 100644 lib/convert.h create mode 100644 lib/defs.h create mode 100644 lib/filter.cpp create mode 100644 lib/filter.h create mode 100644 lib/info_name.cpp create mode 100644 lib/meson.build create mode 100644 lib/param.cpp create mode 100644 lib/param.h create mode 100644 lib/param_name.cpp create mode 100644 lib/resample.h create mode 100644 lib/resample_func.h create mode 100644 lib/resample_s16func.cpp create mode 100644 lib/swap16.cpp create mode 100644 lib/types.h (limited to 'lib') diff --git a/lib/.header b/lib/.header new file mode 100644 index 0000000..522f27e --- /dev/null +++ b/lib/.header @@ -0,0 +1,3 @@ +{ + "guard_prefix": "" +} diff --git a/lib/convert.h b/lib/convert.h new file mode 100644 index 0000000..f869f2b --- /dev/null +++ b/lib/convert.h @@ -0,0 +1,5 @@ +/* Copyright 2012 Dietrich Epp */ +#pragma once +#include "defs.h" + +void lfr_swap16(void *dest, const void *src, size_t count); diff --git a/lib/defs.h b/lib/defs.h new file mode 100644 index 0000000..e645532 --- /dev/null +++ b/lib/defs.h @@ -0,0 +1,119 @@ +/* Copyright 2012 Dietrich Epp */ +#pragma once + +#include "fresample.h" + +#define LFR_UNREACHABLE (void) 0 + +#if defined(__clang__) +# if __has_builtin(__builtin_unreachable) +# undef LFR_UNREACHABLE +# define LFR_UNREACHABLE __builtin_unreachable() +# endif +#elif defined(__GNUC__) +# if (__GNUC__ >= 4 && __GNU_MINOR__ >= 5) || __GNUC__ > 4 +# undef LFR_UNREACHABLE +# define LFR_UNREACHABLE __builtin_unreachable() +# endif +#endif + +#define INLINE static inline + +/* + Constants used by dithering algorithm. We usa a simple linear + congruential generator to generate a uniform signal for dithering, + taking the high order bits: + + x_{n+1} = (A * x_n + C) mod 2^32 + + The derived constants, AN/CN, are used for stepping the LCG forward + by N steps. AI is the inverse of A. +*/ + +constexpr auto LCG_A = 1103515245u; +constexpr auto LCG_A2 = 3265436265u; +constexpr auto LCG_A4 = 3993403153u; + +constexpr auto LCG_C = 12345u; +constexpr auto LCG_C2 = 3554416254u; +constexpr auto LCG_C4 = 3596950572u; + +constexpr auto LCG_AI = 4005161829u; +constexpr auto LCG_CI = 4235699843u; + +/* ==================== + Utility functions + ==================== */ + +#if defined(LFR_SSE2) && defined(LFR_CPU_X86) +#include + +/* + Store 16-bit words [i0,i1) in the given location. +*/ +INLINE void lfr_storepartial_epi16(__m128i *dest, __m128i x, int i0, int i1) +{ + union { + unsigned short h[8]; + __m128i x; + } u; + u.x = x; + for (int i = i0; i < i1; ++i) + ((unsigned short *) dest)[i] = u.h[i]; +} + +/* + Advance four linear congruential generators. The four generators + should use the same A and C constants. + + The 32-bit multiply we want requires SSE 4.1. We construct it out of + two 32 to 64 bit multiply operations. +*/ +INLINE __m128i +lfr_rand_epu32(__m128i x, __m128i a, __m128i c) +{ + return _mm_add_epi32( + _mm_unpacklo_epi32( + _mm_shuffle_epi32( + _mm_mul_epu32(x, a), + _MM_SHUFFLE(0, 0, 2, 0)), + _mm_shuffle_epi32( + _mm_mul_epu32(_mm_srli_si128(x, 4), a), + _MM_SHUFFLE(0, 0, 2, 0))), + c); +} + +#endif + +#if defined(LFR_ALTIVEC) && defined(LFR_CPU_PPC) +#if !defined(__APPLE_ALTIVEC__) +#include +#endif + +/* + Advance four linear congruential generators. The four generators + should use the same A and C constants. + + The 32-bit multiply we want does not exist. We construct it out of + 16-bit multiply operations. +*/ +INLINE vector unsigned int +lfr_vecrand(vector unsigned int x, vector unsigned int a, + vector unsigned int c) +{ + vector unsigned int s = vec_splat_u32(-16); + return vec_add( + vec_add( + vec_mulo( + (vector unsigned short) x, + (vector unsigned short) a), + c), + vec_sl( + vec_msum( + (vector unsigned short) x, + (vector unsigned short) vec_rl(a, s), + vec_splat_u32(0)), + s)); +} + +#endif \ No newline at end of file diff --git a/lib/filter.cpp b/lib/filter.cpp new file mode 100644 index 0000000..0ca829e --- /dev/null +++ b/lib/filter.cpp @@ -0,0 +1,334 @@ +/* Copyright 2012 Dietrich Epp */ +#include "filter.h" +#include "param.h" +#include "../include/fresample.h" +#include +#include +#include +#include +#include +#include + +namespace { +/* + Simple sinc function implementation. Note that this is the + unnormalized sinc function, with zeroes at nonzero multiples of pi. +*/ +constexpr double sinc(double x) +{ + return std::abs(x) < 1e-8 ? 1.0 : std::sin(x) / x; +} + +/* + Modified Bessel function of the first kind of order 0, i0. Only + considered valid over the domain [0,18]. + + This is the naive algorithm. + + In "Computation of Special Functions" by Shanjie Zhang and Jianming + Jin, this technique is only used for 0 filter_calculate(std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + std::vector data(nsamp * nfilt); + + const auto yscale = 2.0 * cutoff / bessel_i0(beta); + const auto xscale = (8.0 * std::atan(1.0)) * cutoff; + for (std::size_t i = 0; i < nfilt; ++i) { + const auto x0 = (nsamp - 1) / 2 + offset * i; + for (std::size_t j = 0; j < nsamp; ++j) { + const auto x = j - x0; + const auto t = x * (2.0 / (nsamp - 2)); + double y; + if (t <= -1.0 || t >= 1.0) { + y = 0.0; + } else { + y = yscale * bessel_i0(beta * sqrt(1.0 - t * t)) * sinc(xscale * x); + } + data[i * nsamp + j] = y; + } + } + + return data; +} + +template +void filter_calculate_integral(type* data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + const auto& filter = filter_calculate(nsamp, nfilt, offset, cutoff, beta); + + for (std::size_t i = 0; i < nfilt; ++i) { + const auto sum = accumulate(begin(filter) + i * nsamp, begin(filter) + (i + 1) * nsamp, 0.0); + const auto fac = static_cast(scale) / sum; + auto err = 0.0; + for (std::size_t j = 0; j < nsamp; ++j) { + const auto y = filter[i * nsamp + j] * fac + err; + const auto z = std::round(y); + err = z - y; + data[i * nsamp + j] = static_cast(z); + } + } +} + +void lfr_filter_calculate_s16(std::int16_t* data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + if(nsamp <= 8) + { + filter_calculate_integral(data, nsamp, nfilt, offset, cutoff, beta); + } + else + { + filter_calculate_integral(data, nsamp, nfilt, offset, cutoff, beta); + } +} + +void lfr_filter_calculate_f32(float *data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta) +{ + const auto& filter = filter_calculate(nsamp, nfilt, offset, cutoff, beta); + + transform(begin(filter), end(filter), data, [](const auto val){ return static_cast(val); }); +} +} + +lfr_filter::lfr_filter(lfr_param& param) +{ + double dw, dw2, lenf, atten2, atten3, maxerror, beta; + double ierror, rerror, error; + double t, a, ulp; + int align=8, max_oversample; + + param.calculate(); + + f_pass = param.param[LFR_PARAM_FPASS]; + f_stop = param.param[LFR_PARAM_FSTOP]; + atten = param.param[LFR_PARAM_ATTEN]; + + /* atten2 is the attenuation of the ideal (un-rounded) filter, + maxerror is the ratio of a 0 dBFS signal to the permitted + roundoff and interpolation error. This divides the + "attenuation" into error budgets with the assumption that the + error from each source is uncorrelated. Note that atten2 is + measured in dB, whereas maxerror is a ratio to a full scale + signal. */ + atten2 = atten + 3.0; + maxerror = exp((atten + 3.0) * (-log(10.0) / 20.0)); + + /* Calculate the filter order from the attenuation. Round up to + the nearest multiple of the SIMD register alignment. */ + dw = (8.0 * std::atan(1.0)) * (f_stop - f_pass); + lenf = (atten2 - 8.0) / (4.57 * dw) + 1; + nsamp = static_cast(std::ceil(lenf / align) * align); + if (nsamp < align) + nsamp = align; + + /* ======================================== + Determine filter coefficient size. + ======================================== */ + + /* We can calculate expected roundoff error from filter length. + We assume that the roundoff error at each coefficient is an + independent uniform variable with a range of 1 ULP, and that + there is an equal roundoff step during interpolation. This + gives roundoff error with a variance of N/6 ULP^2. Roundoff + error is a ratio relative to 1 ULP. */ + rerror = std::sqrt(nsamp * (1.0 / 6.0)); + + /* The interpolation error can be adjusted by choosing the amount + of oversampling. The curvature of the sinc function is bounded + by (2*pi*f)^2, so the interpolation error is bounded by + (pi*f/M)^2. + + We set a maximum oversampling of 2^8 for 16-bit, and 2^12 for + floating point. */ + t = f_pass * (4.0 * std::atan(1.0)); + a = 1.0 / 256; + ierror = (t * a) * (t * a); + ulp = 1.0 / 32768.0; + /* We assume, again, that interpolation and roundoff error are + uncorrelated and normal. If error at 16-bit exceeds our + budget, then use floating point. */ + error = (ierror * ierror + rerror * rerror) * (ulp * ulp); + if (error <= maxerror * maxerror) { + type = LFR_FTYPE_S16; + ulp = 1.0 / 32768.0; + max_oversample = 8; + } else { + type = LFR_FTYPE_F32; + ulp = 1.0 / 16777216.0; + max_oversample = 12; + } + + /* Calculate the interpolation error budget by subtracting + roundoff error from the total error budget, since roundoff + error is fixed. */ + ierror = maxerror * maxerror - (rerror * rerror) * (ulp * ulp); + ierror = (ierror > 0) ? sqrt(ierror) : 0; + if (ierror < ulp) + ierror = ulp; + + /* Calculate oversampling from the error budget. */ + a = (f_pass * (4.0 * std::atan(1.0))) / std::sqrt(ierror); + log2nfilt = static_cast(std::ceil(std::log(a) * (1.0 / std::log(2.0)))); + if (log2nfilt < 0) + log2nfilt = 0; + else if (log2nfilt > max_oversample) + log2nfilt = max_oversample; + + /* ======================================== + Calculate window parameters + ======================================== */ + + /* Since we rounded up the filter order, we can increase the stop + band attenuation or bandwidth without making the transition + bandwidth exceed the design parameters. This gives a "free" + boost in quality. We choose to increase both. + + If we didn't increase these parameters, we would still incur + the additional computational cost but it would be spent + increasing the width of the stop band, which is not useful. */ + + /* atten3 is the free stopband attenuation */ + atten3 = (nsamp - 1) * 4.57 * dw + 8; + t = (-20.0 / std::log(10.0)) * std::log(rerror * ulp); + if (t < atten3) + atten3 = t; + if (atten2 > atten3) + atten3 = atten2; + else + atten3 = 0.5 * (atten2 + atten3); + + /* f_pass2 is the free pass band */ + dw2 = (atten3 - 8.0) / (4.57 * (nsamp - 1)); + auto f_pass2 = f_stop - dw2 * (1.0 / (8.0 * std::atan(1.0))); + + if (atten3 > 50) + beta = 0.1102 * (atten3 - 8.7); + else if (atten3 > 21) + beta = 0.5842 * std::pow(atten3 - 21, 0.4) + 0.07866 * (atten3 - 21); + else + beta = 0; + + setup_window(f_pass2, beta); +} + +void lfr_filter::setup_window(double cutoff, double beta) +{ + size_t esz = 1, align = 16, nfilt; + if (nsamp < 1 || log2nfilt < 0) + goto error; + + switch (type) { + case LFR_FTYPE_S16: + esz = sizeof(short); + break; + + case LFR_FTYPE_F32: + esz = sizeof(float); + break; + } + nfilt = (1u << log2nfilt) + 1; + if ((size_t) nsamp > (size_t) -1 / esz) + goto error; + if (((size_t) -1 - (align - 1) - sizeof(*this)) / nfilt < nsamp * esz) + goto error; + data.resize(nsamp * nfilt * esz + align - 1); + + delay = static_cast((nsamp - 1) / 2) << 32; + + switch (type) { + case LFR_FTYPE_S16: + lfr_filter_calculate_s16( + reinterpret_cast(data.data()), nsamp, nfilt, + 1.0 / static_cast(1 << log2nfilt), cutoff, beta); + break; + + case LFR_FTYPE_F32: + lfr_filter_calculate_f32( + reinterpret_cast(data.data()), nsamp, nfilt, + 1.0 / static_cast(1 << log2nfilt), cutoff, beta); + break; + } + return; + +error: + throw std::runtime_error{"Error in filter setup: lfr_filter::setup_window"}; +} + +int lfr_filter::geti(LFR_INFO_TYPE iname, bool convert) const +{ + switch(iname) + { + case LFR_INFO_DELAY: + return delay * (1.0 / 4294967296.0); + case LFR_INFO_FPASS: + return f_pass; + case LFR_INFO_FSTOP: + return f_stop; + case LFR_INFO_ATTEN: + return atten; + default: + if(convert) + { + return static_cast(getf(iname, false)); + } + } + return {}; +} + +double lfr_filter::getf(LFR_INFO_TYPE iname, bool convert) const +{ + switch(iname) + { + case LFR_INFO_DELAY: + return static_cast(delay >> 32); + case LFR_INFO_SIZE: + return nsamp; + case LFR_INFO_MEMSIZE: + { + int sz = nsamp * ((1 << log2nfilt) + 1); + switch (type) + { + case LFR_FTYPE_S16: return sz * 2; + case LFR_FTYPE_F32: return sz * 4; + default: return {}; + } + } + default: + if(convert) + { + return geti(iname, false); + } + } + return {}; +} + +void lfr_filter::resample( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, + unsigned *dither, int nchan, + void *out, lfr_fmt_t outfmt, int outlen, + const void *in, lfr_fmt_t infmt, int inlen) +{ + lfr_resample_func_t func; + if (outfmt == LFR_FMT_S16_NATIVE && infmt == LFR_FMT_S16_NATIVE) { + func = lfr_resample_s16func(nchan, this); + if (!func) + return; + func(pos, inv_ratio, dither, out, outlen, in, inlen, this); + } +} \ No newline at end of file diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..b6ca66e --- /dev/null +++ b/lib/filter.h @@ -0,0 +1,181 @@ +/* Copyright 2012 Dietrich Epp */ +#pragma once +#include "types.h" +#include "param.h" + +#include + +/* Bits used for interpolating between two filters. The scalar + implementation supports up to 16 bits, but the SIMD implementations + generally only support 14 so the values can fit in a signed 16-bit + word. We bring all implementations to the same accuracy in order + to ensure that all implementations produce the same results. */ +#define INTERP_BITS 14 + +/* + Filter types +*/ +enum lfr_ftype_t { + LFR_FTYPE_S16, + LFR_FTYPE_F32 +}; + +/* + Array of FIR filters. + + Let N=2^log2nfilt, M=nsamp. + + This is an array of N+1 filters, 0..N, with each filter M samples + long. Filter number I will be centered on the sample with offset: + + floor(M/2) + I/N + + This means that filter N will be a copy of filter 0, except shifted + to the right by one sample. This extra filter makes the + interpolation code simpler. +*/ + + +/* + Information queries that a filter responds to. Each query gives an + integer or floating point result, automatically cast to the type + requested. +*/ +enum LFR_INFO_TYPE { + /* + The size of the filter, also known as the filter's order. This + does not include oversampling. The resampler will read this + many samples, starting with the current position (rounded down), + whenever it calculates a sample. + + In other words, this is the amount of overlap needed when + resampling consecutive buffers. + */ + LFR_INFO_SIZE, + + /* + Filter delay. Note that lfr_filter_delay returns a fixed point + number and is usually preferable. + */ + LFR_INFO_DELAY, + + /* + The is the number of bytes used by filter coefficients. + */ + LFR_INFO_MEMSIZE, + + /* + The normalized pass frequency the filter was designed with. + */ + LFR_INFO_FPASS, + + /* + The normalized stop frequency the filter was designed with. + */ + LFR_INFO_FSTOP, + + /* + The stopband attenuation, in dB, that the filter was designed + with. + */ + LFR_INFO_ATTEN, + + LFR_INFO_COUNT +}; + +/* + * A filter for resampling audio. + */ +struct lfr_filter { + /* + FIR filter coefficient data type. + */ + lfr_ftype_t type; + + /* + FIR filter data. + */ + std::vector data; + + /* + Length of each filter, in samples. This is chosen to make each + filter size a multiple of the SIMD register size. Most SIMD + implementations today use 16 byte registers, so this will + usually be a multiple of 8 or 4, depending on the coefficient + type. + */ + int nsamp; + + /* + Base 2 logarithm of the number of different filters. There is + always an additional filter at the end of the filter array to + simplify interpolation, this extra filter is a copy of the first + filter shifted right by one sample. + */ + int log2nfilt; + + /* + Filter delay. Filters are causal, so this should be + non-negative. + */ + lfr_fixed_t delay; + + /* + Design parameters of the filter. + */ + double f_pass, f_stop, atten; + + lfr_filter(lfr_param& param); + int geti(LFR_INFO_TYPE iname, bool convert = true) const; + double getf(LFR_INFO_TYPE iname, bool convert = true) const; + + +/* + Resample an audio buffer. Note that this function may need to + create intermediate buffers if there is no function which can + directly operate on the input and output formats. No intermediate + buffers will be necessary if the following conditions are met: + + - Input and output formats are identical. + + - Sample format is either S16_NATIVE or F32_NATIVE. + + - The number of channels is either 1 (mono) or 2 (stereo). + + pos: Current position relative to the start of the input buffer, + expressed as a 32.32 fixed point number. On return, this will + contain the updated position. Positions outside the input buffer + are acceptable, it will be treated as if the input buffer were + padded with an unlimited number of zeroes on either side. + + inv_ratio: Inverse of the resampling ratio, expressed as a 32.32 + fixed point number. This number is equal to the input sample rate + divided by the output sample rate. + + dither: State of the PRNG used for dithering. + + nchan: Number of interleaved channels. + + out, in: Input and output buffers. The buffers are not permitted to + alias each other. + + outlen, inlen: Length of buffers, in frames. Note that the length + type is 'int' instead of 'size_t'; this matches the precision of + buffer positions. + + outfmt, infmt: Format of input and output buffers. + + filter: A suitable low-pass filter for resampling at the given + ratio. +*/ + void resample( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, + unsigned *dither, int nchan, + void *out, lfr_fmt_t outfmt, int outlen, + const void *in, lfr_fmt_t infmt, int inlen); +private: + /* + Create a Kaiser-windowed sinc filter. + */ + void setup_window(double cutoff, double beta); +}; \ No newline at end of file diff --git a/lib/info_name.cpp b/lib/info_name.cpp new file mode 100644 index 0000000..d0897d2 --- /dev/null +++ b/lib/info_name.cpp @@ -0,0 +1,39 @@ +/* Copyright 2012 Dietrich Epp */ +#include "defs.h" +#include "fresample.h" +#include +#include +#include + +using namespace std::string_view_literals; + +constexpr std::array LFR_INFO_NAME = { + "size"sv, + "delay"sv, + "memsize"sv, + "fpass"sv, + "fstop"sv, + "atten"sv +}; + +std::string_view +lfr_info_name(int pname) +{ + if (pname < 0 || pname >= LFR_INFO_COUNT) + return {}; + return LFR_INFO_NAME[pname]; +} + +int lfr_info_lookup(const std::string_view& pname) +{ + int i = 0; + for(const auto& name : LFR_INFO_NAME) + { + if(name == pname) + { + return i; + } + ++i; + } + return -1; +} diff --git a/lib/meson.build b/lib/meson.build new file mode 100644 index 0000000..822473e --- /dev/null +++ b/lib/meson.build @@ -0,0 +1,25 @@ +fresample_sources = [ + 'filter.cpp', + 'info_name.cpp', + 'param.cpp', + 'param_name.cpp', + 'resample_s16func.cpp', + 'swap16.cpp', +] + +cc = meson.get_compiler('c') +m_dep = cc.find_library('m', required : false) + +fresample = static_library( + 'fresample', + fresample_sources, + include_directories: '../include', + dependencies: [ + m_dep, + ], + cpp_args: '-O0' +) + +fresample_dep = declare_dependency( + link_with: fresample, +) diff --git a/lib/param.cpp b/lib/param.cpp new file mode 100644 index 0000000..58bcf5b --- /dev/null +++ b/lib/param.cpp @@ -0,0 +1,236 @@ +/* Copyright 2012 Dietrich Epp */ +#include "param.h" + +void lfr_param::seti(lfr_param_t pname, int value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + set |= 1u << n; + current = 0; + param[n] = (double) value; +} + +void lfr_param::setf(lfr_param_t pname, double value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + set |= 1u << n; + current = 0; + param[n] = value; +} + +void lfr_param::geti(lfr_param_t pname, int *value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + if (!current) + calculate(); + *value = static_cast(param[n] + 0.5); +} + +void lfr_param::getf(lfr_param_t pname, double *value) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return; + if (!current) + calculate(); + *value = param[n]; +} + +constexpr auto MAX_QUALITY = 10; + +struct lfr_quality { + unsigned short atten; /* dB */ + unsigned short transition; /* out of 1000 */ + unsigned short kind; /* 0 = loose, 2 = maxfreq=0 */ + unsigned short minfpass; /* out of 1000 */ +}; + +constexpr lfr_quality LFR_QUALITY[MAX_QUALITY + 1] = { + { 35, 350, 0, 500 }, + { 35, 350, 0, 500 }, + { 35, 350, 0, 500 }, /* low */ + { 35, 350, 0, 500 }, + { 50, 290, 0, 600 }, + { 60, 230, 1, 700 }, /* medium */ + { 80, 180, 1, 800 }, + { 90, 140, 1, 850 }, + { 96, 100, 1, 900 }, /* high */ + { 108, 60, 2, 915 }, + { 120, 30, 2, 930 } /* ultra */ +}; + +constexpr auto MIN_OUTPUT = 1.0 / 128; +constexpr auto MIN_ATTEN = 13; +constexpr auto MAX_ATTEN = 144; +constexpr auto MIN_TRANSITION = 1.0 / 256.0; +constexpr auto MAX_MINPASS = 31.0 / 32.0; +constexpr auto MIN_MINPASS = 8.0 / 32.0; + +#define ISSET(n) ((set & (1u << (LFR_PARAM_ ## n))) != 0) +#define GETF(n) (param[LFR_PARAM_ ## n]) +#define GETI(n) (static_cast(GETF(n) + 0.5)) + +void lfr_param::calculate() +{ + int qual; + double rate; + double f_output, f_transition, f_maxfreq, f_minpass; + double atten, f_stop, f_pass; + double t; + int loose; + + /* Quality */ + if (ISSET(QUALITY)) { + qual = GETI(QUALITY); + if (qual < 0) + qual = 0; + else if (qual > MAX_QUALITY) + qual = MAX_QUALITY; + } else { + qual = 8; + } + GETF(QUALITY) = qual; + + /* Input rate */ + if (ISSET(INRATE)) { + rate = GETF(INRATE); + if (rate <= 0) + rate = -1.0; + } else { + rate = -1.0; + } + GETF(INRATE) = rate; + + /* Output rate */ + if (ISSET(OUTRATE)) { + t = GETF(OUTRATE); + if (rate > 0) { + if (t > rate) { + f_output = 1.0; + GETF(OUTRATE) = rate; + } else { + f_output = t / rate; + if (f_output < MIN_OUTPUT) { + f_output = MIN_OUTPUT; + GETF(OUTRATE) = MIN_OUTPUT * rate; + } + } + } else { + if (t >= 1.0) { + f_output = 1.0; + GETF(OUTRATE) = 1.0; + } else { + f_output = t; + if (f_output < MIN_OUTPUT) { + f_output = MIN_OUTPUT; + GETF(OUTRATE) = MIN_OUTPUT; + } + } + } + } else { + f_output = 1.0; + if (rate > 0) + GETF(OUTRATE) = 1.0; + else + GETF(OUTRATE) = rate; + } + + /* Transition bandwidth */ + if (ISSET(FTRANSITION)) { + f_transition = GETF(FTRANSITION); + if (f_transition < MIN_TRANSITION) + f_transition = 1.0/32; + if (f_transition > 1.0) + f_transition = 1.0; + } else { + f_transition = (0.5/1000) * (double) LFR_QUALITY[qual].transition; + } + GETF(FTRANSITION) = f_transition; + + /* Maximum audible frequency */ + if (ISSET(MAXFREQ)) { + f_maxfreq = GETF(MAXFREQ); + } else { + if (LFR_QUALITY[qual].kind < 2) + f_maxfreq = 20000.0; + else + f_maxfreq = -1; + GETF(MAXFREQ) = f_maxfreq; + } + if (rate > 0) + f_maxfreq = f_maxfreq / rate; + else + f_maxfreq = 1.0; + + /* "Loose" flag */ + if (ISSET(LOOSE)) { + loose = GETI(LOOSE) > 0; + } else { + loose = LFR_QUALITY[qual].kind < 1; + } + GETF(LOOSE) = (double) loose; + + /* Minimum output bandwidth */ + if (ISSET(MINFPASS)) { + f_minpass = GETF(MINFPASS); + if (f_minpass < MIN_MINPASS) + f_minpass = MIN_MINPASS; + else if (f_minpass > MAX_MINPASS) + f_minpass = MAX_MINPASS; + } else { + f_minpass = 0.001 * LFR_QUALITY[qual].minfpass; + } + GETF(MINFPASS) = f_minpass; + + /* Stop band attenuation */ + if (ISSET(ATTEN)) { + atten = GETF(ATTEN); + if (atten < MIN_ATTEN) + atten = MIN_ATTEN; + else if (atten > MAX_ATTEN) + atten = MAX_ATTEN; + } else { + atten = (double) LFR_QUALITY[qual].atten; + } + GETF(ATTEN) = atten; + + /* Stop band frequency */ + if (ISSET(FSTOP)) { + f_stop = GETF(FSTOP); + if (f_stop > 1.0) + f_stop = 1.0; + else if (f_stop < MIN_TRANSITION) + f_stop = MIN_TRANSITION; + } else { + f_stop = 0.5 * f_output; + if (loose && f_maxfreq > 0) { + t = f_output - f_maxfreq; + if (t > f_stop) + f_stop = t; + } + } + GETF(FSTOP) = f_stop; + + /* Pass band frequency */ + if (ISSET(ATTEN)) { + f_pass = GETF(FPASS); + } else { + f_pass = f_stop - f_transition; + t = 0.5 * f_output * f_minpass; + if (t > f_pass) + f_pass = t; + if (f_maxfreq > 0 && f_pass > f_maxfreq) + f_pass = f_maxfreq; + } + t = f_stop - MIN_TRANSITION; + if (t < 0) + t = 0; + if (t < f_pass) + f_pass = t; + GETF(FPASS) = f_pass; +} diff --git a/lib/param.h b/lib/param.h new file mode 100644 index 0000000..ab65c63 --- /dev/null +++ b/lib/param.h @@ -0,0 +1,135 @@ +/* Copyright 2012 Dietrich Epp */ +#pragma once + +#include + +/* + Parameters for the filter generator. + + Filter generation goes through two stages. + + 1. In the first stage, the resampling parameters are used to create + a filter specification. The filter specification consists of + normalized frequencies for the pass band, stop band, and stop band + attenuation. This stage uses simple logic to create a filter + specification that "makes sense" for any input. It relaxes the + filter specification for ultrasonic (inaudible) frequencies and + ensures that enough of the input signal passes through. + + 2. In the second stage, an FIR filter is generated that fits the + filter specified by the first stage. + + Normally, for resampling, you will specify QUALITY, INRATE, and + OUTRATE. A filter specification for the conversion will be + automatically generated with the given subjective quality level. + + If you are a signal-processing guru, you can create the filter + specification directly by setting FPASS, FSTOP, and ATTEN. + */ +enum lfr_param_t { + /* High level filter parameters. These are typi*cally the only + parameters you will need to set. */ + + /* Filter quality, default 8. An integer between 0 and 10 which + determines the default values for other para*meters. */ + LFR_PARAM_QUALITY, + + /* Input sample rate, in Hz. The default is -1, which creates a + generic filter. */ + LFR_PARAM_INRATE, + + /* Output sample rate. If the input sample rate is specified, + then this is measured in Hz. Otherwise, if *the input rate is + -1, then this value is relative to the input sample rate (so + you would use 0.5 for downsampling by a factor of two). + Defaults to the same value as the input sample rate, which + creates a filter which can be used for upsampling at any ratio. + Note that increasing the output rate above the input rate has + no effect, all upsampling filters for a given input frequency + are identical. */ + LFR_PARAM_OUTRATE, + + /* + Medium level filter parameters. These parame*ters affect how the + filter specification is generated from the input and output + sample rates. Most of these parameters have default values + which depend on the QUALITY setting. + */ + + /* The width of the filter transition band, as a fraction of the + input sample rate. This value will be enlar*ged to extend the + transition band to MAXFREQ and will be narrowed to extend the + pass band to MINBW. The default value depends on the QUALITY + setting, and gets narrower as QUALITY increases. */ + LFR_PARAM_FTRANSITION, + + /* Maximum audible frequency. The pass band will be narrowed to + fit within the range of audible frequencies.* Default value is + 20 kHz if the input frequency is set, otherwise this parameter + is unused. If you want to preserve ultrasonic frequencies, + disable this parameter by setting it to -1. This is disabled + by default at absurd quality settings (9 and 10). */ + LFR_PARAM_MAXFREQ, + + /* A flag which allows aliasing noise as long as it is above + MAXFREQ. This flag improves the subjective *quality of + low-quality filters by increasing their bandwidth, but causes + problems for high-quality filters by increasing noise. Default + value depends on QUALITY setting, and is set for low QUALITY + values. Has no effect if MAXFREQ is disabled. */ + LFR_PARAM_LOOSE, + + /* Minimum size of pass band, as a fraction of the output + bandwidth. This prevents the filter designe*r from filtering + out the entire signal, which can happen when downsampling by a + large enough ratio. The default value is 0.5 at low quality + settings, and higher at high quality settings. The filter size + can increase dramatically as this number approaches 1.0. */ + LFR_PARAM_MINFPASS, + + /* + Filter specification. These parameters are n*ormally generated + from the higher level parameters. If the filter specification + is set, then the higher level parameters will all be ignored. + */ + + /* The end of the pass band, as a fraction of the input sample + rate. Normally, the filter designer chooses* this value. */ + LFR_PARAM_FPASS, + + /* The start of the stop band, as a fraction of the input sample + rate. Normally, the filter designer chooses* this value. */ + LFR_PARAM_FSTOP, + + /* Desired stop band attenuation, in dB. Larger numbers are + increase filter quality. Default value depe*nds on the QUALITY + setting. */ + LFR_PARAM_ATTEN, + + LFR_PARAM_COUNT +}; + +class lfr_param { + /* + Bit set of which parameters were set by the user. + */ + unsigned set{0}; + + /* + Set to 1 if the derived parameters are up to date. + */ + int current{0}; + +public: + /* + Parameter values. + */ + std::array param; + + void calculate(); + void seti(lfr_param_t pname, int value); + void setf(lfr_param_t pname, double value); + void geti(lfr_param_t pname, int* value); + void getf(lfr_param_t pname, double* value); +}; + diff --git a/lib/param_name.cpp b/lib/param_name.cpp new file mode 100644 index 0000000..b90fc1c --- /dev/null +++ b/lib/param_name.cpp @@ -0,0 +1,41 @@ +/* Copyright 2012 Dietrich Epp */ +#include "defs.h" +#include "fresample.h" +#include +#include +using namespace std::string_view_literals; + +constexpr std::array LFR_PARAM_NAME { + "quality"sv, + "inrate"sv, + "outrate"sv, + "ftransition"sv, + "maxfreq"sv, + "loose"sv, + "minfpass"sv, + "fpass"sv, + "fstop"sv, + "atten"sv +}; + +std::string_view lfr_param_name(lfr_param_t pname) +{ + int n = pname; + if (n < 0 || n >= LFR_PARAM_COUNT) + return {}; + return LFR_PARAM_NAME[n]; +} + +int lfr_param_lookup(const std::string_view& pname) +{ + int i = 0; + for(const auto& name : LFR_PARAM_NAME) + { + if(name == pname) + { + return i; + } + ++i; + } + return -1; +} diff --git a/lib/resample.h b/lib/resample.h new file mode 100644 index 0000000..0ae8856 --- /dev/null +++ b/lib/resample.h @@ -0,0 +1,84 @@ +/* Copyright 2012 Dietrich Epp */ +#pragma once + +#include "defs.h" +#include "filter.h" + +/* + Resampling function naming convention: + + lfr_resample__ + + atype: audio data type, s16 or f32 + nchan: number of channels, n1 or n2 + ftype: filter coefficient type, s16 or f32 + + feature: CPU feature set required +*/ + +/* ======================================== */ + +void lfr_resample_s16n1s16_scalar( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1s16_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1s16_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +/* ======================================== */ + +void lfr_resample_s16n2s16_scalar( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2s16_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2s16_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +/* ======================================== */ + +void lfr_resample_s16n1f32_scalar( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1f32_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n1f32_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +/* ======================================== */ + +void lfr_resample_s16n2f32_scalar( lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2f32_sse2( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); + +void lfr_resample_s16n2f32_altivec( + lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, + void *out, int outlen, const void *in, int inlen, + const lfr_filter *filter); diff --git a/lib/resample_func.h b/lib/resample_func.h new file mode 100644 index 0000000..9e894d4 --- /dev/null +++ b/lib/resample_func.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "resample.h" +#include "filter.h" + +namespace { + template + constexpr result_type dither_truncate(type value, unsigned dither) + { + using limits = std::numeric_limits; + using dither_limits = std::numeric_limits; + if constexpr(std::is_floating_point_v) + { + value += dither * type{1} / type{1ul << dither_limits::digits}; + value = std::floor(value); + } + else + { + static_assert(std::is_integral_v, "Unsupported acc_type"); + + value += dither >> (dither_limits::digits - limits::digits); // 17 for short + value >>= limits::digits; // 15 for short + } + + return std::clamp(value, limits::min(), limits::max()); + } +} + +template +void lfr_resample(lfr_fixed_t& pos, const lfr_fixed_t inv_ratio, unsigned& dither, void* out, std::size_t outlen, const void* in, std::size_t inlen, const lfr_filter& filter) +{ + static_assert(channels > 0, "0 channels do not make any sense"); + + auto inp = reinterpret_cast(in); + auto outp = reinterpret_cast(out); + auto fd = reinterpret_cast(filter.data.data()); + + const auto flen = filter.nsamp; + const auto log2nfilt = filter.log2nfilt; + auto x = pos; + std::array ds{dither}; + for(std::size_t i = 1; i < channels; ++i) + { + ds[i] = LCG_A * ds[i - 1] + LCG_C; + } + + for(std::size_t i = 0; i < outlen; ++i) + { + /* fn: filter number + ff0: filter factor for filter fn + ff1: filter factor for filter fn+1 */ + + const auto fn = (static_cast(x >> 1) >> (31 - log2nfilt)) & ((1u << log2nfilt) - 1); + int ff1 = (static_cast(x >> (32 - log2nfilt - INTERP_BIT))) & ((1u << INTERP_BIT) - 1); + int ff0 = (1u << INTERP_BIT) - ff1; + + /* off: offset in input corresponding to first sample in filter */ + auto off = static_cast(x >> 32); + /* fidx0, fidx1: start, end indexes in FIR data */ + int fidx0 = std::max(-off, 0); + int fidx1 = std::min(inlen - off, flen); + + /* acc: FIR accumulator */ + std::array acc{0}; + + for(int j = fidx0; j < fidx1; ++j) + { + acc_type f = (fd[(fn + 0) * flen + j] * ff0 + fd[(fn + 1) * flen + j] * ff1) / (1 << INTERP_BIT); + for(std::size_t c = 0; c < channels; ++c) + { + acc[c] += inp[(j + off) * channels + c] * f; + } + } + + for(std::size_t c = 0; c < channels; ++c) + { + outp[i * channels + c] = dither_truncate(acc[c], ds[c]); + + ds[c] = LCG_A2 * ds[c] + LCG_C2; + } + + x += inv_ratio; + } + + pos = x; + dither = ds[0]; +} diff --git a/lib/resample_s16func.cpp b/lib/resample_s16func.cpp new file mode 100644 index 0000000..07c086e --- /dev/null +++ b/lib/resample_s16func.cpp @@ -0,0 +1,51 @@ +/* Copyright 2012 Dietrich Epp */ +#include "defs.h" +#include "filter.h" +#include "resample.h" +#include "resample_func.h" + +namespace { + template + void helper(lfr_fixed_t* pos, lfr_fixed_t inv_ratio, unsigned* dither, void* out, int outlen, const void* in, int inlen, const lfr_filter* filter) + { + return func(*pos, inv_ratio, *dither, out, outlen, in, inlen, *filter); + } +} + +lfr_resample_func_t +lfr_resample_s16func(int nchan, const struct lfr_filter *filter) +{ + const auto ftype = filter->type; + switch (nchan) { + case 1: + switch (ftype) { + case LFR_FTYPE_S16: + return helper>; + + case LFR_FTYPE_F32: + return helper>; + + default: + break; + } + break; + + case 2: + switch (ftype) { + case LFR_FTYPE_S16: + return helper>; + + case LFR_FTYPE_F32: + return helper>; + + default: + break; + } + break; + + default: + break; + } + + return NULL; +} diff --git a/lib/swap16.cpp b/lib/swap16.cpp new file mode 100644 index 0000000..f20efc0 --- /dev/null +++ b/lib/swap16.cpp @@ -0,0 +1,65 @@ +/* Copyright 2012 Dietrich Epp */ +#include "convert.h" +#include + +void lfr_swap16(void *dest, const void *src, size_t count) +{ + size_t i, n; + unsigned x, y; + const unsigned char *s8; + unsigned char *d8; + const unsigned short *s16; + unsigned short *d16; + const unsigned *s32; + unsigned *d32; + + if (!count) + return; + + if ((((uintptr_t) dest | (uintptr_t) src) & 1u) != 0) { + /* completely unaligned */ + s8 = reinterpret_cast(src); + d8 = reinterpret_cast(dest); + for (i = 0; i < count; ++i) { + x = s8[i*2+0]; + y = s8[i*2+1]; + d8[i*2+0] = static_cast(y); + d8[i*2+1] = static_cast(x); + } + } else if ((((uintptr_t) dest - (uintptr_t) src) & 3u) != 0) { + /* 16-bit aligned */ + s16 = reinterpret_cast(src); + d16 = reinterpret_cast(dest);; + for (i = 0; i < count; ++i) { + x = s16[i]; + d16[i] = static_cast((x >> 8) | (x << 8)); + } + } else { + /* 16-bit aligned, with 32-bit aligned delta */ + s16 = reinterpret_cast(src); + d16 = reinterpret_cast(dest); + n = count; + if ((uintptr_t) dest & 3u) { + x = s16[0]; + d16[0] = static_cast((x >> 8) | (x << 8)); + n -= 1; + s16 += 1; + d16 += 1; + } + s32 = reinterpret_cast(s16); + d32 = reinterpret_cast(d16); + for (i = 0; i < n/2; ++i) { + x = s32[i]; + d32[i] = ((x >> 8) & 0xff00ffu) | ((x & 0xff00ffu) << 8); + } + d32 += n/2; + s32 += n/2; + n -= (n/2)*2; + if (n) { + s16 = reinterpret_cast(s32); + d16 = reinterpret_cast(d32); + x = s16[0]; + d16[0] = static_cast((x >> 8) | (x << 8)); + } + } +} diff --git a/lib/types.h b/lib/types.h new file mode 100644 index 0000000..c023a9a --- /dev/null +++ b/lib/types.h @@ -0,0 +1,44 @@ +#pragma once +#include + +/* + A 32.32 fixed point number. Th*is is used for expressing fractional + positions in a buffer. + + When used for converting from sample rate f_s to f_d, the timing + error at time t is bounded by t * 2^-33 * r_d / r_s. For a + pessimistic conversion ratio, 8 kHz -> 192 kHz, this means that it + will take at least five days to accumulate one millisecond of error. + */ +using lfr_fixed_t = std::int64_t; + + +/* ======================================== + Sample formats + ======================================== */ + +/* + Audio sample formats. + */ +enum lfr_fmt_t { + LFR_FMT_U8, + LFR_FMT_S16BE, + LFR_FMT_S16LE, + LFR_FMT_S24BE, + LFR_FMT_S24LE, + LFR_FMT_F32BE, + LFR_FMT_F32LE, + LFR_FMT_COUNT +}; + + +/* + Specialized resampling function, designed to work with a specific + sample format and filter type. Do not attempt to reuse a function + for a different filter. + + Note that lengths are still measured in frames, but since the + function is specialized for a given format and number of channels, + there is no need to specify the format or number of channels. +*/ +using lfr_resample_func_t = void (*)( lfr_fixed_t *pos, lfr_fixed_t inv_ratio, unsigned *dither, void *out, int outlen, const void *in, int inlen, const struct lfr_filter *filter); \ No newline at end of file -- cgit v1.2.3-54-g00ecf