lib/filter.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334

/* Copyright 2012 Dietrich Epp <depp@zdome.net> */
#include "filter.h"
#include "param.h"
#include "../include/fresample.h"
#include <cmath>
#include <memory>
#include <exception>
#include <numeric>
#include <algorithm>
#include <iostream>

namespace {
/*
  Simple sinc function implementation.  Note that this is the
  unnormalized sinc function, with zeroes at nonzero multiples of pi.
*/
constexpr double sinc(double x)
{
	return std::abs(x) < 1e-8 ? 1.0 : std::sin(x) / x;
}

/*
  Modified Bessel function of the first kind of order 0, i0.  Only
  considered valid over the domain [0,18].

  This is the naive algorithm.

  In "Computation of Special Functions" by Shanjie Zhang and Jianming
  Jin, this technique is only used for 0<X<18.  However, we only need
  it for the Kaiser window, and a beta of 18 is fairly ridiculous --
  it gives a side lobe height of -180 dB, which is not only beyond the
  actual SNR of high end recording systems, it is beyond the SNR of
  theoretically perfect 24-bit systems.
*/
constexpr double bessel_i0(double x)
{
    double a = 1.0, y = 1.0, x2 = x*x;
    for (int i = 1; i <= 50; ++i) {
        a *= 0.25 * x2 / (i * i);
        y += a;
        if (a < y * 1e-15)
            break;
    }
    return y;
}

std::vector<double> filter_calculate(std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta)
{
	std::vector<double> data(nsamp * nfilt);

	const auto yscale = 2.0 * cutoff / bessel_i0(beta);
	const auto xscale = (8.0 * std::atan(1.0)) * cutoff;
	for (std::size_t i = 0; i < nfilt; ++i) {
		const auto x0 = (nsamp - 1) / 2 + offset * i;
		for (std::size_t j = 0; j < nsamp; ++j) {
			const auto x = j - x0;
			const auto t = x * (2.0 / (nsamp - 2));
			double y;
			if (t <= -1.0 || t >= 1.0) {
				y = 0.0;
			} else {
				y = yscale * bessel_i0(beta * sqrt(1.0 - t * t)) * sinc(xscale * x);
			}
			data[i * nsamp + j] = y;
		}
	}

	return data;
}

template<typename type, type scale>
void filter_calculate_integral(type* data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta)
{
	const auto& filter = filter_calculate(nsamp, nfilt, offset, cutoff, beta);

	for (std::size_t i = 0; i < nfilt; ++i) {
		const auto sum = accumulate(begin(filter) + i * nsamp, begin(filter) + (i + 1) * nsamp, 0.0);
		const auto fac = static_cast<double>(scale) / sum;
		auto err = 0.0;
		for (std::size_t j = 0; j < nsamp; ++j) {
			const auto y = filter[i * nsamp + j] * fac + err;
			const auto z = std::round(y);
			err = z - y;
			data[i * nsamp + j] = static_cast<type>(z);
		}
	}
}

void lfr_filter_calculate_s16(std::int16_t* data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta)
{
	if(nsamp <= 8)
	{
		filter_calculate_integral<std::int16_t, 31500>(data, nsamp, nfilt, offset, cutoff, beta);
	}
	else
	{
		filter_calculate_integral<std::int16_t, 32767>(data, nsamp, nfilt, offset, cutoff, beta);
	}
}

void lfr_filter_calculate_f32(float *data, std::size_t nsamp, std::size_t nfilt, double offset, double cutoff, double beta)
{
	const auto& filter = filter_calculate(nsamp, nfilt, offset, cutoff, beta);

	transform(begin(filter), end(filter), data, [](const auto val){ return static_cast<float>(val); });
}
}

lfr_filter::lfr_filter(lfr_param& param)
{
    double dw, dw2, lenf, atten2, atten3, maxerror, beta;
    double ierror, rerror, error;
    double t, a, ulp;
    int align=8, max_oversample;

    param.calculate();

    f_pass = param.param[LFR_PARAM_FPASS];
    f_stop = param.param[LFR_PARAM_FSTOP];
    atten  = param.param[LFR_PARAM_ATTEN];

    /* atten2 is the attenuation of the ideal (un-rounded) filter,
       maxerror is the ratio of a 0 dBFS signal to the permitted
       roundoff and interpolation error.  This divides the
       "attenuation" into error budgets with the assumption that the
       error from each source is uncorrelated.  Note that atten2 is
       measured in dB, whereas maxerror is a ratio to a full scale
       signal.  */
    atten2 = atten + 3.0;
    maxerror = exp((atten + 3.0) * (-log(10.0) / 20.0));

    /* Calculate the filter order from the attenuation.  Round up to
       the nearest multiple of the SIMD register alignment.  */
    dw = (8.0 * std::atan(1.0)) * (f_stop - f_pass);
    lenf = (atten2 - 8.0) / (4.57 * dw) + 1;
    nsamp = static_cast<int>(std::ceil(lenf / align) * align);
    if (nsamp < align)
        nsamp = align;

    /* ========================================
       Determine filter coefficient size.
       ======================================== */

    /* We can calculate expected roundoff error from filter length.
       We assume that the roundoff error at each coefficient is an
       independent uniform variable with a range of 1 ULP, and that
       there is an equal roundoff step during interpolation.  This
       gives roundoff error with a variance of N/6 ULP^2.  Roundoff
       error is a ratio relative to 1 ULP.  */
    rerror = std::sqrt(nsamp * (1.0 / 6.0));

    /* The interpolation error can be adjusted by choosing the amount
       of oversampling.  The curvature of the sinc function is bounded
       by (2*pi*f)^2, so the interpolation error is bounded by
       (pi*f/M)^2.

       We set a maximum oversampling of 2^8 for 16-bit, and 2^12 for
       floating point.  */
    t = f_pass * (4.0 * std::atan(1.0));
    a = 1.0 / 256;
    ierror = (t * a) * (t * a);
    ulp = 1.0 / 32768.0;
    /* We assume, again, that interpolation and roundoff error are
       uncorrelated and normal.  If error at 16-bit exceeds our
       budget, then use floating point.  */
    error = (ierror * ierror + rerror * rerror) * (ulp * ulp);
    if (error <= maxerror * maxerror) {
        type = LFR_FTYPE_S16;
        ulp = 1.0 / 32768.0;
        max_oversample = 8;
    } else {
        type = LFR_FTYPE_F32;
        ulp = 1.0 / 16777216.0;
        max_oversample = 12;
    }

    /* Calculate the interpolation error budget by subtracting
       roundoff error from the total error budget, since roundoff
       error is fixed.  */
    ierror = maxerror * maxerror - (rerror * rerror) * (ulp * ulp);
    ierror = (ierror > 0) ? sqrt(ierror) : 0;
    if (ierror < ulp)
        ierror = ulp;

    /* Calculate oversampling from the error budget.  */
    a = (f_pass * (4.0 * std::atan(1.0))) / std::sqrt(ierror);
    log2nfilt = static_cast<int>(std::ceil(std::log(a) * (1.0 / std::log(2.0))));
    if (log2nfilt < 0)
        log2nfilt = 0;
    else if (log2nfilt > max_oversample)
        log2nfilt = max_oversample;

    /* ========================================
       Calculate window parameters
       ======================================== */

    /* Since we rounded up the filter order, we can increase the stop
       band attenuation or bandwidth without making the transition
       bandwidth exceed the design parameters.  This gives a "free"
       boost in quality.  We choose to increase both.

       If we didn't increase these parameters, we would still incur
       the additional computational cost but it would be spent
       increasing the width of the stop band, which is not useful.  */

    /* atten3 is the free stopband attenuation */
    atten3 = (nsamp - 1) * 4.57 * dw + 8;
    t = (-20.0 / std::log(10.0)) * std::log(rerror * ulp);
    if (t < atten3)
        atten3 = t;
    if (atten2 > atten3)
        atten3 = atten2;
    else
        atten3 = 0.5 * (atten2 + atten3);

    /* f_pass2 is the free pass band */
    dw2 = (atten3 - 8.0) / (4.57 * (nsamp - 1));
    auto f_pass2 = f_stop - dw2 * (1.0 / (8.0 * std::atan(1.0)));

    if (atten3 > 50)
        beta = 0.1102 * (atten3 - 8.7);
    else if (atten3 > 21)
      beta = 0.5842 * std::pow(atten3 - 21, 0.4) + 0.07866 * (atten3 - 21);
    else
        beta = 0;

    setup_window(f_pass2, beta);
}

void lfr_filter::setup_window(double cutoff, double beta)
{
    size_t esz = 1, align = 16, nfilt;
    if (nsamp < 1 || log2nfilt < 0)
        goto error;

    switch (type) {
    case LFR_FTYPE_S16:
        esz = sizeof(short);
        break;

    case LFR_FTYPE_F32:
        esz = sizeof(float);
        break;
    }
    nfilt = (1u << log2nfilt) + 1;
    if ((size_t) nsamp > (size_t) -1 / esz)
        goto error;
    if (((size_t) -1 - (align - 1) - sizeof(*this)) / nfilt < nsamp * esz)
        goto error;
    data.resize(nsamp * nfilt * esz + align - 1);

    delay = static_cast<lfr_fixed_t>((nsamp - 1) / 2) << 32;

    switch (type) {
    case LFR_FTYPE_S16:
        lfr_filter_calculate_s16(
            reinterpret_cast<short int*>(data.data()), nsamp, nfilt,
            1.0 / static_cast<double>(1 << log2nfilt), cutoff, beta);
        break;

    case LFR_FTYPE_F32:
        lfr_filter_calculate_f32(
            reinterpret_cast<float*>(data.data()), nsamp, nfilt,
            1.0 / static_cast<double>(1 << log2nfilt), cutoff, beta);
        break;
    }
    return;

error:
    throw std::runtime_error{"Error in filter setup: lfr_filter::setup_window"};
}

int lfr_filter::geti(LFR_INFO_TYPE iname, bool convert) const
{
	switch(iname)
	{
		case LFR_INFO_DELAY:
			return delay * (1.0 / 4294967296.0);
		case LFR_INFO_FPASS:
			return f_pass;
		case LFR_INFO_FSTOP:
			return f_stop;
		case LFR_INFO_ATTEN:
			return atten;
		default:
			if(convert)
			{
				return static_cast<int>(getf(iname, false));
			}
	}
	return {};
}

double lfr_filter::getf(LFR_INFO_TYPE iname, bool convert) const
{
	switch(iname)
	{
		case LFR_INFO_DELAY:
			return static_cast<int>(delay >> 32);
		case LFR_INFO_SIZE:
			return nsamp;
		case LFR_INFO_MEMSIZE:
		{
			int sz = nsamp * ((1 << log2nfilt) + 1);
			switch (type)
			{
				case LFR_FTYPE_S16: return sz * 2;
				case LFR_FTYPE_F32: return sz * 4;
				default: return {};
			}
		}
				default:
					if(convert)
					{
						return geti(iname, false);
					}
	}
	return {};
}

void lfr_filter::resample(
	lfr_fixed_t *pos, lfr_fixed_t inv_ratio,
	unsigned *dither, int nchan,
	void *out, lfr_fmt_t outfmt, int outlen,
	const void *in, lfr_fmt_t infmt, int inlen)
{
	lfr_resample_func_t func;
	if (outfmt == LFR_FMT_S16_NATIVE && infmt == LFR_FMT_S16_NATIVE) {
		func = lfr_resample_s16func(nchan, this);
		if (!func)
			return;
		func(pos, inv_ratio, dither, out, outlen, in, inlen, this);
	}
}