[Sound-open-firmware] [PATCH V2] SRC: Use a multi-channel FIR core to optimize speed plus other cleanup
This patch changes the sample rate conversion processing to use a single multi-channel filter instead of mono filter instances called once per channel. The filter output is now rounded by adding 1/2 LSB. The polyphase filter input block sizes are multiplied in SRC initialization to reach close to, or exactly, the period length in order to reduce polyphase filter call overhead.
The polyphase filter is now called via a function pointer for more flexibility with audio data formats. Currently S32_LE and S24_4LE are supported.
Code cleanup includes removal of redundant variables and some debug print code.
Signed-off-by: Seppo Ingalsuo seppo.ingalsuo@linux.intel.com --- src/audio/src.c | 200 +++++++++++------------ src/audio/src_core.c | 448 +++++++++++++++++++++++++++------------------------ src/audio/src_core.h | 42 +---- 3 files changed, 329 insertions(+), 361 deletions(-)
diff --git a/src/audio/src.c b/src/audio/src.c index 4c4bc6e..9fe81d3 100644 --- a/src/audio/src.c +++ b/src/audio/src.c @@ -55,7 +55,7 @@
/* src component private data */ struct comp_data { - struct polyphase_src src[PLATFORM_MAX_CHANNELS]; + struct polyphase_src src; struct src_param param; int32_t *delay_lines; uint32_t sink_rate; @@ -63,16 +63,16 @@ struct comp_data { int32_t *sbuf_w_ptr; int32_t *sbuf_r_ptr; int sbuf_avail; - int sign_extend_s24; /* Set if need to copy sign bit to b24..b31 */ void (* src_func)(struct comp_dev *dev, struct comp_buffer *source, struct comp_buffer *sink, size_t *consumed, size_t *produced); + void (* polyphase_func)(struct src_stage_prm *s); };
/* Fallback function */ -static void fallback_s32(struct comp_dev *dev, struct comp_buffer *source, +static void src_fallback(struct comp_dev *dev, struct comp_buffer *source, struct comp_buffer *sink, size_t *bytes_read, size_t *bytes_written) { *bytes_read = 0; @@ -84,101 +84,110 @@ static void src_2s_s32_default(struct comp_dev *dev, struct comp_buffer *source, struct comp_buffer *sink, size_t *bytes_read, size_t *bytes_written) { - struct polyphase_src *s; struct src_stage_prm s1; struct src_stage_prm s2; - int j; + int s1_blk_in; + int s1_blk_out; + int s2_blk_in; + int s2_blk_out; struct comp_data *cd = comp_get_drvdata(dev); int32_t *dest = (int32_t *) sink->w_ptr; int32_t *src = (int32_t *) source->r_ptr; - int32_t *sbuf_addr = cd->delay_lines; int32_t *sbuf_end_addr = &cd->delay_lines[cd->param.sbuf_length]; int32_t sbuf_size = cd->param.sbuf_length * sizeof(int32_t); - const int nch = dev->params.channels; - const int s1_blk_in = cd->src[0].stage1->blk_in * nch; - const int s1_blk_out = cd->src[0].stage1->blk_out * nch; - const int s2_blk_in = cd->src[0].stage2->blk_in * nch; - const int s2_blk_out = cd->src[0].stage2->blk_out * nch; + int nch = dev->params.channels; int sbuf_free = cd->param.sbuf_length - cd->sbuf_avail; - int source_check = (source->avail >> 2) - s1_blk_in; /* for int32_t */ - int sink_check = (sink->free >> 2) - s2_blk_out; /* For int32_t */ int n_read = 0; int n_written = 0; int n1 = 0; int n2 = 0; + int avail_b = source->avail; + int free_b = sink->free; + int sz = sizeof(int32_t);
- s1.times = 1; s1.x_end_addr = source->end_addr; s1.x_size = source->size; - s1.x_inc = nch; s1.y_end_addr = sbuf_end_addr; s1.y_size = sbuf_size; - s1.y_inc = nch; + s1.state = &cd->src.state1; + s1.stage = cd->src.stage1; + s1.x_rptr = src; + s1.y_wptr = cd->sbuf_w_ptr; + s1.nch = nch;
- s2.times = 1; s2.x_end_addr = sbuf_end_addr; s2.x_size = sbuf_size; - s2.x_inc = nch; s2.y_end_addr = sink->end_addr; s2.y_size = sink->size; - s2.y_inc = nch; + s2.state = &cd->src.state2; + s2.stage = cd->src.stage2; + s2.x_rptr = cd->sbuf_r_ptr; + s2.y_wptr = dest; + s2.nch = nch; +
- /* 1st stage runs once a long multiplied length block. - * The stage buffer much be large enough to fit one s1 output block - * plus one s2 input block plus jitter in s2 consumption. + /* Test if 1st stage can be run with default block length to reach + * the period length or just under it. */ - while ((n1 < cd->param.stage1_times_max) - && (n_read <= source_check) - && (sbuf_free >= s1_blk_out)) { - for (j = 0; j < nch; j++) { - s = &cd->src[j]; /* Point to src[] for this channel */ - s1.x_rptr = src++; - s1.y_wptr = cd->sbuf_w_ptr++; - src_circ_inc_wrap(&src, source->end_addr, source->size); - src_circ_inc_wrap(&cd->sbuf_w_ptr, sbuf_end_addr, sbuf_size); - s1.state = &s->state1; - s1.stage = s->stage1; - if (cd->sign_extend_s24) - src_polyphase_stage_cir_s24(&s1); - else - src_polyphase_stage_cir(&s1); - - } + s1.times = cd->param.stage1_times; + s1_blk_in = s1.times * cd->src.stage1->blk_in * nch; + s1_blk_out = s1.times * cd->src.stage1->blk_out * nch; + if ((avail_b >= s1_blk_in * sz) && (sbuf_free >= s1_blk_out)) { + cd->polyphase_func(&s1); + + cd->sbuf_w_ptr = s1.y_wptr; + cd->sbuf_avail += s1_blk_out; n_read += s1_blk_in; + avail_b -= s1_blk_in * sz; + sbuf_free -= s1_blk_out; + n1 = s1.times; + } + + /* Run one block at time the remaining data for 1st stage. */ + s1.times = 1; + s1_blk_in = cd->src.stage1->blk_in * nch; + s1_blk_out = cd->src.stage1->blk_out * nch; + while ((n1 < cd->param.stage1_times_max) && (avail_b >= s1_blk_in * sz) + && (sbuf_free >= s1_blk_out)) { + cd->polyphase_func(&s1); + + cd->sbuf_w_ptr = s1.y_wptr; cd->sbuf_avail += s1_blk_out; + n_read += s1_blk_in; + avail_b -= s1_blk_in * sz; sbuf_free -= s1_blk_out; - src = s1.x_rptr - nch + 1; - cd->sbuf_w_ptr = s1.y_wptr - nch + 1; - src_circ_dec_wrap(&src, source->addr, source->size); - src_circ_dec_wrap(&cd->sbuf_w_ptr, sbuf_addr, sbuf_size); - n1++; + n1 += s1.times; + } + + /* Test if 2nd stage can be run with default block length. 
*/ + s2.times = cd->param.stage2_times; + s2_blk_in = s2.times * cd->src.stage2->blk_in * nch; + s2_blk_out = s2.times * cd->src.stage2->blk_out * nch; + if ((cd->sbuf_avail >= s2_blk_in) && (free_b >= s2_blk_out * sz)) { + cd->polyphase_func(&s2); + + cd->sbuf_r_ptr = s2.x_rptr; + cd->sbuf_avail -= s2_blk_in; + free_b -= s2_blk_out * sz; + n_written += s2_blk_out; + n2 = s2.times; }
- /* 2nd stage runs as many min size blocks as buffers allow */ + + /* Run one block at time the remaining 2nd stage output */ + s2.times = 1; + s2_blk_in = cd->src.stage2->blk_in * nch; + s2_blk_out = cd->src.stage2->blk_out * nch; while ((n2 < cd->param.stage2_times_max) && (cd->sbuf_avail >= s2_blk_in) - && (n_written <= sink_check)) { - for (j = 0; j < nch; j++) { - s2.x_rptr = cd->sbuf_r_ptr++; - s2.y_wptr = dest++; - src_circ_inc_wrap(&cd->sbuf_r_ptr, sbuf_end_addr, sbuf_size); - src_circ_inc_wrap(&dest, sink->end_addr, sink->size); - s = &cd->src[j]; /* Point to src[] for this channel */ - s2.state = &s->state2; - s2.stage = s->stage2; - if (cd->sign_extend_s24) - src_polyphase_stage_cir_s24(&s2); - else - src_polyphase_stage_cir(&s2); - - } - cd->sbuf_r_ptr = s2.x_rptr - nch + 1; - dest = s2.y_wptr - nch + 1; - src_circ_dec_wrap(&cd->sbuf_r_ptr, sbuf_addr, sbuf_size); - src_circ_dec_wrap(&dest, sink->addr, sink->size); - n_written += s2_blk_out; + && (free_b >= s2_blk_out * sz)) { + cd->polyphase_func(&s2); + + cd->sbuf_r_ptr = s2.x_rptr; cd->sbuf_avail -= s2_blk_in; - n2++; + free_b -= s2_blk_out * sz; + n_written += s2_blk_out; + n2 += s2.times; } *bytes_read = sizeof(int32_t) * n_read; *bytes_written = sizeof(int32_t) * n_written; @@ -189,42 +198,27 @@ static void src_1s_s32_default(struct comp_dev *dev, struct comp_buffer *source, struct comp_buffer *sink, size_t *bytes_read, size_t *bytes_written) { - struct polyphase_src *s; struct src_stage_prm s1; - int j; struct comp_data *cd = comp_get_drvdata(dev); - int32_t *dest = (int32_t *) sink->w_ptr; - int32_t *src = (int32_t *) source->r_ptr; int nch = dev->params.channels; int n_read = 0; int n_written = 0;
s1.times = cd->param.stage1_times; + s1.x_rptr = (int32_t *) source->r_ptr; s1.x_end_addr = source->end_addr; s1.x_size = source->size; - s1.x_inc = nch; + s1.y_wptr = (int32_t *) sink->w_ptr; s1.y_end_addr = sink->end_addr; s1.y_size = sink->size; - s1.y_inc = nch; - s1.x_rptr = src + nch - 1; - s1.y_wptr = dest + nch - 1; - - for (j = 0; j < nch; j++) { - s = &cd->src[j]; /* Point to src for this channel */ - s1.x_rptr = src++; - s1.y_wptr = dest++; - src_circ_inc_wrap(&src, source->end_addr, source->size); - src_circ_inc_wrap(&dest, sink->end_addr, sink->size); - s1.state = &s->state1; - s1.stage = s->stage1; - if (cd->sign_extend_s24) - src_polyphase_stage_cir_s24(&s1); - else - src_polyphase_stage_cir(&s1); - - n_read += cd->param.blk_in; - n_written += cd->param.blk_out; - } + s1.state = &cd->src.state1; + s1.stage = cd->src.stage1; + s1.nch = dev->params.channels; + + cd->polyphase_func(&s1); + + n_read += nch * cd->param.blk_in; + n_written += nch * cd->param.blk_out; *bytes_read = n_read * sizeof(int32_t); *bytes_written = n_written * sizeof(int32_t); } @@ -271,7 +265,6 @@ static struct comp_dev *src_new(struct sof_ipc_comp *comp) struct sof_ipc_comp_src *src; struct sof_ipc_comp_src *ipc_src = (struct sof_ipc_comp_src *) comp; struct comp_data *cd; - int i;
trace_src("new");
@@ -299,8 +292,8 @@ static struct comp_dev *src_new(struct sof_ipc_comp *comp)
cd->delay_lines = NULL; cd->src_func = src_2s_s32_default; - for (i = 0; i < PLATFORM_MAX_CHANNELS; i++) - src_polyphase_reset(&cd->src[i]); + cd->polyphase_func = src_polyphase_stage_cir; + src_polyphase_reset(&cd->src);
dev->state = COMP_STATE_READY; return dev; @@ -334,10 +327,8 @@ static int src_params(struct comp_dev *dev) uint32_t sink_rate; int32_t *buffer_start; int n = 0; - int i; int err; int frames_is_for_source; - int nch; int q;
trace_src("par"); @@ -345,10 +336,10 @@ static int src_params(struct comp_dev *dev) /* SRC supports S24_4LE and S32_LE formats */ switch (config->frame_fmt) { case SOF_IPC_FRAME_S24_4LE: - cd->sign_extend_s24 = 1; + cd->polyphase_func = src_polyphase_stage_cir_s24; break; case SOF_IPC_FRAME_S32_LE: - cd->sign_extend_s24 = 0; + cd->polyphase_func = src_polyphase_stage_cir; break; default: trace_src_error("sr0"); @@ -407,11 +398,7 @@ static int src_params(struct comp_dev *dev) buffer_start = cd->delay_lines + cd->param.sbuf_length;
/* Initize SRC for actual sample rate */ - nch = MIN(params->channels, PLATFORM_MAX_CHANNELS); - for (i = 0; i < nch; i++) { - n = src_polyphase_init(&cd->src[i], &cd->param, buffer_start); - buffer_start += cd->param.single_src; - } + n = src_polyphase_init(&cd->src, &cd->param, buffer_start);
/* Reset stage buffer */ cd->sbuf_r_ptr = cd->delay_lines; @@ -434,7 +421,7 @@ static int src_params(struct comp_dev *dev) * muted if copy() is run. */ trace_src("SFa"); - cd->src_func = fallback_s32; + cd->src_func = src_fallback; return -EINVAL; break; } @@ -532,7 +519,6 @@ static int src_copy(struct comp_dev *dev)
/* Run SRC function if buffers avail and free allow */ if (((int) source->avail >= need_source) && ((int) sink->free >= need_sink)) { - /* Run src */ cd->src_func(dev, source, sink, &consumed, &produced);
/* Calc new free and available if data was processed. These @@ -561,14 +547,12 @@ static int src_preload(struct comp_dev *dev)
static int src_reset(struct comp_dev *dev) { - int i; struct comp_data *cd = comp_get_drvdata(dev);
trace_src("SRe");
cd->src_func = src_2s_s32_default; - for (i = 0; i < PLATFORM_MAX_CHANNELS; i++) - src_polyphase_reset(&cd->src[i]); + src_polyphase_reset(&cd->src);
comp_set_state(dev, COMP_CMD_RESET); return 0; diff --git a/src/audio/src_core.c b/src/audio/src_core.c index 4ee2efd..080256c 100644 --- a/src/audio/src_core.c +++ b/src/audio/src_core.c @@ -45,12 +45,20 @@ #include "src_core.h" #include "src_config.h"
+#define trace_src(__e) trace_event(TRACE_CLASS_SRC, __e) +#define tracev_src(__e) tracev_event(TRACE_CLASS_SRC, __e) +#define trace_src_error(__e) trace_error(TRACE_CLASS_SRC, __e) + /* TODO: These should be defined somewhere else. */ #define SOF_RATES_LENGTH 15 int sof_rates[SOF_RATES_LENGTH] = {8000, 11025, 12000, 16000, 18900, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000, 176400, 192000};
+/* The FIR maximum lenghts are per channel so need to multiply them */ +#define MAX_FIR_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_FIR_DELAY_SIZE) +#define MAX_OUT_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_OUT_DELAY_SIZE) + /* Calculate ceil() for integer division */ int src_ceil_divide(int a, int b) { @@ -131,6 +139,13 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch, int num; int frames2;
+ if (nch > PLATFORM_MAX_CHANNELS) { + trace_src_error("che"); + tracev_value(nch); + return -EINVAL; + } + + a->nch = nch; a->idx_in = src_find_fs(src_in_fs, NUM_IN_FS, fs_in); a->idx_out = src_find_fs(src_out_fs, NUM_OUT_FS, fs_out);
@@ -146,8 +161,8 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
stage1 = src_table1[a->idx_out][a->idx_in]; stage2 = src_table2[a->idx_out][a->idx_in]; - a->fir_s1 = src_fir_delay_length(stage1); - a->out_s1 = src_out_delay_length(stage1); + a->fir_s1 = nch * src_fir_delay_length(stage1); + a->out_s1 = nch * src_out_delay_length(stage1);
/* Find out how many additional times the SRC can be executed while having block size less or equal to max_frames. @@ -191,8 +206,8 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch, a->stage2_times_max = 0; a->sbuf_length = 0; } else { - a->fir_s2 = src_fir_delay_length(stage2); - a->out_s2 = src_out_delay_length(stage2); + a->fir_s2 = nch * src_fir_delay_length(stage2); + a->out_s2 = nch * src_out_delay_length(stage2); /* 2x is an empirically tested length. Since the sink buffer * capability to receive samples varies a shorter stage 2 output * block will create a peak in internal buffer usage. @@ -200,8 +215,8 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch, a->sbuf_length = 2 * nch * stage1->blk_out * a->stage1_times_max; }
- a->single_src = a->fir_s1 + a->fir_s2 + a->out_s1 + a->out_s2; - a->total = a->sbuf_length + nch * a->single_src; + a->src_multich = a->fir_s1 + a->fir_s2 + a->out_s1 + a->out_s2; + a->total = a->sbuf_length + a->src_multich;
return 0; } @@ -212,8 +227,6 @@ static void src_state_reset(struct src_state *state) state->fir_delay_size = 0; state->out_delay_size = 0; state->fir_wi = 0; - state->fir_ri = 0; - state->out_wi = 0; state->out_ri = 0; }
@@ -229,17 +242,8 @@ static int init_stages( src->number_of_stages = n; src->stage1 = stage1; src->stage2 = stage2; - src->blk_in = p->blk_in; - src->blk_out = p->blk_out; - if (n == 1) { - src->stage1_times = p->stage1_times; - src->stage2_times = 0; - if (stage1->blk_out == 0) - return -EINVAL; - } else { - src->stage1_times = p->stage1_times; - src->stage2_times = p->stage2_times; - } + if ((n == 1) && (stage1->blk_out == 0)) + return -EINVAL;
/* Delay line sizes */ src->state1.fir_delay_size = p->fir_s1; @@ -262,10 +266,10 @@ static int init_stages( }
/* Check the sizes are less than MAX */ - if ((src->state1.fir_delay_size > MAX_FIR_DELAY_SIZE) - || (src->state1.out_delay_size > MAX_OUT_DELAY_SIZE) - || (src->state2.fir_delay_size > MAX_FIR_DELAY_SIZE) - || (src->state2.out_delay_size > MAX_OUT_DELAY_SIZE)) { + if ((src->state1.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH) + || (src->state1.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH) + || (src->state2.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH) + || (src->state2.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH)) { src->state1.fir_delay = NULL; src->state1.out_delay = NULL; src->state2.fir_delay = NULL; @@ -279,12 +283,7 @@ static int init_stages( void src_polyphase_reset(struct polyphase_src *src) {
- src->mute = 0; src->number_of_stages = 0; - src->blk_in = 0; - src->blk_out = 0; - src->stage1_times = 0; - src->stage2_times = 0; src->stage1 = NULL; src->stage2 = NULL; src_state_reset(&src->state1); @@ -294,14 +293,12 @@ void src_polyphase_reset(struct polyphase_src *src) int src_polyphase_init(struct polyphase_src *src, struct src_param *p, int32_t *delay_lines_start) { - int n_stages; - int ret; struct src_stage *stage1; struct src_stage *stage2; + int n_stages; + int ret;
if ((p->idx_in < 0) || (p->idx_out < 0)) { - src->blk_in = p->blk_in; - src->blk_out = p->blk_out; return -EINVAL; }
@@ -335,109 +332,157 @@ int src_polyphase_init(struct polyphase_src *src, struct src_param *p, #if SRC_SHORT == 1
/* Calculate a FIR filter part that does not need circular modification */ -static inline void fir_part(int64_t *y, int ntaps, const int16_t c[], int *ic, - int32_t d[], int *id) + +static inline void fir_part(int64_t y[], int *id, int *ic, + const int32_t data[], const int16_t coef[], int nch_x_taps, int nch) { + int64_t tap0; + int64_t tap1; int n; int64_t a = 0; + int64_t b = 0; + int c = *ic; + int d = *id; + int d_end = d - nch_x_taps;
/* Data is Q1.31, coef is Q1.15, product is Q2.46 */ - for (n = 0; n < (ntaps >> 1); n++) { - a += (int64_t) c[*ic] * d[*id] - + (int64_t) c[*ic + 1] * d[*id - 1]; - *ic += 2; - *id -= 2; + if (nch == 2) { + for (n = 0; n < (nch_x_taps >> 2); n++) { + tap0 = coef[c++]; + tap1 = coef[c++]; + b += data[d--] * tap0; + a += data[d--] * tap0; + b += data[d--] * tap1; + a += data[d--] * tap1; + } + if (d > d_end) { + tap0 = coef[c++]; + b += data[d--] * tap0; + a += data[d--] * tap0; + } + y[1] += b; + y[0] += a; + } else { + while (d > d_end) { + tap0 = coef[c++]; + for (n = nch - 1; n >= 0; n--) + y[n] += data[d--] * tap0; + } } - if (ntaps & 1) - a += (int64_t) c[(*ic)++] * d[(*id)--]; - - *y += a; + *ic = c; + *id = d; } + #else
-/* Calculate a FIR filter part that does not need circular modification */ -static inline void fir_part(int64_t *y, int ntaps, const int32_t c[], int *ic, - int32_t d[], int *id) +static inline void fir_part(int64_t y[], int *id, int *ic, + const int32_t data[], const int32_t coef[], int nch_x_taps, int nch) { + int64_t tap0; + int64_t tap1; int n; int64_t a = 0; - - /* Data is Q8.24, coef is Q1.23, product is Q9.47 */ - for (n = 0; n < (ntaps >> 1); n++) { - a += (int64_t) c[*ic] * d[*id] - + (int64_t) c[*ic + 1] * d[*id - 1]; - *ic += 2; - *id -= 2; + int64_t b = 0; + int c = *ic; + int d = *id; + int d_end = d - nch_x_taps; + + /* Data is Q1.31, coef is Q1.23, product is Q2.54 */ + if (nch == 2) { + for (n = 0; n < (nch_x_taps >> 2); n++) { + tap0 = coef[c++]; + tap1 = coef[c++]; + b += data[d--] * tap0; + a += data[d--] * tap0; + b += data[d--] * tap1; + a += data[d--] * tap1; + } + if (d > d_end) { + tap0 = coef[c++]; + b += data[d--] * tap0; + a += data[d--] * tap0; + } + y[1] += b; + y[0] += a; + } else { + while (d > d_end) { + tap0 = coef[c++]; + for (n = nch - 1; n >= 0; n--) + y[n] += data[d--] * tap0; + } } - if (ntaps & 1) - a += (int64_t) c[(*ic)++] * d[(*id)--]; - - *y += a; + *ic = c; + *id = d; } + #endif
#if SRC_SHORT == 1
-static inline int32_t fir_filter( - struct src_state *fir, const int16_t coefs[], - int *coefi, int filter_length, int shift) +static void fir_filter(int ri0, int *ci, int wi0, int32_t in_delay[], + int32_t out_delay[], const int16_t coefs[], int dsm1, int taps, + int shift, int nch) { - int64_t y = 0; - int n1; int n2; - - n1 = fir->fir_ri + 1; - if (n1 > filter_length) { - /* No need to un-wrap fir read index, make sure fir_fi - * is ge 0 after FIR computation. - */ - fir_part(&y, filter_length, coefs, coefi, fir->fir_delay, - &fir->fir_ri); + int i; + int64_t y[PLATFORM_MAX_CHANNELS]; + int ri = ri0; + int wi = wi0; + int n1 = ri0 + 1; /* Convert to number of sequential frames */ + int qshift = 15 + shift; /* Q2.46 -> Q2.31 */ + int32_t rnd = 1 << (qshift - 1); /* Half LSB */ + int nch_x_taps = nch * taps; + + /* Initialize to half LSB for rounding */ + for (i = 0; i < nch; i++) + y[i] = rnd; + + if (n1 >= nch_x_taps) { + fir_part(y, &ri, ci, in_delay, coefs, nch_x_taps, nch); } else { - n2 = filter_length - n1; - /* Part 1, loop n1 times, fir_ri becomes -1 */ - fir_part(&y, n1, coefs, coefi, fir->fir_delay, &fir->fir_ri); - - /* Part 2, unwrap fir_ri, continue rest of filter */ - fir->fir_ri = fir->fir_delay_size - 1; - fir_part(&y, n2, coefs, coefi, fir->fir_delay, &fir->fir_ri); + n2 = nch_x_taps - n1; + fir_part(y, &ri, ci, in_delay, coefs, n1, nch); + ri = dsm1; + fir_part(y, &ri, ci, in_delay, coefs, n2, nch); } - /* Q2.46 -> Q2.31, saturate to Q1.31 */ - y = y >> (15 + shift);
- return(int32_t) sat_int32(y); + for (i = 0; i < nch; i++) + out_delay[wi++] = sat_int32(y[i] >> qshift); } #else
-static inline int32_t fir_filter( - struct src_state *fir, const int32_t coefs[], - int *coefi, int filter_length, int shift) +static void fir_filter(int ri0, int *ci, int wi0, int32_t in_delay[], + int32_t out_delay[], const int32_t coefs[], int dsm1, int taps, + int shift, int nch) { - int64_t y = 0; - int n1; int n2; - - n1 = fir->fir_ri + 1; - if (n1 > filter_length) { - /* No need to un-wrap fir read index, make sure fir_fi - * is ge 0 after FIR computation. - */ - fir_part(&y, filter_length, coefs, coefi, fir->fir_delay, - &fir->fir_ri); + int i; + int64_t y[PLATFORM_MAX_CHANNELS]; + int ri = ri0; + int wi = wi0; + int n1 = ri0 + 1; /* Convert to number of sequential frames */ + int qshift = 23 + shift; /* Q2.54 -> Q2.31 */ + int32_t rnd = 1 << (qshift - 1); /* Half LSB */ + int nch_x_taps = nch * taps; + + /* Initialize to half LSB for rounding */ + for (i = 0; i < nch; i++) + y[i] = rnd; + + if (n1 >= nch_x_taps) { + fir_part(y, &ri, ci, in_delay, coefs, nch_x_taps, nch); } else { - n2 = filter_length - n1; - /* Part 1, loop n1 times, fir_ri becomes -1 */ - fir_part(&y, n1, coefs, coefi, fir->fir_delay, &fir->fir_ri); - - /* Part 2, unwrap fir_ri, continue rest of filter */ - fir->fir_ri = fir->fir_delay_size - 1; - fir_part(&y, n2, coefs, coefi, fir->fir_delay, &fir->fir_ri); + n2 = nch_x_taps - n1; + fir_part(y, &ri, ci, in_delay, coefs, n1, nch); + ri = dsm1; + fir_part(y, &ri, ci, in_delay, coefs, n2, nch); } - /* Q9.47 -> Q9.24, saturate to Q8.24 */ - y = y >> (23 + shift);
- return(int32_t) sat_int32(y); + for (i = 0; i < nch; i++) + out_delay[wi++] = sat_int32(y[i] >> qshift); + } + #endif
void src_polyphase_stage_cir(struct src_stage_prm * s) @@ -447,29 +492,42 @@ void src_polyphase_stage_cir(struct src_stage_prm * s) int n; int m; int f; - int c; - int r; + int ci; + int ri; int n_wrap_fir; int n_wrap_buf; int n_wrap_min; int n_min; - int32_t z; + int wi; + const void *coef = cfg->coefs; + int32_t *in_delay = fir->fir_delay; + int32_t *out_delay = fir->out_delay; + int dsm1 = fir->fir_delay_size - 1; + int shift = cfg->shift; + int nch = s->nch; + int rewind = -nch * (cfg->blk_in + + (cfg->num_of_subfilters - 1) * cfg->idm) + nch - 1; + int nch_x_idm = cfg->idm * nch; + int nch_x_odm = cfg->odm * nch; + size_t sz = sizeof(int32_t); + int blk_in_bytes = nch * cfg->blk_in * sz; + int blk_out_bytes = nch * cfg->num_of_subfilters * sz; +
for (n = 0; n < s->times; n++) { /* Input data */ - m = s->x_inc * cfg->blk_in; + m = blk_in_bytes; while (m > 0) { - n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) - * s->x_inc; + n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) * sz; n_wrap_buf = s->x_end_addr - s->x_rptr; n_wrap_min = (n_wrap_fir < n_wrap_buf) ? n_wrap_fir : n_wrap_buf; n_min = (m < n_wrap_min) ? m : n_wrap_min; while (n_min > 0) { fir->fir_delay[fir->fir_wi++] = *s->x_rptr; - s->x_rptr += s->x_inc; - n_min -= s->x_inc; - m -= s->x_inc; + s->x_rptr++; + n_min -= sz; + m -= sz; } /* Check for wrap */ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size); @@ -478,41 +536,38 @@ void src_polyphase_stage_cir(struct src_stage_prm * s) }
/* Filter */ - c = 0; - r = fir->fir_wi - cfg->blk_in - - (cfg->num_of_subfilters - 1) * cfg->idm; - if (r < 0) - r += fir->fir_delay_size; + ci = 0; /* Reset to 1st coefficient */ + ri = fir->fir_wi + rewind; /* Newest data for last subfilter */ + if (ri < 0) + ri += fir->fir_delay_size;
- fir->out_wi = fir->out_ri; + wi = fir->out_ri; for (f = 0; f < cfg->num_of_subfilters; f++) { - fir->fir_ri = r; - z = fir_filter(fir, cfg->coefs, &c, - cfg->subfilter_length, cfg->shift); - r += cfg->idm; - if (r >= fir->fir_delay_size) - r -= fir->fir_delay_size; - - fir->out_delay[fir->out_wi] = z; - fir->out_wi += cfg->odm; - if (fir->out_wi >= fir->out_delay_size) - fir->out_wi -= fir->out_delay_size; + fir_filter(ri, &ci, wi, in_delay, out_delay, coef, + dsm1, cfg->subfilter_length, shift, nch); + + wi += nch_x_odm; + if (wi >= fir->out_delay_size) + wi -= fir->out_delay_size; + + ri += nch_x_idm; /* Next sub-filter start */ + if (ri >= fir->fir_delay_size) + ri -= fir->fir_delay_size; }
/* Output */ - m = s->y_inc * cfg->num_of_subfilters; + m = blk_out_bytes; while (m > 0) { - n_wrap_fir = (fir->out_delay_size - fir->out_ri) - * s->y_inc; + n_wrap_fir = (fir->out_delay_size - fir->out_ri) * sz; n_wrap_buf = s->y_end_addr - s->y_wptr; n_wrap_min = (n_wrap_fir < n_wrap_buf) ? n_wrap_fir : n_wrap_buf; n_min = (m < n_wrap_min) ? m : n_wrap_min; while (n_min > 0) { *s->y_wptr = fir->out_delay[fir->out_ri++]; - s->y_wptr += s->y_inc; - n_min -= s->y_inc; - m -= s->y_inc; + s->y_wptr++; + n_min -= sz; + m -= sz; } /* Check wrap */ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size); @@ -522,38 +577,48 @@ void src_polyphase_stage_cir(struct src_stage_prm * s) } }
-void src_polyphase_stage_cir_s24(struct src_stage_prm * s) +void src_polyphase_stage_cir_s24(struct src_stage_prm *s) { struct src_state *fir = s->state; struct src_stage *cfg = s->stage; int n; int m; int f; - int c; - int r; + int ci; + int ri; int n_wrap_fir; int n_wrap_buf; int n_wrap_min; int n_min; - int32_t z; - int32_t se; + int wi; + const void *coef = cfg->coefs; + int32_t *in_delay = fir->fir_delay; + int32_t *out_delay = fir->out_delay; + int dsm1 = fir->fir_delay_size - 1; + int shift = cfg->shift; + int nch = s->nch; + int rewind = -nch * (cfg->blk_in + + (cfg->num_of_subfilters - 1) * cfg->idm) + nch - 1; + int nch_x_idm = cfg->idm * nch; + int nch_x_odm = cfg->odm * nch; + size_t sz = sizeof(int32_t); + int blk_in_bytes = nch * cfg->blk_in * sz; + int blk_out_bytes = nch * cfg->num_of_subfilters * sz;
for (n = 0; n < s->times; n++) { /* Input data */ - m = s->x_inc * cfg->blk_in; + m = blk_in_bytes; while (m > 0) { - n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) - * s->x_inc; + n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) * sz; n_wrap_buf = s->x_end_addr - s->x_rptr; n_wrap_min = (n_wrap_fir < n_wrap_buf) ? n_wrap_fir : n_wrap_buf; n_min = (m < n_wrap_min) ? m : n_wrap_min; while (n_min > 0) { - se = *s->x_rptr << 8; - fir->fir_delay[fir->fir_wi++] = se >> 8; - s->x_rptr += s->x_inc; - n_min -= s->x_inc; - m -= s->x_inc; + fir->fir_delay[fir->fir_wi++] = *s->x_rptr << 8; + s->x_rptr++; + n_min -= sz; + m -= sz; } /* Check for wrap */ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size); @@ -562,41 +627,38 @@ void src_polyphase_stage_cir_s24(struct src_stage_prm * s) }
/* Filter */ - c = 0; - r = fir->fir_wi - cfg->blk_in - - (cfg->num_of_subfilters - 1) * cfg->idm; - if (r < 0) - r += fir->fir_delay_size; + ci = 0; /* Reset to 1st coefficient */ + ri = fir->fir_wi + rewind; /* Newest data for last subfilter */ + if (ri < 0) + ri += fir->fir_delay_size;
- fir->out_wi = fir->out_ri; + wi = fir->out_ri; for (f = 0; f < cfg->num_of_subfilters; f++) { - fir->fir_ri = r; - z = fir_filter(fir, cfg->coefs, &c, - cfg->subfilter_length, cfg->shift); - r += cfg->idm; - if (r >= fir->fir_delay_size) - r -= fir->fir_delay_size; - - fir->out_delay[fir->out_wi] = z; - fir->out_wi += cfg->odm; - if (fir->out_wi >= fir->out_delay_size) - fir->out_wi -= fir->out_delay_size; + fir_filter(ri, &ci, wi, in_delay, out_delay, coef, + dsm1, cfg->subfilter_length, shift, nch); + + wi += nch_x_odm; + if (wi >= fir->out_delay_size) + wi -= fir->out_delay_size; + + ri += nch_x_idm; /* Next sub-filter start */ + if (ri >= fir->fir_delay_size) + ri -= fir->fir_delay_size; }
/* Output */ - m = s->y_inc * cfg->num_of_subfilters; + m = blk_out_bytes; while (m > 0) { - n_wrap_fir = (fir->out_delay_size - fir->out_ri) - * s->y_inc; + n_wrap_fir = (fir->out_delay_size - fir->out_ri) * sz; n_wrap_buf = s->y_end_addr - s->y_wptr; n_wrap_min = (n_wrap_fir < n_wrap_buf) ? n_wrap_fir : n_wrap_buf; n_min = (m < n_wrap_min) ? m : n_wrap_min; while (n_min > 0) { - *s->y_wptr = fir->out_delay[fir->out_ri++]; - s->y_wptr += s->y_inc; - n_min -= s->y_inc; - m -= s->y_inc; + *s->y_wptr = fir->out_delay[fir->out_ri++] >> 8; + s->y_wptr++; + n_min -= sz; + m -= sz; } /* Check wrap */ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size); @@ -604,47 +666,5 @@ void src_polyphase_stage_cir_s24(struct src_stage_prm * s) fir->out_ri = 0; } } -} -
-#ifdef MODULE_TEST - -void src_print_info(struct polyphase_src * src) -{ - - int n1; - int n2; - - n1 = src->stage1->filter_length; - n2 = src->stage2->filter_length; - printf("SRC stages %d\n", src->number_of_stages); - printf("SRC input blk %d\n", src->blk_in); - printf("SRC output blk %d\n", src->blk_out); - printf("SRC stage1 %d times\n", src->stage1_times); - printf("SRC stage2 %d times\n", src->stage2_times); - - printf("SRC1 filter length %d\n", n1); - printf("SRC1 subfilter length %d\n", src->stage1->subfilter_length); - printf("SRC1 number of subfilters %d\n", - src->stage1->num_of_subfilters); - printf("SRC1 idm %d\n", src->stage1->idm); - printf("SRC1 odm %d\n", src->stage1->odm); - printf("SRC1 input blk %d\n", src->stage1->blk_in); - printf("SRC1 output blk %d\n", src->stage1->blk_out); - printf("SRC1 halfband %d\n", src->stage1->halfband); - printf("SRC1 FIR delay %d\n", src->state1.fir_delay_size); - printf("SRC1 out delay %d\n", src->state1.out_delay_size); - - printf("SRC2 filter length %d\n", n2); - printf("SRC2 subfilter length %d\n", src->stage2->subfilter_length); - printf("SRC2 number of subfilters %d\n", - src->stage2->num_of_subfilters); - printf("SRC2 idm %d\n", src->stage2->idm); - printf("SRC2 odm %d\n", src->stage2->odm); - printf("SRC2 input blk %d\n", src->stage2->blk_in); - printf("SRC2 output blk %d\n", src->stage2->blk_out); - printf("SRC2 halfband %d\n", src->stage2->halfband); - printf("SRC2 FIR delay %d\n", src->state2.fir_delay_size); - printf("SRC2 out delay %d\n", src->state2.out_delay_size); } -#endif diff --git a/src/audio/src_core.h b/src/audio/src_core.h index 3859e6f..3ea6028 100644 --- a/src/audio/src_core.h +++ b/src/audio/src_core.h @@ -41,7 +41,7 @@ struct src_param { int out_s1; int out_s2; int sbuf_length; - int single_src; + int src_multich; int total; int blk_in; int blk_out; @@ -51,6 +51,7 @@ struct src_param { int stage2_times_max; int idx_in; int idx_out; + int nch; };
struct src_stage { @@ -70,20 +71,13 @@ struct src_state { int fir_delay_size; int out_delay_size; int fir_wi; - int fir_ri; - int out_wi; int out_ri; int32_t *fir_delay; int32_t *out_delay; };
struct polyphase_src { - int mute; int number_of_stages; - int blk_in; - int blk_out; - int stage1_times; - int stage2_times; struct src_stage *stage1; struct src_stage *stage2; struct src_state state1; @@ -91,15 +85,14 @@ struct polyphase_src { };
struct src_stage_prm { + int nch; int times; int32_t *x_rptr; int32_t *x_end_addr; size_t x_size; - int x_inc; int32_t *y_wptr; int32_t *y_end_addr; size_t y_size; - int y_inc; struct src_state *state; struct src_stage *stage; }; @@ -116,31 +109,6 @@ static inline void src_circ_dec_wrap(int32_t **ptr, int32_t *addr, size_t size) *ptr = (int32_t *) ((size_t) * ptr + size); }
-static inline void src_polyphase_mute(struct polyphase_src *src) -{ - src->mute = 1; -} - -static inline void src_polyphase_unmute(struct polyphase_src *src) -{ - src->mute = 0; -} - -static inline int src_polyphase_getmute(struct polyphase_src *src) -{ - return src->mute; -} - -static inline int src_polyphase_get_blk_in(struct polyphase_src *src) -{ - return src->blk_in; -} - -static inline int src_polyphase_get_blk_out(struct polyphase_src *src) -{ - return src->blk_out; -} - void src_polyphase_reset(struct polyphase_src *src);
int src_polyphase_init(struct polyphase_src *src, struct src_param *p, @@ -162,8 +130,4 @@ int32_t src_output_rates(void);
int src_ceil_divide(int a, int b);
-#ifdef MODULE_TEST -void src_print_info(struct polyphase_src *src); -#endif - #endif
participants (1)
-
Seppo Ingalsuo