[Sound-open-firmware] [PATCH] SRC: Use a multi-channel FIR core to optimize speed plus other cleanup
Liam Girdwood
liam.r.girdwood at linux.intel.com
Thu Oct 19 23:10:45 CEST 2017
On Wed, 2017-10-18 at 19:56 +0300, Seppo Ingalsuo wrote:
> This patch changes the sample rate conversion processing to use a single
> multi-channel filter instead of per channel called mono filter instances.
> The filter output is now rounded with 1/2 LSB add. The polyphase filter
> input block sizes are multiplied in SRC initialization to reach
> near to or exactly the period length to reduce polyphase filter call
> overhead.
>
> The polyphase filter is now called via function pointer for more
> flexibility with audio data formats. Currently S32_LE and S24_4LE are
> supported.
>
> Code cleanup includes removal of redundant variables and some debug
> print code.
>
> Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo at linux.intel.com>
> ---
> src/audio/src.c | 198 ++++++++++-------------
> src/audio/src_core.c | 447 ++++++++++++++++++++++++++-------------------------
> src/audio/src_core.h | 42 +----
> 3 files changed, 323 insertions(+), 364 deletions(-)
>
> diff --git a/src/audio/src.c b/src/audio/src.c
> index 4c4bc6e..854cb82 100644
> --- a/src/audio/src.c
> +++ b/src/audio/src.c
> @@ -55,7 +55,7 @@
>
> /* src component private data */
> struct comp_data {
> - struct polyphase_src src[PLATFORM_MAX_CHANNELS];
> + struct polyphase_src src;
> struct src_param param;
> int32_t *delay_lines;
> uint32_t sink_rate;
> @@ -63,16 +63,16 @@ struct comp_data {
> int32_t *sbuf_w_ptr;
> int32_t *sbuf_r_ptr;
> int sbuf_avail;
> - int sign_extend_s24; /* Set if need to copy sign bit to b24..b31 */
> void (* src_func)(struct comp_dev *dev,
> struct comp_buffer *source,
> struct comp_buffer *sink,
> size_t *consumed,
> size_t *produced);
> + void (* polyphase_func)(struct src_stage_prm *s);
> };
>
> /* Fallback function */
> -static void fallback_s32(struct comp_dev *dev, struct comp_buffer *source,
> +static void src_fallback(struct comp_dev *dev, struct comp_buffer *source,
> struct comp_buffer *sink, size_t *bytes_read, size_t *bytes_written)
> {
> *bytes_read = 0;
> @@ -84,101 +84,105 @@ static void src_2s_s32_default(struct comp_dev *dev,
> struct comp_buffer *source, struct comp_buffer *sink,
> size_t *bytes_read, size_t *bytes_written)
> {
> - struct polyphase_src *s;
> struct src_stage_prm s1;
> struct src_stage_prm s2;
> - int j;
> + int s1_blk_in;
> + int s1_blk_out;
> + int s2_blk_in;
> + int s2_blk_out;
> struct comp_data *cd = comp_get_drvdata(dev);
> int32_t *dest = (int32_t *) sink->w_ptr;
> int32_t *src = (int32_t *) source->r_ptr;
> - int32_t *sbuf_addr = cd->delay_lines;
> int32_t *sbuf_end_addr = &cd->delay_lines[cd->param.sbuf_length];
> int32_t sbuf_size = cd->param.sbuf_length * sizeof(int32_t);
> - const int nch = dev->params.channels;
> - const int s1_blk_in = cd->src[0].stage1->blk_in * nch;
> - const int s1_blk_out = cd->src[0].stage1->blk_out * nch;
> - const int s2_blk_in = cd->src[0].stage2->blk_in * nch;
> - const int s2_blk_out = cd->src[0].stage2->blk_out * nch;
> + int nch = dev->params.channels;
> int sbuf_free = cd->param.sbuf_length - cd->sbuf_avail;
> - int source_check = (source->avail >> 2) - s1_blk_in; /* for int32_t */
> - int sink_check = (sink->free >> 2) - s2_blk_out; /* For int32_t */
> int n_read = 0;
> int n_written = 0;
> int n1 = 0;
> int n2 = 0;
> + int avail = source->avail >> 2; /* For int32_t */
> + int free = sink->free >> 2; /* For int32_t */
Is this converting bytes to words? Otherwise, does it hard-code the format?
Please add a comment explaining what it's doing.
>
> - s1.times = 1;
> s1.x_end_addr = source->end_addr;
> s1.x_size = source->size;
> - s1.x_inc = nch;
> s1.y_end_addr = sbuf_end_addr;
> s1.y_size = sbuf_size;
> - s1.y_inc = nch;
> + s1.state = &cd->src.state1;
> + s1.stage = cd->src.stage1;
> + s1.x_rptr = src;
> + s1.y_wptr = cd->sbuf_w_ptr;
> + s1.nch = nch;
>
> - s2.times = 1;
> s2.x_end_addr = sbuf_end_addr;
> s2.x_size = sbuf_size;
> - s2.x_inc = nch;
> s2.y_end_addr = sink->end_addr;
> s2.y_size = sink->size;
> - s2.y_inc = nch;
> + s2.state = &cd->src.state2;
> + s2.stage = cd->src.stage2;
> + s2.x_rptr = cd->sbuf_r_ptr;
> + s2.y_wptr = dest;
> + s2.nch = nch;
> +
>
> - /* 1st stage runs once a long multiplied length block.
> - * The stage buffer much be large enough to fit one s1 output block
> - * plus one s2 input block plus jitter in s2 consumption.
> + /* Test if 1st stage can be run with default block length to reach
> + * the period length or just under it.
> */
> - while ((n1 < cd->param.stage1_times_max)
> - && (n_read <= source_check)
> - && (sbuf_free >= s1_blk_out)) {
> - for (j = 0; j < nch; j++) {
> - s = &cd->src[j]; /* Point to src[] for this channel */
> - s1.x_rptr = src++;
> - s1.y_wptr = cd->sbuf_w_ptr++;
> - src_circ_inc_wrap(&src, source->end_addr, source->size);
> - src_circ_inc_wrap(&cd->sbuf_w_ptr, sbuf_end_addr, sbuf_size);
> - s1.state = &s->state1;
> - s1.stage = s->stage1;
> - if (cd->sign_extend_s24)
> - src_polyphase_stage_cir_s24(&s1);
> - else
> - src_polyphase_stage_cir(&s1);
> -
> - }
> + s1.times = cd->param.stage1_times;
> + s1_blk_in = s1.times * cd->src.stage1->blk_in * nch;
> + s1_blk_out = s1.times * cd->src.stage1->blk_out * nch;
> + if ((avail >= s1_blk_in) && (sbuf_free >= s1_blk_out)) {
> + cd->polyphase_func(&s1);
> + cd->sbuf_w_ptr = s1.y_wptr;
> + cd->sbuf_avail += s1_blk_out;
> n_read += s1_blk_in;
> + avail -= s1_blk_in;
> + sbuf_free -= s1_blk_out;
> + n1 = s1.times;
> + }
> +
> + /* Run one block at time the remaining data for 1st stage. */
> + s1.times = 1;
> + s1_blk_in = cd->src.stage1->blk_in * nch;
> + s1_blk_out = cd->src.stage1->blk_out * nch;
> + while ((n1 < cd->param.stage1_times_max) && (avail >= s1_blk_in)
> + && (sbuf_free >= s1_blk_out)) {
> + cd->polyphase_func(&s1);
> + cd->sbuf_w_ptr = s1.y_wptr;
> cd->sbuf_avail += s1_blk_out;
> + n_read += s1_blk_in;
> + avail -= s1_blk_in;
> sbuf_free -= s1_blk_out;
> - src = s1.x_rptr - nch + 1;
> - cd->sbuf_w_ptr = s1.y_wptr - nch + 1;
> - src_circ_dec_wrap(&src, source->addr, source->size);
> - src_circ_dec_wrap(&cd->sbuf_w_ptr, sbuf_addr, sbuf_size);
> - n1++;
> + n1 += s1.times;
> + }
> +
> + /* Test if 2nd stage can be run with default block length. */
> + s2.times = cd->param.stage2_times;
> + s2_blk_in = s2.times * cd->src.stage2->blk_in * nch;
> + s2_blk_out = s2.times * cd->src.stage2->blk_out * nch;
> + if ((cd->sbuf_avail >= s2_blk_in) && (free >= s2_blk_out)) {
> + cd->polyphase_func(&s2);
> + cd->sbuf_r_ptr = s2.x_rptr;
> + cd->sbuf_avail -= s2_blk_in;
> + free -= s2_blk_out;
> + n_written += s2_blk_out;
> + n2 = s2.times;
> }
>
> - /* 2nd stage runs as many min size blocks as buffers allow */
> +
> + /* Run one block at time the remaining 2nd stage output */
> + s2.times = 1;
> + s2_blk_in = cd->src.stage2->blk_in * nch;
> + s2_blk_out = cd->src.stage2->blk_out * nch;
> while ((n2 < cd->param.stage2_times_max)
> && (cd->sbuf_avail >= s2_blk_in)
> - && (n_written <= sink_check)) {
> - for (j = 0; j < nch; j++) {
> - s2.x_rptr = cd->sbuf_r_ptr++;
> - s2.y_wptr = dest++;
> - src_circ_inc_wrap(&cd->sbuf_r_ptr, sbuf_end_addr, sbuf_size);
> - src_circ_inc_wrap(&dest, sink->end_addr, sink->size);
> - s = &cd->src[j]; /* Point to src[] for this channel */
> - s2.state = &s->state2;
> - s2.stage = s->stage2;
> - if (cd->sign_extend_s24)
> - src_polyphase_stage_cir_s24(&s2);
> - else
> - src_polyphase_stage_cir(&s2);
> -
> - }
> - cd->sbuf_r_ptr = s2.x_rptr - nch + 1;
> - dest = s2.y_wptr - nch + 1;
> - src_circ_dec_wrap(&cd->sbuf_r_ptr, sbuf_addr, sbuf_size);
> - src_circ_dec_wrap(&dest, sink->addr, sink->size);
> - n_written += s2_blk_out;
> + && (free >= s2_blk_out)) {
> + cd->polyphase_func(&s2);
> + cd->sbuf_r_ptr = s2.x_rptr;
> cd->sbuf_avail -= s2_blk_in;
> - n2++;
> + free -= s2_blk_out;
> + n_written += s2_blk_out;
> + n2 += s2.times;
> }
> *bytes_read = sizeof(int32_t) * n_read;
> *bytes_written = sizeof(int32_t) * n_written;
> @@ -189,42 +193,27 @@ static void src_1s_s32_default(struct comp_dev *dev,
> struct comp_buffer *source, struct comp_buffer *sink,
> size_t *bytes_read, size_t *bytes_written)
> {
> - struct polyphase_src *s;
> struct src_stage_prm s1;
> - int j;
> struct comp_data *cd = comp_get_drvdata(dev);
> - int32_t *dest = (int32_t *) sink->w_ptr;
> - int32_t *src = (int32_t *) source->r_ptr;
> int nch = dev->params.channels;
> int n_read = 0;
> int n_written = 0;
>
> s1.times = cd->param.stage1_times;
> + s1.x_rptr = (int32_t *) source->r_ptr;
> s1.x_end_addr = source->end_addr;
> s1.x_size = source->size;
> - s1.x_inc = nch;
> + s1.y_wptr = (int32_t *) sink->w_ptr;
> s1.y_end_addr = sink->end_addr;
> s1.y_size = sink->size;
> - s1.y_inc = nch;
> - s1.x_rptr = src + nch - 1;
> - s1.y_wptr = dest + nch - 1;
> -
> - for (j = 0; j < nch; j++) {
> - s = &cd->src[j]; /* Point to src for this channel */
> - s1.x_rptr = src++;
> - s1.y_wptr = dest++;
> - src_circ_inc_wrap(&src, source->end_addr, source->size);
> - src_circ_inc_wrap(&dest, sink->end_addr, sink->size);
> - s1.state = &s->state1;
> - s1.stage = s->stage1;
> - if (cd->sign_extend_s24)
> - src_polyphase_stage_cir_s24(&s1);
> - else
> - src_polyphase_stage_cir(&s1);
> -
> - n_read += cd->param.blk_in;
> - n_written += cd->param.blk_out;
> - }
> + s1.state = &cd->src.state1;
> + s1.stage = cd->src.stage1;
> + s1.nch = dev->params.channels;
> +
> + cd->polyphase_func(&s1);
> +
> + n_read += nch * cd->param.blk_in;
> + n_written += nch * cd->param.blk_out;
> *bytes_read = n_read * sizeof(int32_t);
> *bytes_written = n_written * sizeof(int32_t);
> }
> @@ -271,7 +260,6 @@ static struct comp_dev *src_new(struct sof_ipc_comp *comp)
> struct sof_ipc_comp_src *src;
> struct sof_ipc_comp_src *ipc_src = (struct sof_ipc_comp_src *) comp;
> struct comp_data *cd;
> - int i;
>
> trace_src("new");
>
> @@ -299,8 +287,8 @@ static struct comp_dev *src_new(struct sof_ipc_comp *comp)
>
> cd->delay_lines = NULL;
> cd->src_func = src_2s_s32_default;
> - for (i = 0; i < PLATFORM_MAX_CHANNELS; i++)
> - src_polyphase_reset(&cd->src[i]);
> + cd->polyphase_func = src_polyphase_stage_cir;
> + src_polyphase_reset(&cd->src);
>
> dev->state = COMP_STATE_READY;
> return dev;
> @@ -334,10 +322,8 @@ static int src_params(struct comp_dev *dev)
> uint32_t sink_rate;
> int32_t *buffer_start;
> int n = 0;
> - int i;
> int err;
> int frames_is_for_source;
> - int nch;
> int q;
>
> trace_src("par");
> @@ -345,10 +331,10 @@ static int src_params(struct comp_dev *dev)
> /* SRC supports S24_4LE and S32_LE formats */
> switch (config->frame_fmt) {
> case SOF_IPC_FRAME_S24_4LE:
> - cd->sign_extend_s24 = 1;
> + cd->polyphase_func = src_polyphase_stage_cir_s24;
> break;
> case SOF_IPC_FRAME_S32_LE:
> - cd->sign_extend_s24 = 0;
> + cd->polyphase_func = src_polyphase_stage_cir;
> break;
> default:
> trace_src_error("sr0");
> @@ -407,11 +393,7 @@ static int src_params(struct comp_dev *dev)
> buffer_start = cd->delay_lines + cd->param.sbuf_length;
>
> /* Initize SRC for actual sample rate */
> - nch = MIN(params->channels, PLATFORM_MAX_CHANNELS);
> - for (i = 0; i < nch; i++) {
> - n = src_polyphase_init(&cd->src[i], &cd->param, buffer_start);
> - buffer_start += cd->param.single_src;
> - }
> + n = src_polyphase_init(&cd->src, &cd->param, buffer_start);
>
> /* Reset stage buffer */
> cd->sbuf_r_ptr = cd->delay_lines;
> @@ -434,7 +416,7 @@ static int src_params(struct comp_dev *dev)
> * muted if copy() is run.
> */
> trace_src("SFa");
> - cd->src_func = fallback_s32;
> + cd->src_func = src_fallback;
> return -EINVAL;
> break;
> }
> @@ -532,7 +514,6 @@ static int src_copy(struct comp_dev *dev)
>
> /* Run SRC function if buffers avail and free allow */
> if (((int) source->avail >= need_source) && ((int) sink->free >= need_sink)) {
> - /* Run src */
> cd->src_func(dev, source, sink, &consumed, &produced);
>
> /* Calc new free and available if data was processed. These
> @@ -561,14 +542,12 @@ static int src_preload(struct comp_dev *dev)
>
> static int src_reset(struct comp_dev *dev)
> {
> - int i;
> struct comp_data *cd = comp_get_drvdata(dev);
>
> trace_src("SRe");
>
> cd->src_func = src_2s_s32_default;
> - for (i = 0; i < PLATFORM_MAX_CHANNELS; i++)
> - src_polyphase_reset(&cd->src[i]);
> + src_polyphase_reset(&cd->src);
>
> comp_set_state(dev, COMP_CMD_RESET);
> return 0;
> @@ -576,7 +555,8 @@ static int src_reset(struct comp_dev *dev)
>
> struct comp_driver comp_src = {
> .type = SOF_COMP_SRC,
> - .ops = {
> + .ops =
> + {
Let's keep the kernel.org style here, i.e. the opening brace stays on the same line as ".ops =".
> .new = src_new,
> .free = src_free,
> .params = src_params,
> diff --git a/src/audio/src_core.c b/src/audio/src_core.c
> index 4ee2efd..0d8cc17 100644
> --- a/src/audio/src_core.c
> +++ b/src/audio/src_core.c
> @@ -51,6 +51,10 @@ int sof_rates[SOF_RATES_LENGTH] = {8000, 11025, 12000, 16000, 18900,
> 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000, 176400,
> 192000};
>
> +/* The FIR maximum lengths are per channel so need to multiply them */
> +#define MAX_FIR_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_FIR_DELAY_SIZE)
> +#define MAX_OUT_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_OUT_DELAY_SIZE)
> +
> /* Calculate ceil() for integer division */
> int src_ceil_divide(int a, int b)
> {
> @@ -131,6 +135,10 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
> int num;
> int frames2;
>
> + if (nch > PLATFORM_MAX_CHANNELS)
> + return -EINVAL;
Also report this error via trace_error before returning.
> +
> + a->nch = nch;
> a->idx_in = src_find_fs(src_in_fs, NUM_IN_FS, fs_in);
> a->idx_out = src_find_fs(src_out_fs, NUM_OUT_FS, fs_out);
>
> @@ -146,8 +154,8 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
>
> stage1 = src_table1[a->idx_out][a->idx_in];
> stage2 = src_table2[a->idx_out][a->idx_in];
> - a->fir_s1 = src_fir_delay_length(stage1);
> - a->out_s1 = src_out_delay_length(stage1);
> + a->fir_s1 = nch * src_fir_delay_length(stage1);
> + a->out_s1 = nch * src_out_delay_length(stage1);
>
> /* Find out how many additional times the SRC can be executed
> while having block size less or equal to max_frames.
> @@ -191,8 +199,8 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
> a->stage2_times_max = 0;
> a->sbuf_length = 0;
> } else {
> - a->fir_s2 = src_fir_delay_length(stage2);
> - a->out_s2 = src_out_delay_length(stage2);
> + a->fir_s2 = nch * src_fir_delay_length(stage2);
> + a->out_s2 = nch * src_out_delay_length(stage2);
> /* 2x is an empirically tested length. Since the sink buffer
> * capability to receive samples varies a shorter stage 2 output
> * block will create a peak in internal buffer usage.
> @@ -200,8 +208,8 @@ int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
> a->sbuf_length = 2 * nch * stage1->blk_out * a->stage1_times_max;
> }
>
> - a->single_src = a->fir_s1 + a->fir_s2 + a->out_s1 + a->out_s2;
> - a->total = a->sbuf_length + nch * a->single_src;
> + a->src_multich = a->fir_s1 + a->fir_s2 + a->out_s1 + a->out_s2;
> + a->total = a->sbuf_length + a->src_multich;
>
> return 0;
> }
> @@ -212,8 +220,6 @@ static void src_state_reset(struct src_state *state)
> state->fir_delay_size = 0;
> state->out_delay_size = 0;
> state->fir_wi = 0;
> - state->fir_ri = 0;
> - state->out_wi = 0;
> state->out_ri = 0;
> }
>
> @@ -229,17 +235,8 @@ static int init_stages(
> src->number_of_stages = n;
> src->stage1 = stage1;
> src->stage2 = stage2;
> - src->blk_in = p->blk_in;
> - src->blk_out = p->blk_out;
> - if (n == 1) {
> - src->stage1_times = p->stage1_times;
> - src->stage2_times = 0;
> - if (stage1->blk_out == 0)
> - return -EINVAL;
> - } else {
> - src->stage1_times = p->stage1_times;
> - src->stage2_times = p->stage2_times;
> - }
> + if ((n == 1) && (stage1->blk_out == 0))
> + return -EINVAL;
>
> /* Delay line sizes */
> src->state1.fir_delay_size = p->fir_s1;
> @@ -262,10 +259,10 @@ static int init_stages(
> }
>
> /* Check the sizes are less than MAX */
> - if ((src->state1.fir_delay_size > MAX_FIR_DELAY_SIZE)
> - || (src->state1.out_delay_size > MAX_OUT_DELAY_SIZE)
> - || (src->state2.fir_delay_size > MAX_FIR_DELAY_SIZE)
> - || (src->state2.out_delay_size > MAX_OUT_DELAY_SIZE)) {
> + if ((src->state1.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH)
> + || (src->state1.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH)
> + || (src->state2.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH)
> + || (src->state2.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH)) {
> src->state1.fir_delay = NULL;
> src->state1.out_delay = NULL;
> src->state2.fir_delay = NULL;
> @@ -279,12 +276,7 @@ static int init_stages(
> void src_polyphase_reset(struct polyphase_src *src)
> {
>
> - src->mute = 0;
> src->number_of_stages = 0;
> - src->blk_in = 0;
> - src->blk_out = 0;
> - src->stage1_times = 0;
> - src->stage2_times = 0;
> src->stage1 = NULL;
> src->stage2 = NULL;
> src_state_reset(&src->state1);
> @@ -300,8 +292,6 @@ int src_polyphase_init(struct polyphase_src *src, struct src_param *p,
> struct src_stage *stage2;
>
> if ((p->idx_in < 0) || (p->idx_out < 0)) {
> - src->blk_in = p->blk_in;
> - src->blk_out = p->blk_out;
> return -EINVAL;
> }
>
> @@ -335,141 +325,203 @@ int src_polyphase_init(struct polyphase_src *src, struct src_param *p,
> #if SRC_SHORT == 1
>
> /* Calculate a FIR filter part that does not need circular modification */
> -static inline void fir_part(int64_t *y, int ntaps, const int16_t c[], int *ic,
> - int32_t d[], int *id)
> +
> +static inline void fir_part(int64_t y[], int *id, int *ic,
> + const int32_t data[], const int16_t coef[], int nch_x_taps, int nch)
> {
> + int64_t tap0;
> + int64_t tap1;
> int n;
> int64_t a = 0;
> + int64_t b = 0;
> + int c = *ic;
> + int d = *id;
> + int d_end = d - nch_x_taps;
>
> /* Data is Q1.31, coef is Q1.15, product is Q2.46 */
> - for (n = 0; n < (ntaps >> 1); n++) {
> - a += (int64_t) c[*ic] * d[*id]
> - + (int64_t) c[*ic + 1] * d[*id - 1];
> - *ic += 2;
> - *id -= 2;
> + if (nch == 2) {
> + for (n = 0; n < (nch_x_taps >> 2); n++) {
> + tap0 = coef[c++];
> + tap1 = coef[c++];
> + b += data[d--] * tap0;
> + a += data[d--] * tap0;
> + b += data[d--] * tap1;
> + a += data[d--] * tap1;
> + }
> + if (d > d_end) {
> + tap0 = coef[c++];
> + b += data[d--] * tap0;
> + a += data[d--] * tap0;
> + }
> + y[1] += b;
> + y[0] += a;
> + } else {
> + while (d > d_end) {
> + tap0 = coef[c++];
> + for (n = nch - 1; n >= 0; n--)
> + y[n] += data[d--] * tap0;
> + }
> }
> - if (ntaps & 1)
> - a += (int64_t) c[(*ic)++] * d[(*id)--];
> -
> - *y += a;
> + *ic = c;
> + *id = d;
> }
> +
> #else
>
> -/* Calculate a FIR filter part that does not need circular modification */
> -static inline void fir_part(int64_t *y, int ntaps, const int32_t c[], int *ic,
> - int32_t d[], int *id)
> +static inline void fir_part(int64_t y[], int *id, int *ic,
> + const int32_t data[], const int32_t coef[], int nch_x_taps, int nch)
> {
> + int64_t tap0;
> + int64_t tap1;
> int n;
> int64_t a = 0;
> -
> - /* Data is Q8.24, coef is Q1.23, product is Q9.47 */
> - for (n = 0; n < (ntaps >> 1); n++) {
> - a += (int64_t) c[*ic] * d[*id]
> - + (int64_t) c[*ic + 1] * d[*id - 1];
> - *ic += 2;
> - *id -= 2;
> + int64_t b = 0;
> + int c = *ic;
> + int d = *id;
> + int d_end = d - nch_x_taps;
> +
> + /* Data is Q1.31, coef is Q1.23, product is Q2.54 */
> + if (nch == 2) {
> + for (n = 0; n < (nch_x_taps >> 2); n++) {
> + tap0 = coef[c++];
> + tap1 = coef[c++];
> + b += data[d--] * tap0;
> + a += data[d--] * tap0;
> + b += data[d--] * tap1;
> + a += data[d--] * tap1;
> + }
> + if (d > d_end) {
> + tap0 = coef[c++];
> + b += data[d--] * tap0;
> + a += data[d--] * tap0;
> + }
> + y[1] += b;
> + y[0] += a;
> + } else {
> + while (d > d_end) {
> + tap0 = coef[c++];
> + for (n = nch - 1; n >= 0; n--)
> + y[n] += data[d--] * tap0;
> + }
> }
> - if (ntaps & 1)
> - a += (int64_t) c[(*ic)++] * d[(*id)--];
> -
> - *y += a;
> + *ic = c;
> + *id = d;
> }
> +
> #endif
>
> #if SRC_SHORT == 1
>
> -static inline int32_t fir_filter(
> - struct src_state *fir, const int16_t coefs[],
> - int *coefi, int filter_length, int shift)
> +static void fir_filter(int ri0, int *ci, int wi0, int32_t in_delay[],
> + int32_t out_delay[], const int16_t coefs[], int dsm1, int taps,
> + int shift, int nch)
> {
> - int64_t y = 0;
> - int n1;
> int n2;
> -
> - n1 = fir->fir_ri + 1;
> - if (n1 > filter_length) {
> - /* No need to un-wrap fir read index, make sure fir_fi
> - * is ge 0 after FIR computation.
> - */
> - fir_part(&y, filter_length, coefs, coefi, fir->fir_delay,
> - &fir->fir_ri);
> + int i;
> + int64_t y[PLATFORM_MAX_CHANNELS];
> + int ri = ri0;
> + int wi = wi0;
> + int n1 = ri0 + 1; /* Convert to number of sequential frames */
> + int qshift = 15 + shift; /* Q2.46 -> Q2.31 */
> + int32_t rnd = 1 << (qshift - 1); /* Half LSB */
> + int nch_x_taps = nch * taps;
> +
> + /* Initialize to half LSB for rounding */
> + for (i = 0; i < nch; i++)
> + y[i] = rnd;
> +
> + if (n1 >= nch_x_taps) {
> + fir_part(y, &ri, ci, in_delay, coefs, nch_x_taps, nch);
> } else {
> - n2 = filter_length - n1;
> - /* Part 1, loop n1 times, fir_ri becomes -1 */
> - fir_part(&y, n1, coefs, coefi, fir->fir_delay, &fir->fir_ri);
> -
> - /* Part 2, unwrap fir_ri, continue rest of filter */
> - fir->fir_ri = fir->fir_delay_size - 1;
> - fir_part(&y, n2, coefs, coefi, fir->fir_delay, &fir->fir_ri);
> + n2 = nch_x_taps - n1;
> + fir_part(y, &ri, ci, in_delay, coefs, n1, nch);
> + ri = dsm1;
> + fir_part(y, &ri, ci, in_delay, coefs, n2, nch);
> }
> - /* Q2.46 -> Q2.31, saturate to Q1.31 */
> - y = y >> (15 + shift);
>
> - return(int32_t) sat_int32(y);
> + for (i = 0; i < nch; i++)
> + out_delay[wi++] = sat_int32(y[i] >> qshift);
> }
> #else
>
> -static inline int32_t fir_filter(
> - struct src_state *fir, const int32_t coefs[],
> - int *coefi, int filter_length, int shift)
> +static void fir_filter(int ri0, int *ci, int wi0, int32_t in_delay[],
> + int32_t out_delay[], const int32_t coefs[], int dsm1, int taps,
> + int shift, int nch)
> {
> - int64_t y = 0;
> - int n1;
> int n2;
> -
> - n1 = fir->fir_ri + 1;
> - if (n1 > filter_length) {
> - /* No need to un-wrap fir read index, make sure fir_fi
> - * is ge 0 after FIR computation.
> - */
> - fir_part(&y, filter_length, coefs, coefi, fir->fir_delay,
> - &fir->fir_ri);
> + int i;
> + int64_t y[PLATFORM_MAX_CHANNELS];
> + int ri = ri0;
> + int wi = wi0;
> + int n1 = ri0 + 1; /* Convert to number of sequential frames */
> + int qshift = 23 + shift; /* Q2.54 -> Q2.31 */
> + int32_t rnd = 1 << (qshift - 1); /* Half LSB */
> + int nch_x_taps = nch * taps;
> +
> + /* Initialize to half LSB for rounding */
> + for (i = 0; i < nch; i++)
> + y[i] = rnd;
> +
> + if (n1 >= nch_x_taps) {
> + fir_part(y, &ri, ci, in_delay, coefs, nch_x_taps, nch);
> } else {
> - n2 = filter_length - n1;
> - /* Part 1, loop n1 times, fir_ri becomes -1 */
> - fir_part(&y, n1, coefs, coefi, fir->fir_delay, &fir->fir_ri);
> -
> - /* Part 2, unwrap fir_ri, continue rest of filter */
> - fir->fir_ri = fir->fir_delay_size - 1;
> - fir_part(&y, n2, coefs, coefi, fir->fir_delay, &fir->fir_ri);
> + n2 = nch_x_taps - n1;
> + fir_part(y, &ri, ci, in_delay, coefs, n1, nch);
> + ri = dsm1;
> + fir_part(y, &ri, ci, in_delay, coefs, n2, nch);
> }
> - /* Q9.47 -> Q9.24, saturate to Q8.24 */
> - y = y >> (23 + shift);
>
> - return(int32_t) sat_int32(y);
> + for (i = 0; i < nch; i++)
> + out_delay[wi++] = sat_int32(y[i] >> qshift);
> +
> }
> +
> #endif
>
> void src_polyphase_stage_cir(struct src_stage_prm * s)
> {
> - struct src_state *fir = s->state;
> - struct src_stage *cfg = s->stage;
> int n;
> int m;
> int f;
> - int c;
> - int r;
> + int ci;
> + int ri;
> int n_wrap_fir;
> int n_wrap_buf;
> int n_wrap_min;
> int n_min;
> - int32_t z;
> + int wi;
> +
> + struct src_state *fir = s->state;
> + struct src_stage *cfg = s->stage;
> + const void *coef = cfg->coefs;
> + int32_t *in_delay = fir->fir_delay;
> + int32_t *out_delay = fir->out_delay;
> + int dsm1 = fir->fir_delay_size - 1;
> + int shift = cfg->shift;
> + int nch = s->nch;
> + int rewind = -nch * (cfg->blk_in
> + + (cfg->num_of_subfilters - 1) * cfg->idm) + nch - 1;
> + int nch_x_idm = cfg->idm * nch;
> + int nch_x_odm = cfg->odm * nch;
> + size_t sz = sizeof(int32_t);
> + int blk_in_bytes = nch * cfg->blk_in * sz;
> + int blk_out_bytes = nch * cfg->num_of_subfilters * sz;
> +
>
> for (n = 0; n < s->times; n++) {
> /* Input data */
> - m = s->x_inc * cfg->blk_in;
> + m = blk_in_bytes;
> while (m > 0) {
> - n_wrap_fir = (fir->fir_delay_size - fir->fir_wi)
> - * s->x_inc;
> + n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) * sz;
> n_wrap_buf = s->x_end_addr - s->x_rptr;
> n_wrap_min = (n_wrap_fir < n_wrap_buf)
> ? n_wrap_fir : n_wrap_buf;
> n_min = (m < n_wrap_min) ? m : n_wrap_min;
> while (n_min > 0) {
> fir->fir_delay[fir->fir_wi++] = *s->x_rptr;
> - s->x_rptr += s->x_inc;
> - n_min -= s->x_inc;
> - m -= s->x_inc;
> + s->x_rptr++;
> + n_min -= sz;
> + m -= sz;
> }
> /* Check for wrap */
> src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
> @@ -478,41 +530,38 @@ void src_polyphase_stage_cir(struct src_stage_prm * s)
> }
>
> /* Filter */
> - c = 0;
> - r = fir->fir_wi - cfg->blk_in
> - - (cfg->num_of_subfilters - 1) * cfg->idm;
> - if (r < 0)
> - r += fir->fir_delay_size;
> + ci = 0; /* Reset to 1st coefficient */
> + ri = fir->fir_wi + rewind; /* Newest data for last subfilter */
> + if (ri < 0)
> + ri += fir->fir_delay_size;
>
> - fir->out_wi = fir->out_ri;
> + wi = fir->out_ri;
> for (f = 0; f < cfg->num_of_subfilters; f++) {
> - fir->fir_ri = r;
> - z = fir_filter(fir, cfg->coefs, &c,
> - cfg->subfilter_length, cfg->shift);
> - r += cfg->idm;
> - if (r >= fir->fir_delay_size)
> - r -= fir->fir_delay_size;
> -
> - fir->out_delay[fir->out_wi] = z;
> - fir->out_wi += cfg->odm;
> - if (fir->out_wi >= fir->out_delay_size)
> - fir->out_wi -= fir->out_delay_size;
> + fir_filter(ri, &ci, wi, in_delay, out_delay, coef,
> + dsm1, cfg->subfilter_length, shift, nch);
> +
> + wi += nch_x_odm;
> + if (wi >= fir->out_delay_size)
> + wi -= fir->out_delay_size;
> +
> + ri += nch_x_idm; /* Next sub-filter start */
> + if (ri >= fir->fir_delay_size)
> + ri -= fir->fir_delay_size;
> }
>
> /* Output */
> - m = s->y_inc * cfg->num_of_subfilters;
> + m = blk_out_bytes;
> while (m > 0) {
> - n_wrap_fir = (fir->out_delay_size - fir->out_ri)
> - * s->y_inc;
> + n_wrap_fir = (fir->out_delay_size - fir->out_ri) * sz;
> n_wrap_buf = s->y_end_addr - s->y_wptr;
> n_wrap_min = (n_wrap_fir < n_wrap_buf)
> ? n_wrap_fir : n_wrap_buf;
> n_min = (m < n_wrap_min) ? m : n_wrap_min;
> while (n_min > 0) {
> *s->y_wptr = fir->out_delay[fir->out_ri++];
> - s->y_wptr += s->y_inc;
> - n_min -= s->y_inc;
> - m -= s->y_inc;
> + s->y_wptr++;
> + n_min -= sz;
> + m -= sz;
> }
> /* Check wrap */
> src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
> @@ -522,38 +571,49 @@ void src_polyphase_stage_cir(struct src_stage_prm * s)
> }
> }
>
> -void src_polyphase_stage_cir_s24(struct src_stage_prm * s)
> +void src_polyphase_stage_cir_s24(struct src_stage_prm *s)
> {
> - struct src_state *fir = s->state;
> - struct src_stage *cfg = s->stage;
> int n;
> int m;
> int f;
> - int c;
> - int r;
> + int ci;
> + int ri;
> int n_wrap_fir;
> int n_wrap_buf;
> int n_wrap_min;
> int n_min;
> - int32_t z;
> - int32_t se;
> + int wi;
> +
> + struct src_state *fir = s->state;
> + struct src_stage *cfg = s->stage;
Structs should be declared before local ints; this makes it easier to
reference them against the params passed in.
> + const void *coef = cfg->coefs;
> + int32_t *in_delay = fir->fir_delay;
> + int32_t *out_delay = fir->out_delay;
> + int dsm1 = fir->fir_delay_size - 1;
> + int shift = cfg->shift;
> + int nch = s->nch;
> + int rewind = -nch * (cfg->blk_in
> + + (cfg->num_of_subfilters - 1) * cfg->idm) + nch - 1;
> + int nch_x_idm = cfg->idm * nch;
> + int nch_x_odm = cfg->odm * nch;
> + size_t sz = sizeof(int32_t);
> + int blk_in_bytes = nch * cfg->blk_in * sz;
> + int blk_out_bytes = nch * cfg->num_of_subfilters * sz;
>
> for (n = 0; n < s->times; n++) {
> /* Input data */
> - m = s->x_inc * cfg->blk_in;
> + m = blk_in_bytes;
> while (m > 0) {
> - n_wrap_fir = (fir->fir_delay_size - fir->fir_wi)
> - * s->x_inc;
> + n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) * sz;
> n_wrap_buf = s->x_end_addr - s->x_rptr;
> n_wrap_min = (n_wrap_fir < n_wrap_buf)
> ? n_wrap_fir : n_wrap_buf;
> n_min = (m < n_wrap_min) ? m : n_wrap_min;
> while (n_min > 0) {
> - se = *s->x_rptr << 8;
> - fir->fir_delay[fir->fir_wi++] = se >> 8;
> - s->x_rptr += s->x_inc;
> - n_min -= s->x_inc;
> - m -= s->x_inc;
> + fir->fir_delay[fir->fir_wi++] = *s->x_rptr << 8;
> + s->x_rptr++;
> + n_min -= sz;
> + m -= sz;
> }
> /* Check for wrap */
> src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
> @@ -562,41 +622,38 @@ void src_polyphase_stage_cir_s24(struct src_stage_prm * s)
> }
>
> /* Filter */
> - c = 0;
> - r = fir->fir_wi - cfg->blk_in
> - - (cfg->num_of_subfilters - 1) * cfg->idm;
> - if (r < 0)
> - r += fir->fir_delay_size;
> + ci = 0; /* Reset to 1st coefficient */
> + ri = fir->fir_wi + rewind; /* Newest data for last subfilter */
> + if (ri < 0)
> + ri += fir->fir_delay_size;
>
> - fir->out_wi = fir->out_ri;
> + wi = fir->out_ri;
> for (f = 0; f < cfg->num_of_subfilters; f++) {
> - fir->fir_ri = r;
> - z = fir_filter(fir, cfg->coefs, &c,
> - cfg->subfilter_length, cfg->shift);
> - r += cfg->idm;
> - if (r >= fir->fir_delay_size)
> - r -= fir->fir_delay_size;
> -
> - fir->out_delay[fir->out_wi] = z;
> - fir->out_wi += cfg->odm;
> - if (fir->out_wi >= fir->out_delay_size)
> - fir->out_wi -= fir->out_delay_size;
> + fir_filter(ri, &ci, wi, in_delay, out_delay, coef,
> + dsm1, cfg->subfilter_length, shift, nch);
> +
> + wi += nch_x_odm;
> + if (wi >= fir->out_delay_size)
> + wi -= fir->out_delay_size;
> +
> + ri += nch_x_idm; /* Next sub-filter start */
> + if (ri >= fir->fir_delay_size)
> + ri -= fir->fir_delay_size;
> }
>
> /* Output */
> - m = s->y_inc * cfg->num_of_subfilters;
> + m = blk_out_bytes;
> while (m > 0) {
> - n_wrap_fir = (fir->out_delay_size - fir->out_ri)
> - * s->y_inc;
> + n_wrap_fir = (fir->out_delay_size - fir->out_ri) * sz;
> n_wrap_buf = s->y_end_addr - s->y_wptr;
> n_wrap_min = (n_wrap_fir < n_wrap_buf)
> ? n_wrap_fir : n_wrap_buf;
> n_min = (m < n_wrap_min) ? m : n_wrap_min;
> while (n_min > 0) {
> - *s->y_wptr = fir->out_delay[fir->out_ri++];
> - s->y_wptr += s->y_inc;
> - n_min -= s->y_inc;
> - m -= s->y_inc;
> + *s->y_wptr = fir->out_delay[fir->out_ri++] >> 8;
> + s->y_wptr++;
> + n_min -= sz;
> + m -= sz;
> }
> /* Check wrap */
> src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
> @@ -604,47 +661,5 @@ void src_polyphase_stage_cir_s24(struct src_stage_prm * s)
> fir->out_ri = 0;
> }
> }
> -}
> -
>
> -#ifdef MODULE_TEST
> -
> -void src_print_info(struct polyphase_src * src)
> -{
> -
> - int n1;
> - int n2;
> -
> - n1 = src->stage1->filter_length;
> - n2 = src->stage2->filter_length;
> - printf("SRC stages %d\n", src->number_of_stages);
> - printf("SRC input blk %d\n", src->blk_in);
> - printf("SRC output blk %d\n", src->blk_out);
> - printf("SRC stage1 %d times\n", src->stage1_times);
> - printf("SRC stage2 %d times\n", src->stage2_times);
> -
> - printf("SRC1 filter length %d\n", n1);
> - printf("SRC1 subfilter length %d\n", src->stage1->subfilter_length);
> - printf("SRC1 number of subfilters %d\n",
> - src->stage1->num_of_subfilters);
> - printf("SRC1 idm %d\n", src->stage1->idm);
> - printf("SRC1 odm %d\n", src->stage1->odm);
> - printf("SRC1 input blk %d\n", src->stage1->blk_in);
> - printf("SRC1 output blk %d\n", src->stage1->blk_out);
> - printf("SRC1 halfband %d\n", src->stage1->halfband);
> - printf("SRC1 FIR delay %d\n", src->state1.fir_delay_size);
> - printf("SRC1 out delay %d\n", src->state1.out_delay_size);
> -
> - printf("SRC2 filter length %d\n", n2);
> - printf("SRC2 subfilter length %d\n", src->stage2->subfilter_length);
> - printf("SRC2 number of subfilters %d\n",
> - src->stage2->num_of_subfilters);
> - printf("SRC2 idm %d\n", src->stage2->idm);
> - printf("SRC2 odm %d\n", src->stage2->odm);
> - printf("SRC2 input blk %d\n", src->stage2->blk_in);
> - printf("SRC2 output blk %d\n", src->stage2->blk_out);
> - printf("SRC2 halfband %d\n", src->stage2->halfband);
> - printf("SRC2 FIR delay %d\n", src->state2.fir_delay_size);
> - printf("SRC2 out delay %d\n", src->state2.out_delay_size);
> }
> -#endif
> diff --git a/src/audio/src_core.h b/src/audio/src_core.h
> index 3859e6f..3ea6028 100644
> --- a/src/audio/src_core.h
> +++ b/src/audio/src_core.h
> @@ -41,7 +41,7 @@ struct src_param {
> int out_s1;
> int out_s2;
> int sbuf_length;
> - int single_src;
> + int src_multich;
> int total;
> int blk_in;
> int blk_out;
> @@ -51,6 +51,7 @@ struct src_param {
> int stage2_times_max;
> int idx_in;
> int idx_out;
> + int nch;
> };
>
> struct src_stage {
> @@ -70,20 +71,13 @@ struct src_state {
> int fir_delay_size;
> int out_delay_size;
> int fir_wi;
> - int fir_ri;
> - int out_wi;
> int out_ri;
> int32_t *fir_delay;
> int32_t *out_delay;
> };
>
> struct polyphase_src {
> - int mute;
> int number_of_stages;
> - int blk_in;
> - int blk_out;
> - int stage1_times;
> - int stage2_times;
> struct src_stage *stage1;
> struct src_stage *stage2;
> struct src_state state1;
> @@ -91,15 +85,14 @@ struct polyphase_src {
> };
>
> struct src_stage_prm {
> + int nch;
> int times;
> int32_t *x_rptr;
> int32_t *x_end_addr;
> size_t x_size;
> - int x_inc;
> int32_t *y_wptr;
> int32_t *y_end_addr;
> size_t y_size;
> - int y_inc;
> struct src_state *state;
> struct src_stage *stage;
> };
> @@ -116,31 +109,6 @@ static inline void src_circ_dec_wrap(int32_t **ptr, int32_t *addr, size_t size)
> *ptr = (int32_t *) ((size_t) * ptr + size);
> }
>
> -static inline void src_polyphase_mute(struct polyphase_src *src)
> -{
> - src->mute = 1;
> -}
> -
> -static inline void src_polyphase_unmute(struct polyphase_src *src)
> -{
> - src->mute = 0;
> -}
> -
> -static inline int src_polyphase_getmute(struct polyphase_src *src)
> -{
> - return src->mute;
> -}
> -
> -static inline int src_polyphase_get_blk_in(struct polyphase_src *src)
> -{
> - return src->blk_in;
> -}
> -
> -static inline int src_polyphase_get_blk_out(struct polyphase_src *src)
> -{
> - return src->blk_out;
> -}
> -
> void src_polyphase_reset(struct polyphase_src *src);
>
> int src_polyphase_init(struct polyphase_src *src, struct src_param *p,
> @@ -162,8 +130,4 @@ int32_t src_output_rates(void);
>
> int src_ceil_divide(int a, int b);
>
> -#ifdef MODULE_TEST
> -void src_print_info(struct polyphase_src *src);
> -#endif
> -
> #endif
More information about the Sound-open-firmware
mailing list