New subject: [Sound-open-firmware] [PATCH] SRC: Support block sizes near or equal to period length

5 Oct 2017

This patch decouples the first and second stages of the two-phase conversion
that is used for out/in rate fractions with a high numerator or denominator
value. For example, for the 44.1 -> 48 kHz 160/147 fraction, the 1st 8/7 stage
and the 2nd 20/21 stage are each executed a number of times that is close to
the period length in time used for the scheduling rate. The number of consumed
and produced samples per copy() is no longer constant. The latency of 44.1 kHz
is decreased by about 2 ms and the processing load is less variable.
This patch removes the mute feature because it is incompatible with
variable-length data processing. It can be reintroduced later, if needed,
with a different implementation.
The polyphase filter is also optimized slightly. More optimizations will
follow.
Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
---
 src/audio/src.c      | 354 +++++++++++++++++++++----------------------
 src/audio/src_core.c | 412 ++++++++++++++++++++++++---------------------------
 src/audio/src_core.h |  27 +++-
 3 files changed, 395 insertions(+), 398 deletions(-)

diff --git a/src/audio/src.c b/src/audio/src.c
index de84149..993c76b 100644
--- a/src/audio/src.c
+++ b/src/audio/src.c
@@ -56,170 +56,150 @@
 /* src component private data */
 struct comp_data {
    struct polyphase_src src[PLATFORM_MAX_CHANNELS];
+	struct src_param param;
    int32_t *delay_lines;
    uint32_t sink_rate;
    uint32_t source_rate;
-	uint32_t period_bytes; /* sink period */
-	int scratch_length; /* Buffer for stage1-stage2 */
+	int32_t *sbuf_w_ptr;
+	int32_t *sbuf_r_ptr;
+	int sbuf_avail;
    int sign_extend_s24; /* Set if need to copy sign bit to b24..b31 */
-	void (*src_func)(struct comp_dev *dev,
+	void (* src_func)(struct comp_dev *dev,
    	struct comp_buffer *source,
    	struct comp_buffer *sink,
-		int source_frames);
+		size_t *consumed,
+		size_t *produced);
 };
-/* Common mute function for 2s and 1s SRC. This preserves the same
- * buffer consume and produce pattern as normal operation.
- */
-static void src_muted_s32(struct comp_buffer *source, struct comp_buffer *sink,
-	int blk_in, int blk_out, int nch, int source_frames)
+/* Fallback function */
+static void fallback_s32(struct comp_dev *dev, struct comp_buffer *source,
+	struct comp_buffer *sink, size_t *bytes_read, size_t *bytes_written)
 {
-
-	int i;
-	int32_t *dest = (int32_t *) sink->w_ptr;
-	int32_t *end = (int32_t *) sink->end_addr;
-	int n_read = 0;
-	int n_max;
-	int n;
-	int n_written = 0;
-
-	for (i = 0; i < source_frames - blk_in + 1; i += blk_in) {
-		n_max = end - dest;
-		n = nch*blk_out;
-		if (n < n_max) {
-			bzero(dest, n * sizeof(int32_t));
-			dest += n;
-		} else {
-			/* Also case n_max == n is done here */
-			bzero(dest, n_max * sizeof(int32_t));
-			dest = (int32_t *) sink->addr;
-			bzero(dest, (n - n_max) * sizeof(int32_t));
-			dest += n - n_max;
-		}
-		n_read += nch*blk_in;
-		n_written += nch*blk_out;
-	}
-}
-
-/* Fallback function to just output muted samples and advance
- * pointers. Note that a buffer that is not having integer number of
- * frames in a period will drift since there is no similar blk in/out
- * check as for SRC.
- */
-static void fallback_s32(struct comp_dev *dev,
-	struct comp_buffer *source,
-	struct comp_buffer *sink,
-	int source_frames)
-{
-
-	struct comp_data *cd = comp_get_drvdata(dev);
-	int nch = dev->params.channels;
-	int blk_in = cd->src[0].blk_in;
-	int blk_out = cd->src[0].blk_out;
-
-	src_muted_s32(source, sink, blk_in, blk_out, nch, source_frames);
-
+	*bytes_read = 0;
+	*bytes_written = 0;
 }
/* Normal 2 stage SRC */
 static void src_2s_s32_default(struct comp_dev *dev,
    struct comp_buffer *source, struct comp_buffer *sink,
-	int source_frames)
+	size_t *bytes_read, size_t *bytes_written)
 {
-	int i;
-	int j;
    struct polyphase_src *s;
+	struct src_stage_prm s1;
+	struct src_stage_prm s2;
+	int j;
    struct comp_data *cd = comp_get_drvdata(dev);
-	int blk_in = cd->src[0].blk_in;
-	int blk_out = cd->src[0].blk_out;
-	int n_times1 = cd->src[0].stage1_times;
-	int n_times2 = cd->src[0].stage2_times;
-	int nch = dev->params.channels;
    int32_t *dest = (int32_t *) sink->w_ptr;
    int32_t *src = (int32_t *) source->r_ptr;
-	struct src_stage_prm s1;
-	struct src_stage_prm s2;
+	int32_t *sbuf_addr = cd->delay_lines;
+	int32_t *sbuf_end_addr = &cd->delay_lines[cd->param.sbuf_length];
+	int32_t sbuf_size = cd->param.sbuf_length * sizeof(int32_t);
+	const int nch = dev->params.channels;
+	const int s1_blk_in = cd->src[0].stage1->blk_in * nch;
+	const int s1_blk_out = cd->src[0].stage1->blk_out * nch;
+	const int s2_blk_in = cd->src[0].stage2->blk_in * nch;
+	const int s2_blk_out = cd->src[0].stage2->blk_out * nch;
+	int sbuf_free = cd->param.sbuf_length - cd->sbuf_avail;
+	int source_check = (source->avail >> 2) - s1_blk_in; /* for int32_t */
+	int sink_check = (sink->free >> 2) - s2_blk_out; /* For int32_t */
    int n_read = 0;
    int n_written = 0;
+	int n1 = 0;
+	int n2 = 0;
-	if (cd->src[0].mute) {
-		src_muted_s32(source, sink, blk_in, blk_out, nch, source_frames);
-		return;
-	}
-
-	s1.times = n_times1;
+	s1.times = 1;
    s1.x_end_addr = source->end_addr;
    s1.x_size = source->size;
    s1.x_inc = nch;
-	s1.y_end_addr = &cd->delay_lines[cd->scratch_length];
-	s1.y_size = cd->scratch_length * sizeof(int32_t);
-	s1.y_inc = 1;
-
-	s2.times = n_times2;
-	s2.x_end_addr = &cd->delay_lines[cd->scratch_length];
-	s2.x_size = cd->scratch_length * sizeof(int32_t);
-	s2.x_inc = 1;
+	s1.y_end_addr = sbuf_end_addr;
+	s1.y_size = sbuf_size;
+	s1.y_inc = nch;
+
+	s2.times = 1;
+	s2.x_end_addr = sbuf_end_addr;
+	s2.x_size = sbuf_size;
+	s2.x_inc = nch;
    s2.y_end_addr = sink->end_addr;
    s2.y_size = sink->size;
    s2.y_inc = nch;
-	s1.x_rptr = src + nch - 1;
-	s2.y_wptr = dest + nch - 1;
-
-	for (j = 0; j < nch; j++) {
-		s = &cd->src[j]; /* Point to src[] for this channel */
-		s1.x_rptr = src++;
-		s2.y_wptr = dest++;
-		s1.state = &s->state1;
-		s1.stage = s->stage1;
-		s2.state = &s->state2;
-		s2.stage = s->stage2;
-
-		for (i = 0; i < source_frames - blk_in + 1; i += blk_in) {
-			/* Reset output to buffer start, read interleaved */
-			s1.y_wptr = cd->delay_lines;
-			s2.x_rptr = cd->delay_lines;
-			if (cd->sign_extend_s24) {
+	/* 1st stage runs once a long multiplied length block.
+	 * The stage buffer must be large enough to fit one s1 output block
+	 * plus one s2 input block plus jitter in s2 consumption.
+	 */
+	while ((n1 < cd->param.stage1_times_max)
+		&& (n_read <= source_check)
+		&& (sbuf_free >= s1_blk_out)) {
+		for (j = 0; j < nch; j++) {
+			s = &cd->src[j]; /* Point to src[] for this channel */
+			s1.x_rptr = src++;
+			s1.y_wptr = cd->sbuf_w_ptr++;
+			src_circ_inc_wrap(&src, source->end_addr, source->size);
+			src_circ_inc_wrap(&cd->sbuf_w_ptr, sbuf_end_addr, sbuf_size);
+			s1.state = &s->state1;
+			s1.stage = s->stage1;
+			if (cd->sign_extend_s24)
    			src_polyphase_stage_cir_s24(&s1);
-				src_polyphase_stage_cir_s24(&s2);
-			} else {
+			else
    			src_polyphase_stage_cir(&s1);
+
+		}
+		n_read += s1_blk_in;
+		cd->sbuf_avail += s1_blk_out;
+		sbuf_free -= s1_blk_out;
+		src = s1.x_rptr - nch + 1;
+		cd->sbuf_w_ptr = s1.y_wptr - nch + 1;
+		src_circ_dec_wrap(&src, source->addr, source->size);
+		src_circ_dec_wrap(&cd->sbuf_w_ptr, sbuf_addr, sbuf_size);
+		n1++;
+	}
+
+	/* 2nd stage runs as many min size blocks as buffers allow */
+	while ((n2 < cd->param.stage2_times_max)
+		&& (cd->sbuf_avail >= s2_blk_in)
+		&& (n_written <= sink_check)) {
+		for (j = 0; j < nch; j++) {
+			s2.x_rptr = cd->sbuf_r_ptr++;
+			s2.y_wptr = dest++;
+			src_circ_inc_wrap(&cd->sbuf_r_ptr, sbuf_end_addr, sbuf_size);
+			src_circ_inc_wrap(&dest, sink->end_addr, sink->size);
+			s = &cd->src[j]; /* Point to src[] for this channel */
+			s2.state = &s->state2;
+			s2.stage = s->stage2;
+			if (cd->sign_extend_s24)
+				src_polyphase_stage_cir_s24(&s2);
+			else
    			src_polyphase_stage_cir(&s2);
-			}
-			n_read += blk_in;
-			n_written += blk_out;
    	}
+		cd->sbuf_r_ptr = s2.x_rptr - nch + 1;
+		dest = s2.y_wptr - nch + 1;
+		src_circ_dec_wrap(&cd->sbuf_r_ptr, sbuf_addr, sbuf_size);
+		src_circ_dec_wrap(&dest, sink->addr, sink->size);
+		n_written += s2_blk_out;
+		cd->sbuf_avail -= s2_blk_in;
+		n2++;
    }
+	*bytes_read = sizeof(int32_t) * n_read;
+	*bytes_written = sizeof(int32_t) * n_written;
 }
/* 1 stage SRC for simple conversions */
 static void src_1s_s32_default(struct comp_dev *dev,
    struct comp_buffer *source, struct comp_buffer *sink,
-	int source_frames)
+	size_t *bytes_read, size_t *bytes_written)
 {
-	int i;
-	int j;
    struct polyphase_src *s;
-
+	struct src_stage_prm s1;
+	int j;
    struct comp_data *cd = comp_get_drvdata(dev);
-	int blk_in = cd->src[0].blk_in;
-	int blk_out = cd->src[0].blk_out;
-	int n_times = cd->src[0].stage1_times;
-	int nch = dev->params.channels;
    int32_t *dest = (int32_t *) sink->w_ptr;
    int32_t *src = (int32_t *) source->r_ptr;
+	int nch = dev->params.channels;
    int n_read = 0;
    int n_written = 0;
-	struct src_stage_prm s1;
-	if (cd->src[0].mute) {
-		src_muted_s32(source, sink, blk_in, blk_out, nch,
-			source_frames);
-		return;
-	}
-
-	s1.times = n_times;
+	s1.times = cd->param.stage1_times;
    s1.x_end_addr = source->end_addr;
    s1.x_size = source->size;
    s1.x_inc = nch;
@@ -233,20 +213,56 @@ static void src_1s_s32_default(struct comp_dev *dev,
    	s = &cd->src[j]; /* Point to src for this channel */
    	s1.x_rptr = src++;
    	s1.y_wptr = dest++;
+		src_circ_inc_wrap(&src, source->end_addr, source->size);
+		src_circ_inc_wrap(&dest, sink->end_addr, sink->size);
    	s1.state = &s->state1;
    	s1.stage = s->stage1;
+		if (cd->sign_extend_s24)
+			src_polyphase_stage_cir_s24(&s1);
+		else
+			src_polyphase_stage_cir(&s1);
-		for (i = 0; i + blk_in - 1 < source_frames; i += blk_in) {
-			if (cd->sign_extend_s24)
-				src_polyphase_stage_cir_s24(&s1);
-			else
-				src_polyphase_stage_cir(&s1);
+		n_read += cd->param.blk_in;
+		n_written += cd->param.blk_out;
+	}
+	*bytes_read = n_read * sizeof(int32_t);
+	*bytes_written = n_written * sizeof(int32_t);
+}
-			n_read += blk_in;
-			n_written += blk_out;
-		}
+/* A fast copy function for same in and out rate */
+static void src_copy_s32_default(struct comp_dev *dev,
+	struct comp_buffer *source, struct comp_buffer *sink,
+	size_t *bytes_read, size_t *bytes_written)
+{
+	struct comp_data *cd = comp_get_drvdata(dev);
+	int32_t *src = (int32_t *) source->r_ptr;
+	int32_t *snk = (int32_t *) sink->w_ptr;
+	int nch = dev->params.channels;
+	int frames = cd->param.blk_in;
+	int n;
+	int n_wrap_src;
+	int n_wrap_snk;
+	int n_wrap_min;
+	int n_copy;
+
+	n = frames * nch;
+	while (n > 0) {
+		n_wrap_src = (int32_t *) source->end_addr - src;
+		n_wrap_snk = (int32_t *) sink->end_addr - snk;
+		n_wrap_min = (n_wrap_src < n_wrap_snk) ? n_wrap_src : n_wrap_snk;
+		n_copy = (n < n_wrap_min) ? n : n_wrap_min;
+		memcpy(snk, src, n_copy * sizeof(int32_t));
+
+		/* Update and check both source and destination for wrap */
+		n -= n_copy;
+		src += n_copy;
+		snk += n_copy;
+		src_circ_inc_wrap(&src, source->end_addr, source->size);
+		src_circ_inc_wrap(&snk, sink->end_addr, sink->size);
}
+	*bytes_read = frames * nch * sizeof(int32_t);
+	*bytes_written = frames * nch * sizeof(int32_t);
 }
static struct comp_dev *src_new(struct sof_ipc_comp *comp)
@@ -313,7 +329,6 @@ static int src_params(struct comp_dev *dev)
    struct comp_data *cd = comp_get_drvdata(dev);
    struct comp_buffer *sink;
    struct comp_buffer *source;
-	struct src_alloc need;
    size_t delay_lines_size;
    uint32_t source_rate;
    uint32_t sink_rate;
@@ -359,7 +374,7 @@ static int src_params(struct comp_dev *dev)
    }
/* Allocate needed memory for delay lines */
-	err = src_buffer_lengths(&need, source_rate, sink_rate,
+	err = src_buffer_lengths(&cd->param, source_rate, sink_rate,
    	params->channels, dev->frames, frames_is_for_source);
    if (err < 0) {
    	trace_src_error("sr1");
@@ -370,7 +385,7 @@ static int src_params(struct comp_dev *dev)
    	return err;
    }
-	delay_lines_size = sizeof(int32_t) * need.total;
+	delay_lines_size = sizeof(int32_t) * cd->param.total;
    if (delay_lines_size == 0) {
    	trace_src_error("sr2");
    	return -EINVAL;
@@ -389,17 +404,24 @@ static int src_params(struct comp_dev *dev)
/* Clear all delay lines here */
    memset(cd->delay_lines, 0, delay_lines_size);
-	cd->scratch_length = need.scratch;
-	buffer_start = cd->delay_lines + need.scratch;
+	buffer_start = cd->delay_lines + cd->param.sbuf_length;
/* Initize SRC for actual sample rate */
    nch = MIN(params->channels, PLATFORM_MAX_CHANNELS);
    for (i = 0; i < nch; i++) {
-		n = src_polyphase_init(&cd->src[i], &need, buffer_start);
-		buffer_start += need.single_src;
+		n = src_polyphase_init(&cd->src[i], &cd->param, buffer_start);
+		buffer_start += cd->param.single_src;
    }
+	/* Reset stage buffer */
+	cd->sbuf_r_ptr = cd->delay_lines;
+	cd->sbuf_w_ptr = cd->delay_lines;
+	cd->sbuf_avail = 0;
+
    switch (n) {
+	case 0:
+		cd->src_func = src_copy_s32_default; /* 1:1 fast copy */
+		break;
    case 1:
    	cd->src_func = src_1s_s32_default; /* Simpler 1 stage SRC */
    	break;
@@ -422,19 +444,13 @@ static int src_params(struct comp_dev *dev)
     */
    dev->frame_bytes =
    	dev->params.sample_container_bytes * dev->params.channels;
-	cd->period_bytes = dev->frames * dev->frame_bytes;
/* The downstream buffer must be at least length of blk_out plus
-	 * dev->frames and an integer multiple of dev->frames. The
+	 * a dev->frames and an integer multiple of dev->frames. The
     * buffer_set_size will return an error if the required length would
     * be too long.
     */
-	q = need.blk_out / dev->frames;
-	if (q * (int)dev->frames < need.blk_out)
-		++q;
-
-	if (q * (int)dev->frames < need.blk_out + (int)dev->frames)
-		++q;
+	q = src_ceil_divide(cd->param.blk_out, (int) dev->frames) + 1;
/* Configure downstream buffer */
    sink = list_first_item(&dev->bsink_list, struct comp_buffer,
@@ -452,7 +468,7 @@ static int src_params(struct comp_dev *dev)
    /* Check that source buffer has sufficient size */
    source = list_first_item(&dev->bsource_list, struct comp_buffer,
    	sink_list);
-	if (source->size < need.blk_in * dev->frame_bytes) {
+	if (source->size < cd->param.blk_in * dev->frame_bytes) {
    	trace_src_error("eSy");
    	return -EINVAL;
    }
@@ -463,28 +479,8 @@ static int src_params(struct comp_dev *dev)
static int src_ctrl_cmd(struct comp_dev *dev, struct sof_ipc_ctrl_data *cdata)
 {
-	struct comp_data *cd = comp_get_drvdata(dev);
-	int i;
-
-	switch (cdata->cmd) {
-	case SOF_CTRL_CMD_MUTE:
-		trace_src("SMu");
-		for (i = 0; i < PLATFORM_MAX_CHANNELS; i++)
-			src_polyphase_mute(&cd->src[i]);
-
-		break;
-	case SOF_CTRL_CMD_UNMUTE:
-		trace_src("SUm");
-		for (i = 0; i < PLATFORM_MAX_CHANNELS; i++)
-			src_polyphase_unmute(&cd->src[i]);
-
-		break;
-	default:
-		trace_src_error("ec1");
-		return -EINVAL;
-	}
-
-	return 0;
+	trace_src_error("ec1");
+	return -EINVAL;
 }
/* used to pass standard and bespoke commands (with data) to component */
@@ -513,8 +509,8 @@ static int src_copy(struct comp_dev *dev)
    struct comp_buffer *sink;
    int need_source;
    int need_sink;
-	int blk_in;
-	int blk_out;
+	size_t consumed = 0;
+	size_t produced = 0;
trace_comp("SRC");
@@ -525,23 +521,29 @@ static int src_copy(struct comp_dev *dev)
    	source_list);
/* Calculate needed amount of source buffer and sink buffer
-	 * for one SRC run.
+	 * for one SRC run. The blk_in and blk_out are minimum conditions to
+	 * call copy. Copy can consume or produce a slightly larger block
+	 * with the rates where block sizes are not constant. E.g. for
+	 * 1 ms scheduling the blocks can be under or above 1 ms when the
+	 * SRC internal block size constraint prevents exact 1 ms blocks.
     */
-	blk_in = src_polyphase_get_blk_in(&cd->src[0]);
-	blk_out = src_polyphase_get_blk_out(&cd->src[0]);
-	need_source = blk_in * dev->frame_bytes;
-	need_sink = blk_out * dev->frame_bytes;
+	need_source = cd->param.blk_in * dev->frame_bytes;
+	need_sink = cd->param.blk_out * dev->frame_bytes;
-	/* Run as many times as buffers allow */
-	while (((int) source->avail >= need_source) && ((int) sink->free >= need_sink)) {
+	/* Run SRC function if buffers avail and free allow */
+	if (((int) source->avail >= need_source) && ((int) sink->free >= need_sink)) {
    	/* Run src */
-		cd->src_func(dev, source, sink, blk_in);
+		cd->src_func(dev, source, sink, &consumed, &produced);
-		/* calc new free and available  */
-		comp_update_buffer_consume(source, need_source);
-		comp_update_buffer_produce(sink, need_sink);
-	}
+		/* Calc new free and available if data was processed. These
+		 * functions must not be called with 0 consumed/produced.
+		 */
+		if (consumed > 0)
+			comp_update_buffer_consume(source, consumed);
+		if (produced > 0)
+			comp_update_buffer_produce(sink, produced);
+	}
    return 0;
 }
diff --git a/src/audio/src_core.c b/src/audio/src_core.c
index 8098d87..4ee2efd 100644
--- a/src/audio/src_core.c
+++ b/src/audio/src_core.c
@@ -51,6 +51,18 @@ int sof_rates[SOF_RATES_LENGTH] = {8000, 11025, 12000, 16000, 18900,
    22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000, 176400,
    192000};
+/* Calculate ceil() for integer division */
+int src_ceil_divide(int a, int b)
+{
+	int c;
+
+	c = a / b;
+	if (c * b < a)
+		c++;
+
+	return c;
+}
+
 /* Calculates the needed FIR delay line length */
 static int src_fir_delay_length(struct src_stage *s)
 {
@@ -62,7 +74,7 @@ static int src_fir_delay_length(struct src_stage *s)
 static int src_out_delay_length(struct src_stage *s)
 {
-	return (s->num_of_subfilters - 1) * s->odm + 1;
+	return 1 + (s->num_of_subfilters - 1) * s->odm;
 }
/* Returns index of a matching sample rate */
@@ -108,16 +120,16 @@ int32_t src_output_rates(void)
 }
/* Calculates buffers to allocate for a SRC mode */
-int src_buffer_lengths(struct src_alloc *a, int fs_in, int fs_out, int nch,
-	int max_frames, int max_frames_is_for_source)
+int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
+	int frames, int frames_is_for_source)
 {
-	int blk_in;
-	int blk_out;
-	int k;
-	int s1_times;
-	int s2_times;
    struct src_stage *stage1;
    struct src_stage *stage2;
+	int k;
+	int q;
+	int den;
+	int num;
+	int frames2;
a->idx_in = src_find_fs(src_in_fs, NUM_IN_FS, fs_in);
    a->idx_out = src_find_fs(src_out_fs, NUM_OUT_FS, fs_out);
@@ -137,46 +149,59 @@ int src_buffer_lengths(struct src_alloc *a, int fs_in, int fs_out, int nch,
    a->fir_s1 = src_fir_delay_length(stage1);
    a->out_s1 = src_out_delay_length(stage1);
-	k = gcd(stage1->blk_out, stage2->blk_in);
-	s1_times = stage2->blk_in / k;
-	s2_times = s1_times * stage1->blk_out / stage2->blk_in;
-	blk_in = s1_times * stage1->blk_in;
-	blk_out = s2_times * stage2->blk_out;
-
    /* Find out how many additional times the SRC can be executed
       while having block size less or equal to max_frames.
     */
-	if (max_frames_is_for_source) {
-		k = max_frames / blk_in;
+	if (frames_is_for_source) {
+		/* Times that stage1 needs to run to input length of frames */
+		a->stage1_times_max = src_ceil_divide(frames, stage1->blk_in);
+		q = frames / stage1->blk_in;
+		a->stage1_times = MAX(q, 1);
+		a->blk_in = a->stage1_times * stage1->blk_in;
+
+		/* Times that stage2 needs to run */
+		den = stage2->blk_in * stage1->blk_in;
+		num = frames * stage2->blk_out * stage1->blk_out;
+		frames2 = src_ceil_divide(num, den);
+		a->stage2_times_max = src_ceil_divide(frames2, stage2->blk_out);
+		q = frames2 / stage2->blk_out;
+		a->stage2_times = MAX(q, 1);
+		a->blk_out = a->stage2_times * stage2->blk_out;
    } else {
-		k = max_frames / blk_out;
+		/* Times that stage2 needs to run to output length of frames */
+		a->stage2_times_max = src_ceil_divide(frames, stage2->blk_out);
+		q = frames / stage2->blk_out;
+		a->stage2_times = MAX(q, 1);
+		a->blk_out = a->stage2_times * stage2->blk_out;
+
+		/* Times that stage1 needs to run */
+		num = frames * stage2->blk_in * stage1->blk_in;
+		den = stage2->blk_out * stage1->blk_out;
+		frames2 = src_ceil_divide(num, den);
+		a->stage1_times_max = src_ceil_divide(frames2, stage1->blk_in);
+		q = frames2 / stage1->blk_in;
+		a->stage1_times = MAX(q, 1);
+		a->blk_in = a->stage1_times * stage1->blk_in;
    }
-	/* Mininum k is 1, when 0 max_frames is less than block length. In
-	 * that case need to check in src.c that sink/source size is large
-	 * enough for at least one block.
-	 */
-	if (k < 1)
-		k = 1;
-
-	a->blk_mult = k;
-	a->blk_in = blk_in * k;
-	a->blk_out = blk_out * k;
-	a->stage1_times = s1_times * k;
-	a->stage2_times = s2_times * k;
-
    if (stage2->filter_length == 1) {
    	a->fir_s2 = 0;
    	a->out_s2 = 0;
-		a->scratch = 0;
    	a->stage2_times = 0;
+		a->stage2_times_max = 0;
+		a->sbuf_length = 0;
    } else {
    	a->fir_s2 = src_fir_delay_length(stage2);
    	a->out_s2 = src_out_delay_length(stage2);
-		a->scratch = stage1->blk_out * s1_times * k;
+		/* 2x is an empirically tested length. Since the sink buffer
+		 * capability to receive samples varies, a shorter stage 2 output
+		 * block will create a peak in internal buffer usage.
+		 */
+		a->sbuf_length = 2 * nch * stage1->blk_out * a->stage1_times_max;
    }
+
    a->single_src = a->fir_s1 + a->fir_s2 + a->out_s1 + a->out_s2;
-	a->total = a->scratch + nch * a->single_src;
+	a->total = a->sbuf_length + nch * a->single_src;
return 0;
 }
@@ -194,7 +219,7 @@ static void src_state_reset(struct src_state *state)
static int init_stages(
    struct src_stage *stage1, struct src_stage *stage2,
-	struct polyphase_src *src, struct src_alloc *res,
+	struct polyphase_src *src, struct src_param *p,
    int n, int32_t *delay_lines_start)
 {
    /* Clear FIR state */
@@ -204,29 +229,27 @@ static int init_stages(
    src->number_of_stages = n;
    src->stage1 = stage1;
    src->stage2 = stage2;
+	src->blk_in = p->blk_in;
+	src->blk_out = p->blk_out;
    if (n == 1) {
-		src->blk_in = stage1->blk_in * res->blk_mult;
-		src->blk_out = stage1->blk_out * res->blk_mult;
-		src->stage1_times = res->stage1_times;
+		src->stage1_times = p->stage1_times;
    	src->stage2_times = 0;
    	if (stage1->blk_out == 0)
    		return -EINVAL;
    } else {
-		src->stage1_times = res->stage1_times;
-		src->stage2_times = res->stage2_times;
-		src->blk_in = res->blk_in;
-		src->blk_out = res->blk_out;
+		src->stage1_times = p->stage1_times;
+		src->stage2_times = p->stage2_times;
    }
/* Delay line sizes */
-	src->state1.fir_delay_size = res->fir_s1;
-	src->state1.out_delay_size = res->out_s1;
+	src->state1.fir_delay_size = p->fir_s1;
+	src->state1.out_delay_size = p->out_s1;
    src->state1.fir_delay = delay_lines_start;
    src->state1.out_delay =
    	src->state1.fir_delay + src->state1.fir_delay_size;
    if (n > 1) {
-		src->state2.fir_delay_size = res->fir_s2;
-		src->state2.out_delay_size = res->out_s2;
+		src->state2.fir_delay_size = p->fir_s2;
+		src->state2.out_delay_size = p->out_s2;
    	src->state2.fir_delay =
    		src->state1.out_delay + src->state1.out_delay_size;
    	src->state2.out_delay =
@@ -251,7 +274,6 @@ static int init_stages(
    }
return 0;
-
 }
void src_polyphase_reset(struct polyphase_src *src)
@@ -269,7 +291,7 @@ void src_polyphase_reset(struct polyphase_src *src)
    src_state_reset(&src->state2);
 }
-int src_polyphase_init(struct polyphase_src *src, struct src_alloc *res,
+int src_polyphase_init(struct polyphase_src *src, struct src_param *p,
    int32_t *delay_lines_start)
 {
    int n_stages;
@@ -277,23 +299,28 @@ int src_polyphase_init(struct polyphase_src *src, struct src_alloc *res,
    struct src_stage *stage1;
    struct src_stage *stage2;
-	if ((res->idx_in < 0) || (res->idx_out < 0)) {
-		src->blk_in = res->blk_in;
-		src->blk_out = res->blk_out;
+	if ((p->idx_in < 0) || (p->idx_out < 0)) {
+		src->blk_in = p->blk_in;
+		src->blk_out = p->blk_out;
    	return -EINVAL;
    }
/* Get setup for 2 stage conversion */
-	stage1 = src_table1[res->idx_out][res->idx_in];
-	stage2 = src_table2[res->idx_out][res->idx_in];
-	ret = init_stages(stage1, stage2, src, res, 2, delay_lines_start);
+	stage1 = src_table1[p->idx_out][p->idx_in];
+	stage2 = src_table2[p->idx_out][p->idx_in];
+	ret = init_stages(stage1, stage2, src, p, 2, delay_lines_start);
    if (ret < 0)
    	return -EINVAL;
/* Get number of stages used for optimize opportunity. 2nd
     * stage length is one if conversion needs only one stage.
+	 * If input and output rate is the same return 0 to
+	 * use a simple copy function instead of 1 stage FIR with one
+	 * tap.
     */
    n_stages = (src->stage2->filter_length == 1) ? 1 : 2;
+	if (p->idx_in == p->idx_out)
+		n_stages = 0;
/* If filter length for first stage is zero this is a deleted
     * mode from in/out matrix. Computing of such SRC mode needs
@@ -311,14 +338,20 @@ int src_polyphase_init(struct polyphase_src *src, struct src_alloc *res,
 static inline void fir_part(int64_t *y, int ntaps, const int16_t c[], int *ic,
    int32_t d[], int *id)
 {
-	int64_t p;
    int n;
+	int64_t a = 0;
/* Data is Q1.31, coef is Q1.15, product is Q2.46 */
-	for (n = 0; n < ntaps; n++) {
-		p = (int64_t) c[(*ic)++] * d[(*id)--];
-		*y += p;
+	for (n = 0; n < (ntaps >> 1); n++) {
+		a += (int64_t) c[*ic] * d[*id]
+			+ (int64_t) c[*ic + 1] * d[*id - 1];
+		*ic += 2;
+		*id -= 2;
    }
+	if (ntaps & 1)
+		a += (int64_t) c[(*ic)++] * d[(*id)--];
+
+	*y += a;
 }
 #else
@@ -326,15 +359,20 @@ static inline void fir_part(int64_t *y, int ntaps, const int16_t c[], int *ic,
 static inline void fir_part(int64_t *y, int ntaps, const int32_t c[], int *ic,
    int32_t d[], int *id)
 {
-	int64_t p;
    int n;
+	int64_t a = 0;
/* Data is Q8.24, coef is Q1.23, product is Q9.47 */
-	for (n = 0; n < ntaps; n++) {
-
-		p = (int64_t) c[(*ic)++] * d[(*id)--];
-		*y += p;
+	for (n = 0; n < (ntaps >> 1); n++) {
+		a += (int64_t) c[*ic] * d[*id]
+			+ (int64_t) c[*ic + 1] * d[*id - 1];
+		*ic += 2;
+		*id -= 2;
    }
+	if (ntaps & 1)
+		a += (int64_t) c[(*ic)++] * d[(*id)--];
+
+	*y += a;
 }
 #endif
@@ -367,7 +405,7 @@ static inline int32_t fir_filter(
    /* Q2.46 -> Q2.31, saturate to Q1.31 */
    y = y >> (15 + shift);
-	return (int32_t) sat_int32(y);
+	return(int32_t) sat_int32(y);
 }
 #else
@@ -398,12 +436,14 @@ static inline int32_t fir_filter(
    /* Q9.47 -> Q9.24, saturate to Q8.24 */
    y = y >> (23 + shift);
-	return (int32_t) sat_int32(y);
+	return(int32_t) sat_int32(y);
 }
 #endif
-void src_polyphase_stage_cir(struct src_stage_prm *s)
+void src_polyphase_stage_cir(struct src_stage_prm * s)
 {
+	struct src_state *fir = s->state;
+	struct src_stage *cfg = s->stage;
    int n;
    int m;
    int f;
@@ -412,109 +452,80 @@ void src_polyphase_stage_cir(struct src_stage_prm *s)
    int n_wrap_fir;
    int n_wrap_buf;
    int n_wrap_min;
+	int n_min;
    int32_t z;
for (n = 0; n < s->times; n++) {
    	/* Input data */
-		m = s->x_inc * s->stage->blk_in;
+		m = s->x_inc * cfg->blk_in;
    	while (m > 0) {
-			n_wrap_fir =
-				(s->state->fir_delay_size - s->state->fir_wi)
+			n_wrap_fir = (fir->fir_delay_size - fir->fir_wi)
    			* s->x_inc;
    		n_wrap_buf = s->x_end_addr - s->x_rptr;
    		n_wrap_min = (n_wrap_fir < n_wrap_buf)
    			? n_wrap_fir : n_wrap_buf;
-			if (m < n_wrap_min) {
-				/* No circular wrap need */
-				while (m > 0) {
-					s->state->fir_delay[s->state->fir_wi++]
-						= *s->x_rptr;
-					s->x_rptr += s->x_inc;
-					m -= s->x_inc;
-				}
-			} else {
-				/* Wrap in n_wrap_min/x_inc samples */
-				while (n_wrap_min > 0) {
-					s->state->fir_delay[s->state->fir_wi++]
-						= *s->x_rptr;
-					s->x_rptr += s->x_inc;
-					n_wrap_min -= s->x_inc;
-					m -= s->x_inc;
-				}
-				/* Check both */
-				if (s->x_rptr >= s->x_end_addr)
-					s->x_rptr = (int32_t *)
-					((size_t) s->x_rptr - s->x_size);
-				if (s->state->fir_wi
-					== s->state->fir_delay_size)
-					s->state->fir_wi = 0;
+			n_min = (m < n_wrap_min) ? m : n_wrap_min;
+			while (n_min > 0) {
+				fir->fir_delay[fir->fir_wi++] = *s->x_rptr;
+				s->x_rptr += s->x_inc;
+				n_min -= s->x_inc;
+				m -= s->x_inc;
    		}
+			/* Check for wrap */
+			src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+			if (fir->fir_wi == fir->fir_delay_size)
+				fir->fir_wi = 0;
    	}
/* Filter */
    	c = 0;
-		r = s->state->fir_wi - s->stage->blk_in
-			- (s->stage->num_of_subfilters - 1) * s->stage->idm;
+		r = fir->fir_wi - cfg->blk_in
+			- (cfg->num_of_subfilters - 1) * cfg->idm;
    	if (r < 0)
-			r += s->state->fir_delay_size;
-
-		s->state->out_wi = s->state->out_ri;
-		for (f = 0; f < s->stage->num_of_subfilters; f++) {
-			s->state->fir_ri = r;
-			z = fir_filter(s->state, s->stage->coefs, &c,
-				s->stage->subfilter_length, s->stage->shift);
-			r += s->stage->idm;
-			if (r > s->state->fir_delay_size - 1)
-				r -= s->state->fir_delay_size;
-
-			s->state->out_delay[s->state->out_wi] = z;
-			s->state->out_wi += s->stage->odm;
-			if (s->state->out_wi > s->state->out_delay_size - 1)
-				s->state->out_wi -= s->state->out_delay_size;
+			r += fir->fir_delay_size;
+
+		fir->out_wi = fir->out_ri;
+		for (f = 0; f < cfg->num_of_subfilters; f++) {
+			fir->fir_ri = r;
+			z = fir_filter(fir, cfg->coefs, &c,
+				cfg->subfilter_length, cfg->shift);
+			r += cfg->idm;
+			if (r >= fir->fir_delay_size)
+				r -= fir->fir_delay_size;
+
+			fir->out_delay[fir->out_wi] = z;
+			fir->out_wi += cfg->odm;
+			if (fir->out_wi >= fir->out_delay_size)
+				fir->out_wi -= fir->out_delay_size;
    	}
/* Output */
-		m = s->y_inc * s->stage->num_of_subfilters;
+		m = s->y_inc * cfg->num_of_subfilters;
    	while (m > 0) {
-			n_wrap_fir =
-				(s->state->out_delay_size - s->state->out_ri)
+			n_wrap_fir = (fir->out_delay_size - fir->out_ri)
    			* s->y_inc;
    		n_wrap_buf = s->y_end_addr - s->y_wptr;
    		n_wrap_min = (n_wrap_fir < n_wrap_buf)
    			? n_wrap_fir : n_wrap_buf;
-			if (m < n_wrap_min) {
-				/* No circular wrap need */
-				while (m > 0) {
-					*s->y_wptr = s->state->out_delay[
-						s->state->out_ri++];
-					s->y_wptr += s->y_inc;
-					m -= s->y_inc;
-				}
-			} else {
-				/* Wrap in n_wrap_min/y_inc samples */
-				while (n_wrap_min > 0) {
-					*s->y_wptr = s->state->out_delay[
-						s->state->out_ri++];
-					s->y_wptr += s->y_inc;
-					n_wrap_min -= s->y_inc;
-					m -= s->y_inc;
-				}
-				/* Check both */
-				if (s->y_wptr >= s->y_end_addr)
-					s->y_wptr =
-					(int32_t *)
-					((size_t) s->y_wptr - s->y_size);
-
-				if (s->state->out_ri
-					== s->state->out_delay_size)
-					s->state->out_ri = 0;
+			n_min = (m < n_wrap_min) ? m : n_wrap_min;
+			while (n_min > 0) {
+				*s->y_wptr = fir->out_delay[fir->out_ri++];
+				s->y_wptr += s->y_inc;
+				n_min -= s->y_inc;
+				m -= s->y_inc;
    		}
+			/* Check wrap */
+			src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+			if (fir->out_ri == fir->out_delay_size)
+				fir->out_ri = 0;
    	}
    }
 }
-void src_polyphase_stage_cir_s24(struct src_stage_prm *s)
+void src_polyphase_stage_cir_s24(struct src_stage_prm * s)
 {
+	struct src_state *fir = s->state;
+	struct src_stage *cfg = s->stage;
    int n;
    int m;
    int f;
@@ -523,113 +534,82 @@ void src_polyphase_stage_cir_s24(struct src_stage_prm *s)
    int n_wrap_fir;
    int n_wrap_buf;
    int n_wrap_min;
-	int32_t se;
+	int n_min;
    int32_t z;
+	int32_t se;
for (n = 0; n < s->times; n++) {
    	/* Input data */
-		m = s->x_inc * s->stage->blk_in;
+		m = s->x_inc * cfg->blk_in;
    	while (m > 0) {
-			n_wrap_fir =
-				(s->state->fir_delay_size - s->state->fir_wi)
+			n_wrap_fir = (fir->fir_delay_size - fir->fir_wi)
    			* s->x_inc;
    		n_wrap_buf = s->x_end_addr - s->x_rptr;
    		n_wrap_min = (n_wrap_fir < n_wrap_buf)
    			? n_wrap_fir : n_wrap_buf;
-			if (m < n_wrap_min) {
-				/* No circular wrap need */
-				while (m > 0) {
-					se = *s->x_rptr << 8;
-					s->state->fir_delay[s->state->fir_wi++]
-						= se >> 8;
-					s->x_rptr += s->x_inc;
-					m -= s->x_inc;
-				}
-			} else {
-				/* Wrap in n_wrap_min/x_inc samples */
-				while (n_wrap_min > 0) {
-					se = *s->x_rptr << 8;
-					s->state->fir_delay[s->state->fir_wi++]
-						= se >> 8;
-					s->x_rptr += s->x_inc;
-					n_wrap_min -= s->x_inc;
-					m -= s->x_inc;
-				}
-				/* Check both */
-				if (s->x_rptr >= s->x_end_addr)
-					s->x_rptr = (int32_t *)
-					((size_t) s->x_rptr - s->x_size);
-				if (s->state->fir_wi
-					== s->state->fir_delay_size)
-					s->state->fir_wi = 0;
+			n_min = (m < n_wrap_min) ? m : n_wrap_min;
+			while (n_min > 0) {
+				se = *s->x_rptr << 8;
+				fir->fir_delay[fir->fir_wi++] = se >> 8;
+				s->x_rptr += s->x_inc;
+				n_min -= s->x_inc;
+				m -= s->x_inc;
    		}
+			/* Check for wrap */
+			src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+			if (fir->fir_wi == fir->fir_delay_size)
+				fir->fir_wi = 0;
    	}
/* Filter */
    	c = 0;
-		r = s->state->fir_wi - s->stage->blk_in
-			- (s->stage->num_of_subfilters - 1) * s->stage->idm;
+		r = fir->fir_wi - cfg->blk_in
+			- (cfg->num_of_subfilters - 1) * cfg->idm;
    	if (r < 0)
-			r += s->state->fir_delay_size;
-
-		s->state->out_wi = s->state->out_ri;
-		for (f = 0; f < s->stage->num_of_subfilters; f++) {
-			s->state->fir_ri = r;
-			z = fir_filter(s->state, s->stage->coefs, &c,
-				s->stage->subfilter_length, s->stage->shift);
-			r += s->stage->idm;
-			if (r > s->state->fir_delay_size - 1)
-				r -= s->state->fir_delay_size;
-
-			s->state->out_delay[s->state->out_wi] = z;
-			s->state->out_wi += s->stage->odm;
-			if (s->state->out_wi > s->state->out_delay_size - 1)
-				s->state->out_wi -= s->state->out_delay_size;
+			r += fir->fir_delay_size;
+
+		fir->out_wi = fir->out_ri;
+		for (f = 0; f < cfg->num_of_subfilters; f++) {
+			fir->fir_ri = r;
+			z = fir_filter(fir, cfg->coefs, &c,
+				cfg->subfilter_length, cfg->shift);
+			r += cfg->idm;
+			if (r >= fir->fir_delay_size)
+				r -= fir->fir_delay_size;
+
+			fir->out_delay[fir->out_wi] = z;
+			fir->out_wi += cfg->odm;
+			if (fir->out_wi >= fir->out_delay_size)
+				fir->out_wi -= fir->out_delay_size;
    	}
/* Output */
-		m = s->y_inc * s->stage->num_of_subfilters;
+		m = s->y_inc * cfg->num_of_subfilters;
    	while (m > 0) {
-			n_wrap_fir =
-				(s->state->out_delay_size - s->state->out_ri)
+			n_wrap_fir = (fir->out_delay_size - fir->out_ri)
    			* s->y_inc;
    		n_wrap_buf = s->y_end_addr - s->y_wptr;
    		n_wrap_min = (n_wrap_fir < n_wrap_buf)
    			? n_wrap_fir : n_wrap_buf;
-			if (m < n_wrap_min) {
-				/* No circular wrap need */
-				while (m > 0) {
-					*s->y_wptr = s->state->out_delay[
-						s->state->out_ri++];
-					s->y_wptr += s->y_inc;
-					m -= s->y_inc;
-				}
-			} else {
-				/* Wrap in n_wrap_min/y_inc samples */
-				while (n_wrap_min > 0) {
-					*s->y_wptr = s->state->out_delay[
-						s->state->out_ri++];
-					s->y_wptr += s->y_inc;
-					n_wrap_min -= s->y_inc;
-					m -= s->y_inc;
-				}
-				/* Check both */
-				if (s->y_wptr >= s->y_end_addr)
-					s->y_wptr =
-					(int32_t *)
-					((size_t) s->y_wptr - s->y_size);
-
-				if (s->state->out_ri
-					== s->state->out_delay_size)
-					s->state->out_ri = 0;
+			n_min = (m < n_wrap_min) ? m : n_wrap_min;
+			while (n_min > 0) {
+				*s->y_wptr = fir->out_delay[fir->out_ri++];
+				s->y_wptr += s->y_inc;
+				n_min -= s->y_inc;
+				m -= s->y_inc;
    		}
+			/* Check wrap */
+			src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+			if (fir->out_ri == fir->out_delay_size)
+				fir->out_ri = 0;
    	}
    }
 }
+
 #ifdef MODULE_TEST
-void src_print_info(struct polyphase_src *src)
+void src_print_info(struct polyphase_src * src)
 {
int n1;
diff --git a/src/audio/src_core.h b/src/audio/src_core.h
index a9f66d4..3859e6f 100644
--- a/src/audio/src_core.h
+++ b/src/audio/src_core.h
@@ -35,19 +35,20 @@
 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
-struct src_alloc {
+struct src_param {
    int fir_s1;
    int fir_s2;
    int out_s1;
    int out_s2;
-	int scratch;
+	int sbuf_length;
    int single_src;
    int total;
-	int blk_mult;
    int blk_in;
    int blk_out;
    int stage1_times;
    int stage2_times;
+	int stage1_times_max;
+	int stage2_times_max;
    int idx_in;
    int idx_out;
 };
@@ -103,6 +104,18 @@ struct src_stage_prm {
    struct src_stage *stage;
 };
+static inline void src_circ_inc_wrap(int32_t **ptr, int32_t *end, size_t size) /* Wrap a forward-moving circular buffer pointer in place. */
+{
+	if (*ptr >= end) /* Only acts once *ptr has reached or passed end; otherwise *ptr is left untouched. */
+		*ptr = (int32_t *) ((size_t) * ptr - size); /* size is subtracted from the raw address value, so it is in address units (bytes), not int32_t elements. */
+}
+
+static inline void src_circ_dec_wrap(int32_t **ptr, int32_t *addr, size_t size) /* Wrap a backward-moving circular buffer pointer in place. */
+{
+	if (*ptr < addr) /* Only acts once *ptr has dropped below addr (presumably the buffer start address - confirm against callers). */
+		*ptr = (int32_t *) ((size_t) * ptr + size); /* size is added to the raw address value, so it is in address units (bytes), not int32_t elements. */
+}
+
 static inline void src_polyphase_mute(struct polyphase_src *src)
 {
    src->mute = 1;
@@ -130,7 +143,7 @@ static inline int src_polyphase_get_blk_out(struct polyphase_src *src)
void src_polyphase_reset(struct polyphase_src *src);
-int src_polyphase_init(struct polyphase_src *src, struct src_alloc *res,
+int src_polyphase_init(struct polyphase_src *src, struct src_param *p,
    int32_t *delay_lines_start);
int src_polyphase(struct polyphase_src *src, int32_t x[], int32_t y[],
@@ -140,13 +153,15 @@ void src_polyphase_stage_cir(struct src_stage_prm *s);
void src_polyphase_stage_cir_s24(struct src_stage_prm *s);
-int src_buffer_lengths(struct src_alloc *a, int fs_in, int fs_out, int nch,
-	int max_frames, int max_frames_is_for_source);
+int src_buffer_lengths(struct src_param *p, int fs_in, int fs_out, int nch,
+	int frames, int frames_is_for_source);
int32_t src_input_rates(void);
int32_t src_output_rates(void);
+int src_ceil_divide(int a, int b);
+
 #ifdef MODULE_TEST
 void src_print_info(struct polyphase_src *src);
 #endif
-- 
2.11.0


    

[Sound-open-firmware] [PATCH] SRC: Support block sizes near or equal to period length

Seppo Ingalsuo

Liam Girdwood

Signed-off-by: Seppo Ingalsuo seppo.ingalsuo@linux.intel.com

tags (0)

participants (2)