Sound-open-firmware
Threads by month
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
March 2018
- 20 participants
- 144 discussions
[Sound-open-firmware] [PATCH] volume: fix bug pertaining to volume ramp down
by Ranjani Sridharan 12 Mar '18
by Ranjani Sridharan 12 Mar '18
12 Mar '18
This patch fixes the bug in the check for terminating the volume
ramp down.
Signed-off-by: Ranjani Sridharan <ranjani.sridharan(a)linux.intel.com>
---
src/audio/volume.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/audio/volume.c b/src/audio/volume.c
index 45a343e..51a5826 100644
--- a/src/audio/volume.c
+++ b/src/audio/volume.c
@@ -335,7 +335,7 @@ static uint64_t vol_work(void *data, uint64_t delay)
vol -= VOL_RAMP_STEP;
/* ramp completed ? */
- if (vol <= cd->tvolume[i] || vol >= VOL_MAX)
+ if (vol <= cd->tvolume[i] || vol <= VOL_MIN)
vol_update(cd, i);
else {
cd->volume[i] = vol;
--
2.14.1
2
1
10 Mar '18
Clean up the SSPSP bits so that the SSP output follows the ALSA ASoC
definitions of the I2S/DSP_A/DSP_B/LEFT_J modes.
Signed-off-by: Keyon Jie <yang.jie(a)linux.intel.com>
---
src/drivers/apl-ssp.c | 88 ++++++++++++++++++++++++++++++++++++++------------
src/include/reef/ssp.h | 3 ++
2 files changed, 71 insertions(+), 20 deletions(-)
diff --git a/src/drivers/apl-ssp.c b/src/drivers/apl-ssp.c
index c32bd35..42ffd03 100644
--- a/src/drivers/apl-ssp.c
+++ b/src/drivers/apl-ssp.c
@@ -93,7 +93,9 @@ static inline int ssp_set_config(struct dai *dai,
uint32_t i2s_n;
uint32_t data_size;
uint32_t start_delay;
+ uint32_t dummy_stop;
uint32_t frame_len = 0;
+ uint32_t bdiv_min;
bool inverted_frame = false;
int ret = 0;
@@ -265,8 +267,7 @@ static inline int ssp_set_config(struct dai *dai,
/* must be enouch BCLKs for data */
bdiv = config->bclk / config->fclk;
- if (bdiv < config->sample_container_bits *
- ((config->rx_slot_mask + 1) / 2)) {
+ if (bdiv < config->sample_container_bits * config->num_slots) {
trace_ssp_error("ec8");
ret = -EINVAL;
goto out;
@@ -283,16 +284,27 @@ static inline int ssp_set_config(struct dai *dai,
switch (config->format & SOF_DAI_FMT_FORMAT_MASK) {
case SOF_DAI_FMT_I2S:
- start_delay = config->sample_container_bits >
- config->sample_valid_bits ? 1 : 0;
+ start_delay = 1;
sscr0 |= SSCR0_FRDC(config->num_slots);
- /* set asserted frame length */
- frame_len = config->sample_container_bits;
+ if (bdiv % 2) {
+ trace_ssp_error("eca");
+ ret = -EINVAL;
+ goto out;
+ }
- /* handle frame polarity, I2S default is falling/active low */
- sspsp |= SSPSP_SFRMP(!inverted_frame);
+ /* set asserted frame length to half frame length */
+ frame_len = bdiv / 2;
+
+ /*
+ * handle frame polarity, I2S default is falling/active low,
+ * non-inverted(inverted_frame=0) -- active low(SFRMP=0),
+ * inverted(inverted_frame=1) -- rising/active high(SFRMP=1),
+ * so, we should set SFRMP to inverted_frame.
+ */
+ sspsp |= SSPSP_SFRMP(inverted_frame);
+ sspsp |= SSPSP_FSRT;
break;
@@ -305,25 +317,41 @@ static inline int ssp_set_config(struct dai *dai,
/* LJDFD enable */
sscr2 &= ~SSCR2_LJDFD;
- /* set asserted frame length */
- frame_len = config->sample_container_bits;
+ if (bdiv % 2) {
+ trace_ssp_error("ecb");
+ ret = -EINVAL;
+ goto out;
+ }
- /* LEFT_J default is rising/active high, opposite of I2S */
- sspsp |= SSPSP_SFRMP(inverted_frame);
+ /* set asserted frame length to half frame length */
+ frame_len = bdiv / 2;
+
+ /*
+ * handle frame polarity, LEFT_J default is rising/active high,
+ * non-inverted(inverted_frame=0) -- active high(SFRMP=1),
+ * inverted(inverted_frame=1) -- falling/active low(SFRMP=0),
+ * so, we should set SFRMP to !inverted_frame.
+ */
+ sspsp |= SSPSP_SFRMP(!inverted_frame);
break;
case SOF_DAI_FMT_DSP_A:
- start_delay = config->sample_container_bits >
- config->sample_valid_bits ? 1 : 0;
+ start_delay = 1;
sscr0 |= SSCR0_MOD | SSCR0_FRDC(config->num_slots);
/* set asserted frame length */
- frame_len = config->sample_container_bits;
+ frame_len = 1;
- /* handle frame polarity, DSP_A default is rising/active high */
- sspsp |= SSPSP_SFRMP(inverted_frame);
+ /*
+ * handle frame polarity, DSP_A default is rising/active high,
+ * non-inverted(inverted_frame=0) -- active high(SFRMP=1),
+ * inverted(inverted_frame=1) -- falling/active low(SFRMP=0),
+ * so, we should set SFRMP to !inverted_frame.
+ */
+ sspsp |= SSPSP_SFRMP(!inverted_frame);
+ sspsp |= SSPSP_FSRT;
break;
case SOF_DAI_FMT_DSP_B:
@@ -335,9 +363,13 @@ static inline int ssp_set_config(struct dai *dai,
/* set asserted frame length */
frame_len = 1;
- /* handle frame polarity, DSP_A default is rising/active high */
+ /*
+ * handle frame polarity, DSP_B default is rising/active high,
+ * non-inverted(inverted_frame=0) -- active high(SFRMP=1),
+ * inverted(inverted_frame=1) -- falling/active low(SFRMP=0),
+ * so, we should set SFRMP to !inverted_frame.
+ */
sspsp |= SSPSP_SFRMP(!inverted_frame);
- sspsp |= SSPSP_FSRT;
break;
default:
@@ -346,9 +378,25 @@ static inline int ssp_set_config(struct dai *dai,
goto out;
}
- sspsp |= SSPSP_DMYSTRT(start_delay);
+ sspsp |= SSPSP_STRTDLY(start_delay);
sspsp |= SSPSP_SFRMWDTH(frame_len);
+ /*
+ * [dummy_start][valid_bits_slot[0...n-1]][dummy_stop],
+ * but don't count dummy_start for dummy_stop calculation.
+ */
+ bdiv_min = config->num_slots * config->sample_valid_bits;
+ if (bdiv < bdiv_min) {
+ trace_ssp_error("ecc");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ dummy_stop = bdiv - bdiv_min;
+ sspsp |= SSPSP_DMYSTOP(SSPSP_DMYSTOP_MASK & dummy_stop);
+ sspsp |= SSPSP_EDMYSTOP(SSPSP_EDMYSTOP_MASK &
+ (dummy_stop >> SSPSP_DMYSTOP_BITS));
+
data_size = config->sample_valid_bits;
if (data_size > 16)
diff --git a/src/include/reef/ssp.h b/src/include/reef/ssp.h
index e041121..453aaab 100644
--- a/src/include/reef/ssp.h
+++ b/src/include/reef/ssp.h
@@ -160,10 +160,13 @@ extern const struct dai_ops ssp_ops;
#define SSPSP_SFRMDLY(x) ((x) << 9)
#define SSPSP_SFRMWDTH(x) ((x) << 16)
#define SSPSP_DMYSTOP(x) ((x) << 23)
+#define SSPSP_DMYSTOP_BITS 2
+#define SSPSP_DMYSTOP_MASK ((0x1 << SSPSP_DMYSTOP_BITS) - 1)
#define SSPSP_FSRT (1 << 25)
#if defined CONFIG_APOLLOLAKE || defined CONFIG_CANNONLAKE
#define SSPSP_EDMYSTOP(x) ((x) << 26)
+#define SSPSP_EDMYSTOP_MASK 0x7
#define SSTSS 0x38
#define SSCR2 0x40
#define SSPSP2 0x44
--
2.14.1
4
3
[Sound-open-firmware] [PATCH_V2 2/2] cnl: dma: refine dma interrupt processing on cnl
by Rander Wang 09 Mar '18
by Rander Wang 09 Mar '18
09 Mar '18
On CNL, all the DMA controllers share the same interrupt pin, so
every DMA controller needs to be checked in the interrupt
handler.
---
V2: rebase on master
Tested with:
SOF master
kernel:pierre V4.14 branch
Signed-off-by: Rander Wang <rander.wang(a)linux.intel.com>
---
src/drivers/dw-dma.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/src/drivers/dw-dma.c b/src/drivers/dw-dma.c
index ccefc50..3d92b21 100644
--- a/src/drivers/dw-dma.c
+++ b/src/drivers/dw-dma.c
@@ -1138,6 +1138,26 @@ static void dw_dma_irq_handler(void *data)
}
}
+/*All the dma share the same interrupt on CNL, so check each dma*/
+/*controller when interrupt coming, then do the work */
+static void dw_dma_irq_cnl(void *data)
+{
+ struct dma *dma;
+ uint32_t status_intr;
+
+ /*check interrupt status of DMA controller 0*/
+ dma = dma_get(DMA_GP_LP_DMAC0);
+ status_intr = dw_read(dma, DW_INTR_STATUS);
+ if (status_intr)
+ dw_dma_irq_handler(dma);
+
+ /*check interrupt status of DMA controller 1*/
+ dma = dma_get(DMA_GP_LP_DMAC1);
+ status_intr = dw_read(dma, DW_INTR_STATUS);
+ if (status_intr)
+ dw_dma_irq_handler(dma);
+}
+
static int dw_dma_probe(struct dma *dma)
{
struct dma_pdata *dw_pdata;
@@ -1160,7 +1180,11 @@ static int dw_dma_probe(struct dma *dma)
}
/* register our IRQ handler */
+#if defined CONFIG_CANNONLAKE
+ interrupt_register(dma_irq(dma), dw_dma_irq_cnl, dma);
+#else
interrupt_register(dma_irq(dma), dw_dma_irq_handler, dma);
+#endif
interrupt_enable(dma_irq(dma));
return 0;
--
2.14.1
2
1
[Sound-open-firmware] [PATCH] ssp: fix inverted_frame handling for BYT and APL/CNL
by Pierre-Louis Bossart 09 Mar '18
by Pierre-Louis Bossart 09 Mar '18
09 Mar '18
inverted_frame variable should only be set in the _IF
cases
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
---
This should apply on top of Keyon's APL/CNL fixes
src/drivers/apl-ssp.c | 2 +-
src/drivers/ssp.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/drivers/apl-ssp.c b/src/drivers/apl-ssp.c
index 42ffd03..1ee107c 100644
--- a/src/drivers/apl-ssp.c
+++ b/src/drivers/apl-ssp.c
@@ -196,6 +196,7 @@ static inline int ssp_set_config(struct dai *dai,
case SOF_DAI_FMT_NB_NF:
break;
case SOF_DAI_FMT_NB_IF:
+ inverted_frame = true; /* handled later with format */
break;
case SOF_DAI_FMT_IB_IF:
sspsp |= SSPSP_SCMODE(2);
@@ -203,7 +204,6 @@ static inline int ssp_set_config(struct dai *dai,
break;
case SOF_DAI_FMT_IB_NF:
sspsp |= SSPSP_SCMODE(2);
- inverted_frame = true; /* handled later with format */
break;
default:
trace_ssp_error("ec3");
diff --git a/src/drivers/ssp.c b/src/drivers/ssp.c
index 4b195ec..827a4d8 100644
--- a/src/drivers/ssp.c
+++ b/src/drivers/ssp.c
@@ -223,6 +223,7 @@ static inline int ssp_set_config(struct dai *dai,
case SOF_DAI_FMT_NB_NF:
break;
case SOF_DAI_FMT_NB_IF:
+ inverted_frame = true; /* handled later with format */
break;
case SOF_DAI_FMT_IB_IF:
sspsp |= SSPSP_SCMODE(2);
@@ -230,7 +231,6 @@ static inline int ssp_set_config(struct dai *dai,
break;
case SOF_DAI_FMT_IB_NF:
sspsp |= SSPSP_SCMODE(2);
- inverted_frame = true; /* handled later with format */
break;
default:
trace_ssp_error("ec3");
--
2.14.1
1
0
[Sound-open-firmware] [PATCH 1/6] SRC: Files structure change and add Xtensa optimized versions
by Seppo Ingalsuo 09 Mar '18
by Seppo Ingalsuo 09 Mar '18
09 Mar '18
This patch moves the generic common code from src_core.c/h to src.c/h and
places the generic C optimized filter in src_generic.c. The HiFi EP
version is in src_hifi2ep.c and the HiFi3 version is in src_hifi3.c. Use of
the Xtensa optimized versions requires the xt-xcc compiler.
The unused SRC in/out rates query code is removed. The 24 bit
coefficients were replaced by 32 bit coefficients, which are compatible
with Xtensa fractional integer types.
Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo(a)linux.intel.com>
---
src/audio/Makefile.am | 4 +-
src/audio/src.c | 346 ++++++++++++++++++--
src/audio/{src_core.h => src.h} | 17 +-
src/audio/src_config.h | 57 +++-
src/audio/src_core.c | 676 ----------------------------------------
src/audio/src_generic.c | 435 ++++++++++++++++++++++++++
src/audio/src_hifi2ep.c | 562 +++++++++++++++++++++++++++++++++
src/audio/src_hifi3.c | 567 +++++++++++++++++++++++++++++++++
8 files changed, 1938 insertions(+), 726 deletions(-)
rename src/audio/{src_core.h => src.h} (93%)
delete mode 100644 src/audio/src_core.c
create mode 100644 src/audio/src_generic.c
create mode 100644 src/audio/src_hifi2ep.c
create mode 100644 src/audio/src_hifi3.c
diff --git a/src/audio/Makefile.am b/src/audio/Makefile.am
index bccedbf..ae58289 100644
--- a/src/audio/Makefile.am
+++ b/src/audio/Makefile.am
@@ -1006,7 +1006,9 @@ libaudio_a_SOURCES = \
fir.c \
tone.c \
src.c \
- src_core.c \
+ src_generic.c \
+ src_hifi2ep.c \
+ src_hifi3.c \
mixer.c \
mux.c \
volume.c \
diff --git a/src/audio/src.c b/src/audio/src.c
index c7ac649..cca0cbc 100644
--- a/src/audio/src.c
+++ b/src/audio/src.c
@@ -43,7 +43,17 @@
#include <reef/audio/component.h>
#include <reef/audio/pipeline.h>
#include <uapi/ipc.h>
-#include "src_core.h"
+
+#include "src_config.h"
+#include "src.h"
+
+#if SRC_SHORT
+#include <reef/audio/coefficients/src/src_tiny_int16_define.h>
+#include <reef/audio/coefficients/src/src_tiny_int16_table.h>
+#else
+#include <reef/audio/coefficients/src/src_std_int32_define.h>
+#include <reef/audio/coefficients/src/src_std_int32_table.h>
+#endif
#ifdef MODULE_TEST
#include <stdio.h>
@@ -53,6 +63,10 @@
#define tracev_src(__e) tracev_event(TRACE_CLASS_SRC, __e)
#define trace_src_error(__e) trace_error(TRACE_CLASS_SRC, __e)
+/* The FIR maximum lengths are per channel so need to multiply them */
+#define MAX_FIR_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_FIR_DELAY_SIZE)
+#define MAX_OUT_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_OUT_DELAY_SIZE)
+
/* src component private data */
struct comp_data {
struct polyphase_src src;
@@ -63,14 +77,273 @@ struct comp_data {
int32_t *sbuf_w_ptr;
int32_t *sbuf_r_ptr;
int sbuf_avail;
- void (* src_func)(struct comp_dev *dev,
+ void (*src_func)(struct comp_dev *dev,
struct comp_buffer *source,
struct comp_buffer *sink,
size_t *consumed,
size_t *produced);
- void (* polyphase_func)(struct src_stage_prm *s);
+ void (*polyphase_func)(struct src_stage_prm *s);
};
+/* Calculate ceil() for integer division */
+int src_ceil_divide(int a, int b)
+{
+ int c;
+
+ c = a / b;
+ if (c * b < a)
+ c++;
+
+ return c;
+}
+
+/* Calculates the needed FIR delay line length */
+static int src_fir_delay_length(struct src_stage *s)
+{
+ return s->subfilter_length + (s->num_of_subfilters - 1) * s->idm
+ + s->blk_in;
+}
+
+/* Calculates the FIR output delay line length */
+static int src_out_delay_length(struct src_stage *s)
+{
+ return 1 + (s->num_of_subfilters - 1) * s->odm;
+}
+
+/* Returns index of a matching sample rate */
+static int src_find_fs(int fs_list[], int list_length, int fs)
+{
+ int i;
+
+ for (i = 0; i < list_length; i++) {
+ if (fs_list[i] == fs)
+ return i;
+ }
+ return -EINVAL;
+}
+
+/* Calculates buffers to allocate for a SRC mode */
+int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
+ int frames, int frames_is_for_source)
+{
+ struct src_stage *stage1;
+ struct src_stage *stage2;
+ int q;
+ int den;
+ int num;
+ int frames2;
+
+ if (nch > PLATFORM_MAX_CHANNELS) {
+ trace_src_error("che");
+ tracev_value(nch);
+ return -EINVAL;
+ }
+
+ a->nch = nch;
+ a->idx_in = src_find_fs(src_in_fs, NUM_IN_FS, fs_in);
+ a->idx_out = src_find_fs(src_out_fs, NUM_OUT_FS, fs_out);
+
+ /* Check that both in and out rates are supported */
+ if (a->idx_in < 0 || a->idx_out < 0) {
+ trace_src_error("us1");
+ tracev_value(fs_in);
+ tracev_value(fs_out);
+ return -EINVAL;
+ }
+
+ stage1 = src_table1[a->idx_out][a->idx_in];
+ stage2 = src_table2[a->idx_out][a->idx_in];
+
+ /* Check from stage1 parameter for a deleted in/out rate combination.*/
+ if (stage1->filter_length < 1) {
+ trace_src_error("us2");
+ tracev_value(fs_in);
+ tracev_value(fs_out);
+ return -EINVAL;
+ }
+
+ a->fir_s1 = nch * src_fir_delay_length(stage1);
+ a->out_s1 = nch * src_out_delay_length(stage1);
+
+ /* Find out how many additional times the SRC can be executed
+ * while having block size less or equal to max_frames.
+ */
+ if (frames_is_for_source) {
+ /* Times that stage1 needs to run to input length of frames */
+ a->stage1_times_max = src_ceil_divide(frames, stage1->blk_in);
+ q = frames / stage1->blk_in;
+ a->stage1_times = MAX(q, 1);
+ a->blk_in = a->stage1_times * stage1->blk_in;
+
+ /* Times that stage2 needs to run */
+ den = stage2->blk_in * stage1->blk_in;
+ num = frames * stage2->blk_out * stage1->blk_out;
+ frames2 = src_ceil_divide(num, den);
+ a->stage2_times_max = src_ceil_divide(frames2, stage2->blk_out);
+ q = frames2 / stage2->blk_out;
+ a->stage2_times = MAX(q, 1);
+ a->blk_out = a->stage2_times * stage2->blk_out;
+ } else {
+ /* Times that stage2 needs to run to output length of frames */
+ a->stage2_times_max = src_ceil_divide(frames, stage2->blk_out);
+ q = frames / stage2->blk_out;
+ a->stage2_times = MAX(q, 1);
+ a->blk_out = a->stage2_times * stage2->blk_out;
+
+ /* Times that stage1 needs to run */
+ num = frames * stage2->blk_in * stage1->blk_in;
+ den = stage2->blk_out * stage1->blk_out;
+ frames2 = src_ceil_divide(num, den);
+ a->stage1_times_max = src_ceil_divide(frames2, stage1->blk_in);
+ q = frames2 / stage1->blk_in;
+ a->stage1_times = MAX(q, 1);
+ a->blk_in = a->stage1_times * stage1->blk_in;
+ }
+
+ if (stage2->filter_length == 1) {
+ a->fir_s2 = 0;
+ a->out_s2 = 0;
+ a->stage2_times = 0;
+ a->stage2_times_max = 0;
+ a->sbuf_length = 0;
+ } else {
+ a->fir_s2 = nch * src_fir_delay_length(stage2);
+ a->out_s2 = nch * src_out_delay_length(stage2);
+ /* 2x is an empirically tested length. Since the sink buffer
+ * capability to receive samples varies a shorter stage 2 output
+ * block will create a peak in internal buffer usage.
+ */
+
+ /* TODO 1: Equation for needed length */
+ a->sbuf_length = 2 * nch * stage1->blk_out
+ * a->stage1_times_max;
+ }
+
+ a->src_multich = a->fir_s1 + a->fir_s2 + a->out_s1 + a->out_s2;
+ a->total = a->sbuf_length + a->src_multich;
+
+ return 0;
+}
+
+static void src_state_reset(struct src_state *state)
+{
+ state->fir_delay_size = 0;
+ state->out_delay_size = 0;
+}
+
+static int init_stages(struct src_stage *stage1, struct src_stage *stage2,
+ struct polyphase_src *src, struct src_param *p,
+ int n, int32_t *delay_lines_start)
+{
+ /* Clear FIR state */
+ src_state_reset(&src->state1);
+ src_state_reset(&src->state2);
+
+ src->number_of_stages = n;
+ src->stage1 = stage1;
+ src->stage2 = stage2;
+ if (n == 1 && stage1->blk_out == 0)
+ return -EINVAL;
+
+ /* Optimized SRC requires subfilter length multiple of 4 */
+ if (stage1->filter_length > 1 && (stage1->subfilter_length & 0x3) > 0)
+ return -EINVAL;
+
+ if (stage2->filter_length > 1 && (stage2->subfilter_length & 0x3) > 0)
+ return -EINVAL;
+
+ /* Delay line sizes */
+ src->state1.fir_delay_size = p->fir_s1;
+ src->state1.out_delay_size = p->out_s1;
+ src->state1.fir_delay = delay_lines_start;
+ src->state1.out_delay =
+ src->state1.fir_delay + src->state1.fir_delay_size;
+ /* Initialize to last ensures that circular wrap cannot happen
+ * mid-frame. The size is multiple of channels count.
+ */
+ src->state1.fir_wp = &src->state1.fir_delay[p->fir_s1 - 1];
+ src->state1.out_rp = src->state1.out_delay;
+ if (n > 1) {
+ src->state2.fir_delay_size = p->fir_s2;
+ src->state2.out_delay_size = p->out_s2;
+ src->state2.fir_delay =
+ src->state1.out_delay + src->state1.out_delay_size;
+ src->state2.out_delay =
+ src->state2.fir_delay + src->state2.fir_delay_size;
+ /* Initialize to last ensures that circular wrap cannot happen
+ * mid-frame. The size is multiple of channels count.
+ */
+ src->state2.fir_wp = &src->state2.fir_delay[p->fir_s2 - 1];
+ src->state2.out_rp = src->state2.out_delay;
+ } else {
+ src->state2.fir_delay_size = 0;
+ src->state2.out_delay_size = 0;
+ src->state2.fir_delay = NULL;
+ src->state2.out_delay = NULL;
+ }
+
+ /* Check the sizes are less than MAX */
+ if (src->state1.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH ||
+ src->state1.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH ||
+ src->state2.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH ||
+ src->state2.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH) {
+ src->state1.fir_delay = NULL;
+ src->state1.out_delay = NULL;
+ src->state2.fir_delay = NULL;
+ src->state2.out_delay = NULL;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void src_polyphase_reset(struct polyphase_src *src)
+{
+ src->number_of_stages = 0;
+ src->stage1 = NULL;
+ src->stage2 = NULL;
+ src_state_reset(&src->state1);
+ src_state_reset(&src->state2);
+}
+
+int src_polyphase_init(struct polyphase_src *src, struct src_param *p,
+ int32_t *delay_lines_start)
+{
+ struct src_stage *stage1;
+ struct src_stage *stage2;
+ int n_stages;
+ int ret;
+
+ if (p->idx_in < 0 || p->idx_out < 0)
+ return -EINVAL;
+
+ /* Get setup for 2 stage conversion */
+ stage1 = src_table1[p->idx_out][p->idx_in];
+ stage2 = src_table2[p->idx_out][p->idx_in];
+ ret = init_stages(stage1, stage2, src, p, 2, delay_lines_start);
+ if (ret < 0)
+ return -EINVAL;
+
+ /* Get number of stages used for optimize opportunity. 2nd
+ * stage length is one if conversion needs only one stage.
+ * If input and output rate is the same return 0 to
+ * use a simple copy function instead of 1 stage FIR with one
+ * tap.
+ */
+ n_stages = (src->stage2->filter_length == 1) ? 1 : 2;
+ if (p->idx_in == p->idx_out)
+ n_stages = 0;
+
+ /* If filter length for first stage is zero this is a deleted
+ * mode from in/out matrix. Computing of such SRC mode needs
+ * to be prevented.
+ */
+ if (src->stage1->filter_length == 0)
+ return -EINVAL;
+
+ return n_stages;
+}
+
/* Fallback function */
static void src_fallback(struct comp_dev *dev, struct comp_buffer *source,
struct comp_buffer *sink, size_t *bytes_read, size_t *bytes_written)
@@ -91,8 +364,9 @@ static void src_2s_s32_default(struct comp_dev *dev,
int s2_blk_in;
int s2_blk_out;
struct comp_data *cd = comp_get_drvdata(dev);
- int32_t *dest = (int32_t *) sink->w_ptr;
- int32_t *src = (int32_t *) source->r_ptr;
+ int32_t *dest = (int32_t *)sink->w_ptr;
+ int32_t *src = (int32_t *)source->r_ptr;
+ int32_t *sbuf_addr = cd->delay_lines;
int32_t *sbuf_end_addr = &cd->delay_lines[cd->param.sbuf_length];
int32_t sbuf_size = cd->param.sbuf_length * sizeof(int32_t);
int nch = dev->params.channels;
@@ -107,6 +381,7 @@ static void src_2s_s32_default(struct comp_dev *dev,
s1.x_end_addr = source->end_addr;
s1.x_size = source->size;
+ s1.y_addr = sbuf_addr;
s1.y_end_addr = sbuf_end_addr;
s1.y_size = sbuf_size;
s1.state = &cd->src.state1;
@@ -117,6 +392,7 @@ static void src_2s_s32_default(struct comp_dev *dev,
s2.x_end_addr = sbuf_end_addr;
s2.x_size = sbuf_size;
+ s2.y_addr = sink->addr;
s2.y_end_addr = sink->end_addr;
s2.y_size = sink->size;
s2.state = &cd->src.state2;
@@ -125,14 +401,13 @@ static void src_2s_s32_default(struct comp_dev *dev,
s2.y_wptr = dest;
s2.nch = nch;
-
/* Test if 1st stage can be run with default block length to reach
* the period length or just under it.
*/
s1.times = cd->param.stage1_times;
s1_blk_in = s1.times * cd->src.stage1->blk_in * nch;
s1_blk_out = s1.times * cd->src.stage1->blk_out * nch;
- if ((avail_b >= s1_blk_in * sz) && (sbuf_free >= s1_blk_out)) {
+ if (avail_b >= s1_blk_in * sz && sbuf_free >= s1_blk_out) {
cd->polyphase_func(&s1);
cd->sbuf_w_ptr = s1.y_wptr;
@@ -147,8 +422,9 @@ static void src_2s_s32_default(struct comp_dev *dev,
s1.times = 1;
s1_blk_in = cd->src.stage1->blk_in * nch;
s1_blk_out = cd->src.stage1->blk_out * nch;
- while ((n1 < cd->param.stage1_times_max) && (avail_b >= s1_blk_in * sz)
- && (sbuf_free >= s1_blk_out)) {
+ while (n1 < cd->param.stage1_times_max &&
+ avail_b >= s1_blk_in * sz &&
+ sbuf_free >= s1_blk_out) {
cd->polyphase_func(&s1);
cd->sbuf_w_ptr = s1.y_wptr;
@@ -163,7 +439,7 @@ static void src_2s_s32_default(struct comp_dev *dev,
s2.times = cd->param.stage2_times;
s2_blk_in = s2.times * cd->src.stage2->blk_in * nch;
s2_blk_out = s2.times * cd->src.stage2->blk_out * nch;
- if ((cd->sbuf_avail >= s2_blk_in) && (free_b >= s2_blk_out * sz)) {
+ if (cd->sbuf_avail >= s2_blk_in && free_b >= s2_blk_out * sz) {
cd->polyphase_func(&s2);
cd->sbuf_r_ptr = s2.x_rptr;
@@ -173,14 +449,13 @@ static void src_2s_s32_default(struct comp_dev *dev,
n2 = s2.times;
}
-
/* Run one block at time the remaining 2nd stage output */
s2.times = 1;
s2_blk_in = cd->src.stage2->blk_in * nch;
s2_blk_out = cd->src.stage2->blk_out * nch;
- while ((n2 < cd->param.stage2_times_max)
- && (cd->sbuf_avail >= s2_blk_in)
- && (free_b >= s2_blk_out * sz)) {
+ while (n2 < cd->param.stage2_times_max &&
+ cd->sbuf_avail >= s2_blk_in &&
+ free_b >= s2_blk_out * sz) {
cd->polyphase_func(&s2);
cd->sbuf_r_ptr = s2.x_rptr;
@@ -205,10 +480,10 @@ static void src_1s_s32_default(struct comp_dev *dev,
int n_written = 0;
s1.times = cd->param.stage1_times;
- s1.x_rptr = (int32_t *) source->r_ptr;
+ s1.x_rptr = (int32_t *)source->r_ptr;
s1.x_end_addr = source->end_addr;
s1.x_size = source->size;
- s1.y_wptr = (int32_t *) sink->w_ptr;
+ s1.y_wptr = (int32_t *)sink->w_ptr;
s1.y_end_addr = sink->end_addr;
s1.y_size = sink->size;
s1.state = &cd->src.state1;
@@ -229,8 +504,8 @@ static void src_copy_s32_default(struct comp_dev *dev,
size_t *bytes_read, size_t *bytes_written)
{
struct comp_data *cd = comp_get_drvdata(dev);
- int32_t *src = (int32_t *) source->r_ptr;
- int32_t *snk = (int32_t *) sink->w_ptr;
+ int32_t *src = (int32_t *)source->r_ptr;
+ int32_t *snk = (int32_t *)sink->w_ptr;
int nch = dev->params.channels;
int frames = cd->param.blk_in;
int n;
@@ -241,9 +516,10 @@ static void src_copy_s32_default(struct comp_dev *dev,
n = frames * nch;
while (n > 0) {
- n_wrap_src = (int32_t *) source->end_addr - src;
- n_wrap_snk = (int32_t *) sink->end_addr - snk;
- n_wrap_min = (n_wrap_src < n_wrap_snk) ? n_wrap_src : n_wrap_snk;
+ n_wrap_src = (int32_t *)source->end_addr - src;
+ n_wrap_snk = (int32_t *)sink->end_addr - snk;
+ n_wrap_min = (n_wrap_src < n_wrap_snk) ?
+ n_wrap_src : n_wrap_snk;
n_copy = (n < n_wrap_min) ? n : n_wrap_min;
memcpy(snk, src, n_copy * sizeof(int32_t));
@@ -253,7 +529,6 @@ static void src_copy_s32_default(struct comp_dev *dev,
snk += n_copy;
src_circ_inc_wrap(&src, source->end_addr, source->size);
src_circ_inc_wrap(&snk, sink->end_addr, sink->size);
-
}
*bytes_read = frames * nch * sizeof(int32_t);
*bytes_written = frames * nch * sizeof(int32_t);
@@ -263,7 +538,7 @@ static struct comp_dev *src_new(struct sof_ipc_comp *comp)
{
struct comp_dev *dev;
struct sof_ipc_comp_src *src;
- struct sof_ipc_comp_src *ipc_src = (struct sof_ipc_comp_src *) comp;
+ struct sof_ipc_comp_src *ipc_src = (struct sof_ipc_comp_src *)comp;
struct comp_data *cd;
trace_src("new");
@@ -276,14 +551,14 @@ static struct comp_dev *src_new(struct sof_ipc_comp *comp)
dev = rzalloc(RZONE_RUNTIME, SOF_MEM_CAPS_RAM,
COMP_SIZE(struct sof_ipc_comp_src));
- if (dev == NULL)
+ if (!dev)
return NULL;
- src = (struct sof_ipc_comp_src *) &dev->comp;
+ src = (struct sof_ipc_comp_src *)&dev->comp;
memcpy(src, ipc_src, sizeof(struct sof_ipc_comp_src));
cd = rzalloc(RZONE_RUNTIME, SOF_MEM_CAPS_RAM, sizeof(*cd));
- if (cd == NULL) {
+ if (!cd) {
rfree(dev);
return NULL;
}
@@ -306,7 +581,7 @@ static void src_free(struct comp_dev *dev)
trace_src("fre");
/* Free dynamically reserved buffers for SRC algorithm */
- if (cd->delay_lines != NULL)
+ if (!cd->delay_lines)
rfree(cd->delay_lines);
rfree(cd);
@@ -347,7 +622,8 @@ static int src_params(struct comp_dev *dev)
}
/* Calculate source and sink rates, one rate will come from IPC new
- * and the other from params. */
+ * and the other from params.
+ */
if (src->source_rate == 0) {
/* params rate is source rate */
source_rate = params->rate;
@@ -383,12 +659,12 @@ static int src_params(struct comp_dev *dev)
}
/* free any existing delay lines. TODO reuse if same size */
- if (cd->delay_lines != NULL)
+ if (!cd->delay_lines)
rfree(cd->delay_lines);
cd->delay_lines = rballoc(RZONE_RUNTIME, SOF_MEM_CAPS_RAM,
delay_lines_size);
- if (cd->delay_lines == NULL) {
+ if (!cd->delay_lines) {
trace_src_error("sr3");
trace_value(delay_lines_size);
return -EINVAL;
@@ -424,7 +700,6 @@ static int src_params(struct comp_dev *dev)
trace_src("SFa");
cd->src_func = src_fallback;
return -EINVAL;
- break;
}
/* Calculate period size based on config. First make sure that
@@ -438,7 +713,7 @@ static int src_params(struct comp_dev *dev)
* buffer_set_size will return an error if the required length would
* be too long.
*/
- q = src_ceil_divide(cd->param.blk_out, (int) dev->frames) + 1;
+ q = src_ceil_divide(cd->param.blk_out, (int)dev->frames) + 1;
/* Configure downstream buffer */
sink = list_first_item(&dev->bsink_list, struct comp_buffer,
@@ -459,7 +734,6 @@ static int src_params(struct comp_dev *dev)
return -EINVAL;
}
-
return 0;
}
@@ -518,7 +792,8 @@ static int src_copy(struct comp_dev *dev)
/* make sure source component buffer has enough data available and that
* the sink component buffer has enough free bytes for copy. Also
- * check for XRUNs */
+ * check for XRUNs.
+ */
if (source->avail < need_source) {
trace_src_error("xru");
return -EIO; /* xrun */
@@ -530,6 +805,9 @@ static int src_copy(struct comp_dev *dev)
cd->src_func(dev, source, sink, &consumed, &produced);
+ tracev_value(consumed >> 3);
+ tracev_value(produced >> 3);
+
/* Calc new free and available if data was processed. These
* functions must not be called with 0 consumed/produced.
*/
diff --git a/src/audio/src_core.h b/src/audio/src.h
similarity index 93%
rename from src/audio/src_core.h
rename to src/audio/src.h
index 3ea6028..3208693 100644
--- a/src/audio/src_core.h
+++ b/src/audio/src.h
@@ -29,8 +29,8 @@
*
*/
-#ifndef SRC_CORE_H
-#define SRC_CORE_H
+#ifndef SRC_H
+#define SRC_H
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
@@ -68,12 +68,12 @@ struct src_stage {
};
struct src_state {
- int fir_delay_size;
- int out_delay_size;
- int fir_wi;
- int out_ri;
+ int fir_delay_size; /* samples */
+ int out_delay_size; /* samples */
int32_t *fir_delay;
int32_t *out_delay;
+ int32_t *fir_wp;
+ int32_t *out_rp;
};
struct polyphase_src {
@@ -91,6 +91,7 @@ struct src_stage_prm {
int32_t *x_end_addr;
size_t x_size;
int32_t *y_wptr;
+ int32_t *y_addr;
int32_t *y_end_addr;
size_t y_size;
struct src_state *state;
@@ -100,13 +101,13 @@ struct src_stage_prm {
static inline void src_circ_inc_wrap(int32_t **ptr, int32_t *end, size_t size)
{
if (*ptr >= end)
- *ptr = (int32_t *) ((size_t) * ptr - size);
+ *ptr = (int32_t *)((size_t)*ptr - size);
}
static inline void src_circ_dec_wrap(int32_t **ptr, int32_t *addr, size_t size)
{
if (*ptr < addr)
- *ptr = (int32_t *) ((size_t) * ptr + size);
+ *ptr = (int32_t *)((size_t)*ptr + size);
}
void src_polyphase_reset(struct polyphase_src *src);
diff --git a/src/audio/src_config.h b/src/audio/src_config.h
index 3ad4c78..65d6247 100644
--- a/src/audio/src_config.h
+++ b/src/audio/src_config.h
@@ -34,14 +34,57 @@
#include <config.h>
-#if defined CONFIG_BAYTRAIL || defined CONFIG_CHERRYTRAIL || defined CONFIG_BROADWELL || defined CONFIG_HASWELL
-#define SRC_SHORT 1
-#include <reef/audio/coefficients/src/src_tiny_int16_define.h>
-#include <reef/audio/coefficients/src/src_tiny_int16_table.h>
+/* If next defines are set to 1 the SRC is configured automatically. Setting
+ * to zero temporarily is useful is for testing needs.
+ * Setting SRC_AUTODSP to 0 allows to manually set the code variant.
+ * Setting SRC_AUTOCOEF to 0 allows to select the coefficient type.
+ */
+#define SRC_AUTOARCH 1
+#define SRC_AUTOCOEF 1
+
+/* Force manually some code variant when SRC_AUTODSP is set to zero. These
+ * are useful in code debugging.
+ */
+#if SRC_AUTOARCH == 0
+#define SRC_GENERIC 1
+#define SRC_HIFIEP 0
+#define SRC_HIFI3 0
+#endif
+#if SRC_AUTOCOEF == 0
+#define SRC_SHORT 0
+#endif
+
+/* Select 16 bit coefficients for specific platforms.
+ * Otherwise 32 bits is the default.
+ */
+#if SRC_AUTOCOEF == 1
+#if defined CONFIG_BAYTRAIL || defined CONFIG_CHERRYTRAIL \
+ || defined CONFIG_BROADWELL || defined CONFIG_HASWELL
+#define SRC_SHORT 1 /* Use int16_t filter coefficients */
#else
-#define SHORT_SHORT 0
-#include <reef/audio/coefficients/src/src_std_int24_define.h>
-#include <reef/audio/coefficients/src/src_std_int24_table.h>
+#define SRC_SHORT 0 /* Use int32_t filter coefficients */
+#endif
+#endif
+
+/* Select optimized code variant when xt-xcc compiler is used */
+#if SRC_AUTOARCH == 1
+#if defined __XCC__
+#include <xtensa/config/core-isa.h>
+#define SRC_GENERIC 0
+#if XCHAL_HAVE_HIFI2EP == 1
+#define SRC_HIFIEP 1
+#define SRC_HIFI3 0
+#endif
+#if XCHAL_HAVE_HIFI3 == 1
+#define SRC_HIFI3 1
+#define SRC_HIFIEP 0
+#endif
+#else
+/* GCC */
+#define SRC_GENERIC 1
+#define SRC_HIFIEP 0
+#define SRC_HIFI3 0
+#endif
#endif
#endif
diff --git a/src/audio/src_core.c b/src/audio/src_core.c
deleted file mode 100644
index d8b9a3d..0000000
--- a/src/audio/src_core.c
+++ /dev/null
@@ -1,676 +0,0 @@
-/*
- * Copyright (c) 2016, Intel Corporation
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Intel Corporation nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Seppo Ingalsuo <seppo.ingalsuo(a)linux.intel.com>
- *
- */
-
-/* Non optimized default C implementation guaranteed to work on any
- * architecture.
- */
-
-#include <stdint.h>
-
-#ifdef MODULE_TEST
-#include <stdio.h>
-#endif
-
-#include <reef/alloc.h>
-#include <reef/audio/format.h>
-#include <reef/math/numbers.h>
-#include "src_core.h"
-#include "src_config.h"
-
-#define trace_src(__e) trace_event(TRACE_CLASS_SRC, __e)
-#define tracev_src(__e) tracev_event(TRACE_CLASS_SRC, __e)
-#define trace_src_error(__e) trace_error(TRACE_CLASS_SRC, __e)
-
-/* TODO: These should be defined somewhere else. */
-#define SOF_RATES_LENGTH 15
-int sof_rates[SOF_RATES_LENGTH] = {8000, 11025, 12000, 16000, 18900,
- 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000, 176400,
- 192000};
-
-/* The FIR maximum lengths are per channel so need to multiply them */
-#define MAX_FIR_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_FIR_DELAY_SIZE)
-#define MAX_OUT_DELAY_SIZE_XNCH (PLATFORM_MAX_CHANNELS * MAX_OUT_DELAY_SIZE)
-
-/* Calculate ceil() for integer division */
-int src_ceil_divide(int a, int b)
-{
- int c;
-
- c = a / b;
- if (c * b < a)
- c++;
-
- return c;
-}
-
-/* Calculates the needed FIR delay line length */
-static int src_fir_delay_length(struct src_stage *s)
-{
- return s->subfilter_length + (s->num_of_subfilters - 1) * s->idm
- + s->blk_in;
-}
-
-/* Calculates the FIR output delay line length */
-static int src_out_delay_length(struct src_stage *s)
-{
-
- return 1 + (s->num_of_subfilters - 1) * s->odm;
-}
-
-/* Returns index of a matching sample rate */
-static int src_find_fs(int fs_list[], int list_length, int fs)
-{
- int i;
-
- for (i = 0; i < list_length; i++) {
- if (fs_list[i] == fs)
- return i;
- }
- return -EINVAL;
-}
-
-/* Match SOF and defined SRC input rates into a bit mask */
-int32_t src_input_rates(void)
-{
- int n;
- int b;
- int mask = 0;
-
- for (n = SOF_RATES_LENGTH - 1; n >= 0; n--) {
- b = (src_find_fs(src_in_fs, NUM_IN_FS, sof_rates[n]) >= 0)
- ? 1 : 0;
- mask = (mask << 1) | b;
- }
- return mask;
-}
-
-/* Match SOF and defined SRC output rates into a bit mask */
-int32_t src_output_rates(void)
-{
- int n;
- int b;
- int mask = 0;
-
- for (n = SOF_RATES_LENGTH - 1; n >= 0; n--) {
- b = (src_find_fs(src_out_fs, NUM_OUT_FS, sof_rates[n]) >= 0)
- ? 1 : 0;
- mask = (mask << 1) | b;
- }
- return mask;
-}
-
-/* Calculates buffers to allocate for a SRC mode */
-int src_buffer_lengths(struct src_param *a, int fs_in, int fs_out, int nch,
- int frames, int frames_is_for_source)
-{
- struct src_stage *stage1;
- struct src_stage *stage2;
- int q;
- int den;
- int num;
- int frames2;
-
- if (nch > PLATFORM_MAX_CHANNELS) {
- trace_src_error("che");
- tracev_value(nch);
- return -EINVAL;
- }
-
- a->nch = nch;
- a->idx_in = src_find_fs(src_in_fs, NUM_IN_FS, fs_in);
- a->idx_out = src_find_fs(src_out_fs, NUM_OUT_FS, fs_out);
-
- /* Check that both in and out rates are supported */
- if ((a->idx_in < 0) || (a->idx_out < 0)) {
- trace_src_error("us1");
- tracev_value(fs_in);
- tracev_value(fs_out);
- return -EINVAL;
- }
-
- stage1 = src_table1[a->idx_out][a->idx_in];
- stage2 = src_table2[a->idx_out][a->idx_in];
-
- /* Check from stage1 parameter for a deleted in/out rate combination.*/
- if (stage1->filter_length < 1) {
- trace_src_error("us2");
- tracev_value(fs_in);
- tracev_value(fs_out);
- return -EINVAL;
- }
-
- a->fir_s1 = nch * src_fir_delay_length(stage1);
- a->out_s1 = nch * src_out_delay_length(stage1);
-
- /* Find out how many additional times the SRC can be executed
- while having block size less or equal to max_frames.
- */
- if (frames_is_for_source) {
- /* Times that stage1 needs to run to input length of frames */
- a->stage1_times_max = src_ceil_divide(frames, stage1->blk_in);
- q = frames / stage1->blk_in;
- a->stage1_times = MAX(q, 1);
- a->blk_in = a->stage1_times * stage1->blk_in;
-
- /* Times that stage2 needs to run */
- den = stage2->blk_in * stage1->blk_in;
- num = frames * stage2->blk_out * stage1->blk_out;
- frames2 = src_ceil_divide(num, den);
- a->stage2_times_max = src_ceil_divide(frames2, stage2->blk_out);
- q = frames2 / stage2->blk_out;
- a->stage2_times = MAX(q, 1);
- a->blk_out = a->stage2_times * stage2->blk_out;
- } else {
- /* Times that stage2 needs to run to output length of frames */
- a->stage2_times_max = src_ceil_divide(frames, stage2->blk_out);
- q = frames / stage2->blk_out;
- a->stage2_times = MAX(q, 1);
- a->blk_out = a->stage2_times * stage2->blk_out;
-
- /* Times that stage1 needs to run */
- num = frames * stage2->blk_in * stage1->blk_in;
- den = stage2->blk_out * stage1->blk_out;
- frames2 = src_ceil_divide(num, den);
- a->stage1_times_max = src_ceil_divide(frames2, stage1->blk_in);
- q = frames2 / stage1->blk_in;
- a->stage1_times = MAX(q, 1);
- a->blk_in = a->stage1_times * stage1->blk_in;
- }
-
- if (stage2->filter_length == 1) {
- a->fir_s2 = 0;
- a->out_s2 = 0;
- a->stage2_times = 0;
- a->stage2_times_max = 0;
- a->sbuf_length = 0;
- } else {
- a->fir_s2 = nch * src_fir_delay_length(stage2);
- a->out_s2 = nch * src_out_delay_length(stage2);
- /* 2x is an empirically tested length. Since the sink buffer
- * capability to receive samples varies a shorter stage 2 output
- * block will create a peak in internal buffer usage.
- */
- a->sbuf_length = 2 * nch * stage1->blk_out * a->stage1_times_max;
- }
-
- a->src_multich = a->fir_s1 + a->fir_s2 + a->out_s1 + a->out_s2;
- a->total = a->sbuf_length + a->src_multich;
-
- return 0;
-}
-
-static void src_state_reset(struct src_state *state)
-{
-
- state->fir_delay_size = 0;
- state->out_delay_size = 0;
- state->fir_wi = 0;
- state->out_ri = 0;
-}
-
-static int init_stages(
- struct src_stage *stage1, struct src_stage *stage2,
- struct polyphase_src *src, struct src_param *p,
- int n, int32_t *delay_lines_start)
-{
- /* Clear FIR state */
- src_state_reset(&src->state1);
- src_state_reset(&src->state2);
-
- src->number_of_stages = n;
- src->stage1 = stage1;
- src->stage2 = stage2;
- if ((n == 1) && (stage1->blk_out == 0))
- return -EINVAL;
-
- /* Delay line sizes */
- src->state1.fir_delay_size = p->fir_s1;
- src->state1.out_delay_size = p->out_s1;
- src->state1.fir_delay = delay_lines_start;
- src->state1.out_delay =
- src->state1.fir_delay + src->state1.fir_delay_size;
- if (n > 1) {
- src->state2.fir_delay_size = p->fir_s2;
- src->state2.out_delay_size = p->out_s2;
- src->state2.fir_delay =
- src->state1.out_delay + src->state1.out_delay_size;
- src->state2.out_delay =
- src->state2.fir_delay + src->state2.fir_delay_size;
- } else {
- src->state2.fir_delay_size = 0;
- src->state2.out_delay_size = 0;
- src->state2.fir_delay = NULL;
- src->state2.out_delay = NULL;
- }
-
- /* Check the sizes are less than MAX */
- if ((src->state1.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH)
- || (src->state1.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH)
- || (src->state2.fir_delay_size > MAX_FIR_DELAY_SIZE_XNCH)
- || (src->state2.out_delay_size > MAX_OUT_DELAY_SIZE_XNCH)) {
- src->state1.fir_delay = NULL;
- src->state1.out_delay = NULL;
- src->state2.fir_delay = NULL;
- src->state2.out_delay = NULL;
- return -EINVAL;
- }
-
- return 0;
-}
-
-void src_polyphase_reset(struct polyphase_src *src)
-{
-
- src->number_of_stages = 0;
- src->stage1 = NULL;
- src->stage2 = NULL;
- src_state_reset(&src->state1);
- src_state_reset(&src->state2);
-}
-
-int src_polyphase_init(struct polyphase_src *src, struct src_param *p,
- int32_t *delay_lines_start)
-{
- struct src_stage *stage1;
- struct src_stage *stage2;
- int n_stages;
- int ret;
-
- if ((p->idx_in < 0) || (p->idx_out < 0)) {
- return -EINVAL;
- }
-
- /* Get setup for 2 stage conversion */
- stage1 = src_table1[p->idx_out][p->idx_in];
- stage2 = src_table2[p->idx_out][p->idx_in];
- ret = init_stages(stage1, stage2, src, p, 2, delay_lines_start);
- if (ret < 0)
- return -EINVAL;
-
- /* Get number of stages used for optimize opportunity. 2nd
- * stage length is one if conversion needs only one stage.
- * If input and output rate is the same return 0 to
- * use a simple copy function instead of 1 stage FIR with one
- * tap.
- */
- n_stages = (src->stage2->filter_length == 1) ? 1 : 2;
- if (p->idx_in == p->idx_out)
- n_stages = 0;
-
- /* If filter length for first stage is zero this is a deleted
- * mode from in/out matrix. Computing of such SRC mode needs
- * to be prevented.
- */
- if (src->stage1->filter_length == 0)
- return -EINVAL;
-
- return n_stages;
-}
-
-#if SRC_SHORT == 1
-
-/* Calculate a FIR filter part that does not need circular modification */
-
-static inline void fir_part(int64_t y[], int *id, int *ic,
- const int32_t data[], const int16_t coef[], int nch_x_taps, int nch)
-{
- int64_t tap0;
- int64_t tap1;
- int n;
- int64_t a = 0;
- int64_t b = 0;
- int c = *ic;
- int d = *id;
- int d_end = d - nch_x_taps;
-
- /* Data is Q1.31, coef is Q1.15, product is Q2.46 */
- if (nch == 2) {
- for (n = 0; n < (nch_x_taps >> 2); n++) {
- tap0 = coef[c++];
- tap1 = coef[c++];
- b += data[d--] * tap0;
- a += data[d--] * tap0;
- b += data[d--] * tap1;
- a += data[d--] * tap1;
- }
- if (d > d_end) {
- tap0 = coef[c++];
- b += data[d--] * tap0;
- a += data[d--] * tap0;
- }
- y[1] += b;
- y[0] += a;
- } else {
- while (d > d_end) {
- tap0 = coef[c++];
- for (n = nch - 1; n >= 0; n--)
- y[n] += data[d--] * tap0;
- }
- }
- *ic = c;
- *id = d;
-}
-
-#else
-
-static inline void fir_part(int64_t y[], int *id, int *ic,
- const int32_t data[], const int32_t coef[], int nch_x_taps, int nch)
-{
- int64_t tap0;
- int64_t tap1;
- int n;
- int64_t a = 0;
- int64_t b = 0;
- int c = *ic;
- int d = *id;
- int d_end = d - nch_x_taps;
-
- /* Data is Q1.31, coef is Q1.23, product is Q2.54 */
- if (nch == 2) {
- for (n = 0; n < (nch_x_taps >> 2); n++) {
- tap0 = coef[c++];
- tap1 = coef[c++];
- b += data[d--] * tap0;
- a += data[d--] * tap0;
- b += data[d--] * tap1;
- a += data[d--] * tap1;
- }
- if (d > d_end) {
- tap0 = coef[c++];
- b += data[d--] * tap0;
- a += data[d--] * tap0;
- }
- y[1] += b;
- y[0] += a;
- } else {
- while (d > d_end) {
- tap0 = coef[c++];
- for (n = nch - 1; n >= 0; n--)
- y[n] += data[d--] * tap0;
- }
- }
- *ic = c;
- *id = d;
-}
-
-#endif
-
-#if SRC_SHORT == 1
-
-static void fir_filter(int ri0, int *ci, int wi0, int32_t in_delay[],
- int32_t out_delay[], const int16_t coefs[], int dsm1, int taps,
- int shift, int nch)
-{
- int n2;
- int i;
- int64_t y[PLATFORM_MAX_CHANNELS];
- int ri = ri0;
- int wi = wi0;
- int n1 = ri0 + 1; /* Convert to number of sequential frames */
- int qshift = 15 + shift; /* Q2.46 -> Q2.31 */
- int32_t rnd = 1 << (qshift - 1); /* Half LSB */
- int nch_x_taps = nch * taps;
-
- /* Initialize to half LSB for rounding */
- for (i = 0; i < nch; i++)
- y[i] = rnd;
-
- if (n1 >= nch_x_taps) {
- fir_part(y, &ri, ci, in_delay, coefs, nch_x_taps, nch);
- } else {
- n2 = nch_x_taps - n1;
- fir_part(y, &ri, ci, in_delay, coefs, n1, nch);
- ri = dsm1;
- fir_part(y, &ri, ci, in_delay, coefs, n2, nch);
- }
-
- for (i = 0; i < nch; i++)
- out_delay[wi++] = sat_int32(y[i] >> qshift);
-}
-#else
-
-static void fir_filter(int ri0, int *ci, int wi0, int32_t in_delay[],
- int32_t out_delay[], const int32_t coefs[], int dsm1, int taps,
- int shift, int nch)
-{
- int n2;
- int i;
- int64_t y[PLATFORM_MAX_CHANNELS];
- int ri = ri0;
- int wi = wi0;
- int n1 = ri0 + 1; /* Convert to number of sequential frames */
- int qshift = 23 + shift; /* Q2.54 -> Q2.31 */
- int32_t rnd = 1 << (qshift - 1); /* Half LSB */
- int nch_x_taps = nch * taps;
-
- /* Initialize to half LSB for rounding */
- for (i = 0; i < nch; i++)
- y[i] = rnd;
-
- if (n1 >= nch_x_taps) {
- fir_part(y, &ri, ci, in_delay, coefs, nch_x_taps, nch);
- } else {
- n2 = nch_x_taps - n1;
- fir_part(y, &ri, ci, in_delay, coefs, n1, nch);
- ri = dsm1;
- fir_part(y, &ri, ci, in_delay, coefs, n2, nch);
- }
-
- for (i = 0; i < nch; i++)
- out_delay[wi++] = sat_int32(y[i] >> qshift);
-
-}
-
-#endif
-
-void src_polyphase_stage_cir(struct src_stage_prm * s)
-{
- struct src_state *fir = s->state;
- struct src_stage *cfg = s->stage;
- int n;
- int m;
- int f;
- int ci;
- int ri;
- int n_wrap_fir;
- int n_wrap_buf;
- int n_wrap_min;
- int n_min;
- int wi;
- const void *coef = cfg->coefs;
- int32_t *in_delay = fir->fir_delay;
- int32_t *out_delay = fir->out_delay;
- int dsm1 = fir->fir_delay_size - 1;
- int shift = cfg->shift;
- int nch = s->nch;
- int rewind = -nch * (cfg->blk_in
- + (cfg->num_of_subfilters - 1) * cfg->idm) + nch - 1;
- int nch_x_idm = cfg->idm * nch;
- int nch_x_odm = cfg->odm * nch;
- size_t sz = sizeof(int32_t);
- int blk_in_bytes = nch * cfg->blk_in * sz;
- int blk_out_bytes = nch * cfg->num_of_subfilters * sz;
-
-
- for (n = 0; n < s->times; n++) {
- /* Input data */
- m = blk_in_bytes;
- while (m > 0) {
- n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) * sz;
- n_wrap_buf = s->x_end_addr - s->x_rptr;
- n_wrap_min = (n_wrap_fir < n_wrap_buf)
- ? n_wrap_fir : n_wrap_buf;
- n_min = (m < n_wrap_min) ? m : n_wrap_min;
- while (n_min > 0) {
- fir->fir_delay[fir->fir_wi++] = *s->x_rptr;
- s->x_rptr++;
- n_min -= sz;
- m -= sz;
- }
- /* Check for wrap */
- src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
- if (fir->fir_wi == fir->fir_delay_size)
- fir->fir_wi = 0;
- }
-
- /* Filter */
- ci = 0; /* Reset to 1st coefficient */
- ri = fir->fir_wi + rewind; /* Newest data for last subfilter */
- if (ri < 0)
- ri += fir->fir_delay_size;
-
- wi = fir->out_ri;
- for (f = 0; f < cfg->num_of_subfilters; f++) {
- fir_filter(ri, &ci, wi, in_delay, out_delay, coef,
- dsm1, cfg->subfilter_length, shift, nch);
-
- wi += nch_x_odm;
- if (wi >= fir->out_delay_size)
- wi -= fir->out_delay_size;
-
- ri += nch_x_idm; /* Next sub-filter start */
- if (ri >= fir->fir_delay_size)
- ri -= fir->fir_delay_size;
- }
-
- /* Output */
- m = blk_out_bytes;
- while (m > 0) {
- n_wrap_fir = (fir->out_delay_size - fir->out_ri) * sz;
- n_wrap_buf = s->y_end_addr - s->y_wptr;
- n_wrap_min = (n_wrap_fir < n_wrap_buf)
- ? n_wrap_fir : n_wrap_buf;
- n_min = (m < n_wrap_min) ? m : n_wrap_min;
- while (n_min > 0) {
- *s->y_wptr = fir->out_delay[fir->out_ri++];
- s->y_wptr++;
- n_min -= sz;
- m -= sz;
- }
- /* Check wrap */
- src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
- if (fir->out_ri == fir->out_delay_size)
- fir->out_ri = 0;
- }
- }
-}
-
-void src_polyphase_stage_cir_s24(struct src_stage_prm *s)
-{
- struct src_state *fir = s->state;
- struct src_stage *cfg = s->stage;
- int n;
- int m;
- int f;
- int ci;
- int ri;
- int n_wrap_fir;
- int n_wrap_buf;
- int n_wrap_min;
- int n_min;
- int wi;
- const void *coef = cfg->coefs;
- int32_t *in_delay = fir->fir_delay;
- int32_t *out_delay = fir->out_delay;
- int dsm1 = fir->fir_delay_size - 1;
- int shift = cfg->shift;
- int nch = s->nch;
- int rewind = -nch * (cfg->blk_in
- + (cfg->num_of_subfilters - 1) * cfg->idm) + nch - 1;
- int nch_x_idm = cfg->idm * nch;
- int nch_x_odm = cfg->odm * nch;
- size_t sz = sizeof(int32_t);
- int blk_in_bytes = nch * cfg->blk_in * sz;
- int blk_out_bytes = nch * cfg->num_of_subfilters * sz;
-
- for (n = 0; n < s->times; n++) {
- /* Input data */
- m = blk_in_bytes;
- while (m > 0) {
- n_wrap_fir = (fir->fir_delay_size - fir->fir_wi) * sz;
- n_wrap_buf = s->x_end_addr - s->x_rptr;
- n_wrap_min = (n_wrap_fir < n_wrap_buf)
- ? n_wrap_fir : n_wrap_buf;
- n_min = (m < n_wrap_min) ? m : n_wrap_min;
- while (n_min > 0) {
- fir->fir_delay[fir->fir_wi++] = *s->x_rptr << 8;
- s->x_rptr++;
- n_min -= sz;
- m -= sz;
- }
- /* Check for wrap */
- src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
- if (fir->fir_wi == fir->fir_delay_size)
- fir->fir_wi = 0;
- }
-
- /* Filter */
- ci = 0; /* Reset to 1st coefficient */
- ri = fir->fir_wi + rewind; /* Newest data for last subfilter */
- if (ri < 0)
- ri += fir->fir_delay_size;
-
- wi = fir->out_ri;
- for (f = 0; f < cfg->num_of_subfilters; f++) {
- fir_filter(ri, &ci, wi, in_delay, out_delay, coef,
- dsm1, cfg->subfilter_length, shift, nch);
-
- wi += nch_x_odm;
- if (wi >= fir->out_delay_size)
- wi -= fir->out_delay_size;
-
- ri += nch_x_idm; /* Next sub-filter start */
- if (ri >= fir->fir_delay_size)
- ri -= fir->fir_delay_size;
- }
-
- /* Output */
- m = blk_out_bytes;
- while (m > 0) {
- n_wrap_fir = (fir->out_delay_size - fir->out_ri) * sz;
- n_wrap_buf = s->y_end_addr - s->y_wptr;
- n_wrap_min = (n_wrap_fir < n_wrap_buf)
- ? n_wrap_fir : n_wrap_buf;
- n_min = (m < n_wrap_min) ? m : n_wrap_min;
- while (n_min > 0) {
- *s->y_wptr = fir->out_delay[fir->out_ri++] >> 8;
- s->y_wptr++;
- n_min -= sz;
- m -= sz;
- }
- /* Check wrap */
- src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
- if (fir->out_ri == fir->out_delay_size)
- fir->out_ri = 0;
- }
- }
-
-}
diff --git a/src/audio/src_generic.c b/src/audio/src_generic.c
new file mode 100644
index 0000000..9caa090
--- /dev/null
+++ b/src/audio/src_generic.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the Intel Corporation nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Seppo Ingalsuo <seppo.ingalsuo(a)linux.intel.com>
+ *
+ */
+
+/* Default C implementation guaranteed to work on any
+ * architecture.
+ */
+
+#include <stdint.h>
+#include <reef/alloc.h>
+#include <reef/audio/format.h>
+#include <reef/math/numbers.h>
+
+#include "src_config.h"
+#include "src.h"
+
+#if SRC_GENERIC
+
+#if SRC_SHORT /* 16 bit coefficients version */
+
+static inline void fir_filter_generic(int32_t *rp, const void *cp, int32_t *wp0,
+ int32_t *fir_start, int32_t *fir_end, const int fir_delay_length,
+ const int taps_x_nch, const int shift, const int nch)
+{
+ int64_t y0;
+ int64_t y1;
+ int32_t *data;
+ const int16_t *coef;
+ int i;
+ int j;
+ int n1;
+ int n2;
+ int frames;
+ const int qshift = 15 + shift; /* Q2.46 -> Q2.31 */
+ const int32_t rnd = 1 << (qshift - 1); /* Half LSB */
+ int32_t *d = rp;
+ int32_t *wp = wp0;
+
+ /* Check for 2ch FIR case */
+ if (nch == 2) {
+ /* Decrement data pointer to next channel start. Note that
+ * initialization code ensures that circular wrap does not
+ * happen mid-frame.
+ */
+ data = d - 1;
+
+ /* Initialize to half LSB for rounding, prepare for FIR core */
+ y0 = rnd;
+ y1 = rnd;
+ coef = (const int16_t *)cp;
+ frames = fir_end - data; /* Frames until wrap */
+ n1 = ((taps_x_nch < frames) ? taps_x_nch : frames) >> 1;
+ n2 = (taps_x_nch >> 1) - n1;
+
+ /* The FIR is calculated as Q1.15 x Q1.31 -> Q2.46. The
+ * output shift includes the shift by 15 for Qx.46 to
+ * Qx.31.
+ */
+ for (i = 0; i < n1; i++) {
+ y0 += (int64_t)(*coef) * (*data);
+ data++;
+ y1 += (int64_t)(*coef) * (*data);
+ data++;
+ coef++;
+ }
+ if (data == fir_end)
+ data = fir_start;
+
+ for (i = 0; i < n2; i++) {
+ y0 += (int64_t)(*coef) * (*data);
+ data++;
+ y1 += (int64_t)(*coef) * (*data);
+ data++;
+ coef++;
+ }
+
+ *wp = sat_int32(y1 >> qshift);
+ *(wp + 1) = sat_int32(y0 >> qshift);
+ return;
+ }
+
+ for (j = 0; j < nch; j++) {
+ /* Decrement data pointer to next channel start. Note that
+ * initialization code ensures that circular wrap does not
+ * happen mid-frame.
+ */
+ data = d--;
+
+ /* Initialize to half LSB for rounding, prepare for FIR core */
+ y0 = rnd;
+ coef = (const int16_t *)cp;
+ frames = fir_end - data + nch - j - 1; /* Frames until wrap */
+ n1 = (taps_x_nch < frames) ? taps_x_nch : frames;
+ n2 = taps_x_nch - n1;
+
+ /* The FIR is calculated as Q1.15 x Q1.31 -> Q2.46. The
+ * output shift includes the shift by 15 for Qx.46 to
+ * Qx.31.
+ */
+ for (i = 0; i < n1; i += nch) {
+ y0 += (int64_t)(*coef) * (*data);
+ coef++;
+ data += nch;
+ }
+ if (data >= fir_end)
+ data -= fir_delay_length;
+
+ for (i = 0; i < n2; i += nch) {
+ y0 += (int64_t)(*coef) * (*data);
+ coef++;
+ data += nch;
+ }
+
+ *wp = sat_int32(y0 >> qshift);
+ wp++;
+ }
+}
+
+#else /* 32bit coefficients version */
+
+static inline void fir_filter_generic(int32_t *rp, const void *cp, int32_t *wp0,
+ int32_t *fir_start, int32_t *fir_end, int fir_delay_length,
+ const int taps_x_nch, const int shift, const int nch)
+{
+ int64_t y0;
+ int64_t y1;
+ int32_t *data;
+ const int32_t *coef;
+ int i;
+ int j;
+ int frames;
+ int n1;
+ int n2;
+
+ const int qshift = 23 + shift; /* Qx.54 -> Qx.31 */
+ const int32_t rnd = 1 << (qshift - 1); /* Half LSB */
+ int32_t *d = rp;
+ int32_t *wp = wp0;
+
+ /* Check for 2ch FIR case */
+ if (nch == 2) {
+ /* Decrement data pointer to next channel start. Note that
+ * initialization code ensures that circular wrap does not
+ * happen mid-frame.
+ */
+ data = d - 1;
+
+ /* Initialize to half LSB for rounding, prepare for FIR core */
+ y0 = rnd;
+ y1 = rnd;
+ coef = (const int32_t *)cp;
+ frames = fir_end - data; /* Frames until wrap */
+ n1 = ((taps_x_nch < frames) ? taps_x_nch : frames) >> 1;
+ n2 = (taps_x_nch >> 1) - n1;
+
+ /* The FIR is calculated as Q1.23 x Q1.31 -> Q2.54. The
+ * output shift includes the shift by 23 for Qx.54 to
+ * Qx.31.
+ */
+ for (i = 0; i < n1; i++) {
+ y0 += (int64_t)(*coef >> 8) * (*data);
+ data++;
+ y1 += (int64_t)(*coef >> 8) * (*data);
+ data++;
+ coef++;
+ }
+ if (data == fir_end)
+ data = fir_start;
+
+ for (i = 0; i < n2; i++) {
+ y0 += (int64_t)(*coef >> 8) * (*data);
+ data++;
+ y1 += (int64_t)(*coef >> 8) * (*data);
+ data++;
+ coef++;
+ }
+ *wp = sat_int32(y1 >> qshift);
+ *(wp + 1) = sat_int32(y0 >> qshift);
+ return;
+ }
+
+ for (j = 0; j < nch; j++) {
+ /* Decrement data pointer to next channel start. Note that
+ * initialization code ensures that circular wrap does not
+ * happen mid-frame.
+ */
+ data = d--;
+
+ /* Initialize to half LSB for rounding, prepare for FIR core */
+ y0 = rnd;
+ coef = (const int32_t *)cp;
+ frames = fir_end - data + nch - j - 1; /* Frames until wrap */
+ n1 = (taps_x_nch < frames) ? taps_x_nch : frames;
+ n2 = taps_x_nch - n1;
+
+ /* The FIR is calculated as Q1.23 x Q1.31 -> Q2.54. The
+ * output shift includes the shift by 23 for Qx.54 to
+ * Qx.31.
+ */
+ for (i = 0; i < n1; i += nch) {
+ y0 += (int64_t)(*coef >> 8) * (*data);
+ coef++;
+ data += nch;
+ }
+ if (data >= fir_end)
+ data -= fir_delay_length;
+
+ for (i = 0; i < n2; i += nch) {
+ y0 += (int64_t)(*coef >> 8) * (*data);
+ coef++;
+ data += nch;
+ }
+ *wp = sat_int32(y0 >> qshift);
+ wp++;
+ }
+}
+
+#endif /* 32bit coefficients version */
+
+void src_polyphase_stage_cir(struct src_stage_prm *s)
+{
+ int i;
+ int n;
+ int m;
+ int n_wrap_buf;
+ int n_wrap_fir;
+ int n_min;
+ int32_t *rp;
+ int32_t *wp;
+
+ struct src_state *fir = s->state;
+ struct src_stage *cfg = s->stage;
+ int32_t *fir_delay = fir->fir_delay;
+ int32_t *fir_end = &fir->fir_delay[fir->fir_delay_size];
+ int32_t *out_delay_end = &fir->out_delay[fir->out_delay_size];
+ const void *cp; /* Can be int32_t or int16_t */
+ const size_t out_size = fir->out_delay_size * sizeof(int32_t);
+ const int nch = s->nch;
+ const int nch_x_odm = cfg->odm * nch;
+ const int blk_in_words = nch * cfg->blk_in;
+ const int blk_out_words = nch * cfg->num_of_subfilters;
+ const int fir_length = fir->fir_delay_size;
+ const int rewind = nch * (cfg->blk_in
+ + (cfg->num_of_subfilters - 1) * cfg->idm) - nch;
+ const int nch_x_idm = nch * cfg->idm;
+ const size_t fir_size = fir->fir_delay_size * sizeof(int32_t);
+ const int taps_x_nch = cfg->subfilter_length * nch;
+
+#if SRC_SHORT
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int16_t);
+#else
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int32_t);
+#endif
+
+ for (n = 0; n < s->times; n++) {
+ /* Input data */
+ m = blk_in_words;
+ while (m > 0) {
+ /* Number of words without circular wrap */
+ n_wrap_buf = s->x_end_addr - s->x_rptr;
+ n_wrap_fir = fir->fir_wp - fir->fir_delay + 1;
+ n_min = (n_wrap_fir < n_wrap_buf)
+ ? n_wrap_fir : n_wrap_buf;
+ n_min = (m < n_min) ? m : n_min;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ *fir->fir_wp = *s->x_rptr;
+ fir->fir_wp--;
+ s->x_rptr++;
+ }
+ /* Check for wrap */
+ src_circ_dec_wrap(&fir->fir_wp, fir_delay, fir_size);
+ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+ }
+
+ /* Filter */
+ cp = cfg->coefs; /* Reset to 1st coefficient */
+ rp = fir->fir_wp + rewind;
+ src_circ_inc_wrap(&rp, fir_end, fir_size);
+ wp = fir->out_rp;
+ for (i = 0; i < cfg->num_of_subfilters; i++) {
+ fir_filter_generic(rp, cp, wp,
+ fir_delay, fir_end, fir_length,
+ taps_x_nch, cfg->shift, nch);
+ wp += nch_x_odm;
+ cp += subfilter_size;
+ src_circ_inc_wrap(&wp, out_delay_end, out_size);
+ rp -= nch_x_idm; /* Next sub-filter start */
+ src_circ_dec_wrap(&rp, fir_delay, fir_size);
+ }
+
+ /* Output */
+ m = blk_out_words;
+ while (m > 0) {
+ n_wrap_fir = out_delay_end - fir->out_rp;
+ n_wrap_buf = s->y_end_addr - s->y_wptr;
+ n_min = (n_wrap_fir < n_wrap_buf)
+ ? n_wrap_fir : n_wrap_buf;
+ n_min = (m < n_min) ? m : n_min;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ *s->y_wptr = *fir->out_rp;
+ s->y_wptr++;
+ fir->out_rp++;
+ }
+ /* Check wrap */
+ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+ src_circ_inc_wrap(&fir->out_rp, out_delay_end,
+ out_size);
+ }
+ }
+}
+
+void src_polyphase_stage_cir_s24(struct src_stage_prm *s)
+{
+ int i;
+ int n;
+ int m;
+ int n_wrap_buf;
+ int n_wrap_fir;
+ int n_min;
+ int32_t *rp;
+ int32_t *wp;
+
+ struct src_state *fir = s->state;
+ struct src_stage *cfg = s->stage;
+ int32_t *fir_delay = fir->fir_delay;
+ int32_t *fir_end = &fir->fir_delay[fir->fir_delay_size];
+ int32_t *out_delay_end = &fir->out_delay[fir->out_delay_size];
+ const void *cp; /* Can be int32_t or int16_t */
+ const size_t out_size = fir->out_delay_size * sizeof(int32_t);
+ const int nch = s->nch;
+ const int nch_x_odm = cfg->odm * nch;
+ const int blk_in_words = nch * cfg->blk_in;
+ const int blk_out_words = nch * cfg->num_of_subfilters;
+ const int fir_length = fir->fir_delay_size;
+ const int rewind = nch * (cfg->blk_in
+ + (cfg->num_of_subfilters - 1) * cfg->idm) - nch;
+ const int nch_x_idm = nch * cfg->idm;
+ const size_t fir_size = fir->fir_delay_size * sizeof(int32_t);
+ const int taps_x_nch = cfg->subfilter_length * nch;
+
+#if SRC_SHORT
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int16_t);
+#else
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int32_t);
+#endif
+
+ for (n = 0; n < s->times; n++) {
+ /* Input data */
+ m = blk_in_words;
+ while (m > 0) {
+ /* Number of words without circular wrap */
+ n_wrap_buf = s->x_end_addr - s->x_rptr;
+ n_wrap_fir = fir->fir_wp - fir->fir_delay + 1;
+ n_min = (n_wrap_fir < n_wrap_buf)
+ ? n_wrap_fir : n_wrap_buf;
+ n_min = (m < n_min) ? m : n_min;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ *fir->fir_wp = *s->x_rptr << 8;
+ fir->fir_wp--;
+ s->x_rptr++;
+ }
+ /* Check for wrap */
+ src_circ_dec_wrap(&fir->fir_wp, fir_delay, fir_size);
+ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+ }
+
+ /* Filter */
+ cp = cfg->coefs; /* Reset to 1st coefficient */
+ rp = fir->fir_wp + rewind;
+ src_circ_inc_wrap(&rp, fir_end, fir_size);
+ wp = fir->out_rp;
+ for (i = 0; i < cfg->num_of_subfilters; i++) {
+ fir_filter_generic(rp, cp, wp,
+ fir_delay, fir_end, fir_length,
+ taps_x_nch, cfg->shift, nch);
+ wp += nch_x_odm;
+ cp += subfilter_size;
+ src_circ_inc_wrap(&wp, out_delay_end, out_size);
+ rp -= nch_x_idm; /* Next sub-filter start */
+ src_circ_dec_wrap(&rp, fir_delay, fir_size);
+ }
+
+ /* Output */
+ m = blk_out_words;
+ while (m > 0) {
+ n_wrap_fir = out_delay_end - fir->out_rp;
+ n_wrap_buf = s->y_end_addr - s->y_wptr;
+ n_min = (n_wrap_fir < n_wrap_buf)
+ ? n_wrap_fir : n_wrap_buf;
+ n_min = (m < n_min) ? m : n_min;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ *s->y_wptr = *fir->out_rp >> 8;
+ s->y_wptr++;
+ fir->out_rp++;
+ }
+ /* Check wrap */
+ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+ src_circ_inc_wrap(&fir->out_rp, out_delay_end,
+ out_size);
+ }
+ }
+}
+
+#endif
diff --git a/src/audio/src_hifi2ep.c b/src/audio/src_hifi2ep.c
new file mode 100644
index 0000000..0d03ffa
--- /dev/null
+++ b/src/audio/src_hifi2ep.c
@@ -0,0 +1,562 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the Intel Corporation nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Seppo Ingalsuo <seppo.ingalsuo(a)linux.intel.com>
+ *
+ */
+
+/* HiFi EP optimized code parts for SRC */
+
+#include <stdint.h>
+#include <reef/alloc.h>
+#include <reef/audio/format.h>
+#include <reef/math/numbers.h>
+
+#include "src_config.h"
+#include "src.h"
+
+#if SRC_HIFIEP
+
+#include <xtensa/config/defs.h>
+#include <xtensa/tie/xt_hifi2.h>
+
+/* HiFi EP has
+ * 4x 56 bit registers in register file Q
+ * 8x 48 bit registers in register file P
+ */
+
+#if SRC_SHORT /* 16 bit coefficients version */
+
+static inline void fir_filter(ae_q32s *rp, const void *cp, ae_q32s *wp0,
+ const int taps_div_4, const int shift, const int nch)
+{
+ /* Compute one FIR sub-filter output sample per channel from
+ * interleaved data, 16 bit coefficients version.
+ *
+ * rp: read position in the data buffer. Data is read with
+ * circular (_C) loads; this function does not set the
+ * circular buffer begin/end itself.
+ * cp: coefficients, read as pairs of 16 bit values.
+ * wp0: where the nch output samples are stored.
+ * taps_div_4: loop count, four taps are consumed per iteration.
+ * shift: right shift applied to the accumulator before it is
+ * rounded to the 32 bit output.
+ * nch: number of interleaved channels.
+ *
+ * This function uses
+ * 2x 56 bit registers Q,
+ * 4x 48 bit registers P
+ * 3x integers
+ * 4x address pointers,
+ */
+ ae_q56s a0;
+ ae_q56s a1;
+ ae_p24x2f data2;
+ ae_p24x2f coef2;
+ ae_p24x2f p0;
+ ae_p24x2f p1;
+ ae_p16x2s *coefp;
+ ae_p24x2f *dp = (ae_p24x2f *)rp;
+ ae_p24x2f *dp0;
+ ae_q32s *wp = wp0;
+ int i;
+ int j;
+ const int inc = sizeof(ae_p24x2f);
+
+ /* 2ch FIR case, both channels are computed in the same loop */
+ if (nch == 2) {
+ /* Move data pointer back by one sample to start from right
+ * channel sample. Discard read value p0.
+ */
+ AE_LP24F_C(p0, dp, -sizeof(ae_p24f));
+
+ /* Reset coefficient pointer and clear accumulators */
+ coefp = (ae_p16x2s *)cp;
+ a0 = AE_ZEROQ56();
+ a1 = AE_ZEROQ56();
+
+ /* Compute FIR filter for both channels with four
+ * taps per every loop iteration. Two coefficients
+ * are loaded simultaneously. Data is read
+ * from interleaved buffer with stride of channels
+ * count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load two coefficients. Coef2_h contains tap *coefp
+ * and coef2_l contains the next tap.
+ */
+ coef2 = AE_LP16X2F_I(coefp, 0);
+ coefp++;
+
+ /* Load two data samples from two channels */
+ AE_LP24X2F_C(p0, dp, inc); /* r0, l0 */
+ AE_LP24X2F_C(p1, dp, inc); /* r1, l1 */
+
+ /* Select to data2 the low halves of p0 and p1 for
+ * accumulator a0, then the high halves of p0 and p1
+ * for accumulator a1. Then accumulate
+ * data2_h * coef2_h + data2_l * coef2_l. The Q1.31
+ * data and Q1.15 coefficients are used as 24 bits as
+ * Q1.23 values.
+ */
+ data2 = AE_SELP24_LL(p0, p1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ data2 = AE_SELP24_HH(p0, p1);
+ AE_MULAAFP24S_HH_LL(a1, data2, coef2);
+
+ /* Repeat for next two taps */
+ coef2 = AE_LP16X2F_I(coefp, 0);
+ coefp++;
+ AE_LP24X2F_C(p0, dp, inc); /* r2, l2 */
+ AE_LP24X2F_C(p1, dp, inc); /* r3, l3 */
+ data2 = AE_SELP24_LL(p0, p1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ data2 = AE_SELP24_HH(p0, p1);
+ AE_MULAAFP24S_HH_LL(a1, data2, coef2);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate
+ * to Q1.31, and store 32 bit output. The a0 result goes to
+ * wp and the a1 result to the following 32 bit word.
+ */
+ AE_SQ32F_I(AE_ROUNDSQ32SYM(AE_SRAAQ56(a0, shift)), wp, 0);
+ AE_SQ32F_I(AE_ROUNDSQ32SYM(AE_SRAAQ56(a1, shift)), wp,
+ sizeof(int32_t));
+ return;
+ }
+
+ for (j = 0; j < nch; j++) {
+ /* Copy pointer and advance to next ch with dummy load */
+ dp0 = dp;
+ AE_LP24F_C(p0, dp, -sizeof(ae_p24f));
+
+ /* Reset coefficient pointer and clear accumulator */
+ coefp = (ae_p16x2s *)cp;
+ a0 = AE_ZEROQ56();
+
+ /* Compute FIR filter for current channel with four
+ * taps per every loop iteration. Two coefficients
+ * are loaded simultaneously. Data is read
+ * from interleaved buffer with stride of channels
+ * count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load two coefficients */
+ coef2 = *coefp++;
+
+ /* Load two data samples.
+ * NOTE(review): the pointer step inc is
+ * sizeof(ae_p24x2f) and does not depend on nch.
+ * Confirm this stride is intended when nch != 2.
+ */
+ AE_LP24F_C(p0, dp0, inc);
+ AE_LP24F_C(p1, dp0, inc);
+
+ /* Pack p0 and p1 to data2_h and data2_l */
+ data2 = AE_SELP24_LL(p0, p1);
+
+ /* Accumulate data2_h * coef2_h + data2_l * coef2_l */
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+
+ /* Repeat for next two filter taps */
+ coef2 = *coefp++;
+ AE_LP24F_C(p0, dp0, inc);
+ AE_LP24F_C(p1, dp0, inc);
+ data2 = AE_SELP24_LL(p0, p1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate
+ * to Q1.31, and store 32 bit output. Advance write
+ * pointer to next sample.
+ */
+ AE_SQ32F_I(AE_ROUNDSQ32SYM(AE_SRAAQ56(a0, shift)), wp, 0);
+ wp++;
+ }
+}
+
+#else /* 32bit coefficients version */
+
+static inline void fir_filter(ae_q32s *rp, const void *cp, ae_q32s *wp0,
+ const int taps_div_4, const int shift, const int nch)
+{
+ /* Compute one FIR sub-filter output sample per channel from
+ * interleaved data, 32 bit coefficients version.
+ *
+ * rp: read position in the data buffer. Data is read with
+ * circular (_C) loads; this function does not set the
+ * circular buffer begin/end itself.
+ * cp: coefficients, read as pairs of 32 bit words.
+ * wp0: where the nch output samples are stored.
+ * taps_div_4: loop count, four taps are consumed per iteration.
+ * shift: right shift applied to the accumulator before it is
+ * rounded to the 32 bit output.
+ * nch: number of interleaved channels.
+ *
+ * This function uses
+ * 2x 56 bit registers Q,
+ * 4x 48 bit registers P
+ * 3x integers
+ * 4x address pointers,
+ */
+ ae_q56s a0;
+ ae_q56s a1;
+ ae_p24x2f p0;
+ ae_p24x2f p1;
+ ae_p24x2f data2;
+ ae_p24x2f coef2;
+ ae_p24x2f *coefp;
+ ae_p24x2f *dp = (ae_p24x2f *)rp;
+ ae_p24x2f *dp0;
+ ae_q32s *wp = wp0;
+ int i;
+ int j;
+ const int inc = sizeof(ae_p24x2f);
+
+ /* 2ch FIR case, both channels are computed in the same loop */
+ if (nch == 2) {
+ /* Move data pointer back by one sample to start from right
+ * channel sample. Discard read value p0.
+ */
+ AE_LP24F_C(p0, dp, -sizeof(ae_p24f));
+
+ /* Reset coefficient pointer and clear accumulators */
+ coefp = (ae_p24x2f *)cp;
+ a0 = AE_ZEROQ56();
+ a1 = AE_ZEROQ56();
+
+ /* Compute FIR filter for both channels with four
+ * taps per every loop iteration. Two coefficients
+ * are loaded simultaneously. Data is read
+ * from interleaved buffer with stride of channels
+ * count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load two coefficients. Coef2_h contains tap *coefp
+ * and coef2_l contains the next tap.
+ */
+ /* TODO: Ensure coefficients are 64 bits aligned */
+ coef2 = AE_LP24X2F_I(coefp, 0);
+ coefp++;
+
+ /* Load two data samples from two channels */
+ AE_LP24X2F_C(p0, dp, inc); /* r0, l0 */
+ AE_LP24X2F_C(p1, dp, inc); /* r1, l1 */
+
+ /* Select to data2 the low halves of p0 and p1 for
+ * accumulator a0, then the high halves of p0 and p1
+ * for accumulator a1.
+ */
+
+ /* Accumulate to a0 and a1
+ * data2_h * coef2_h + data2_l * coef2_l. The Q1.31
+ * data and coefficients are used as the most
+ * significant 24 bits as Q1.23 values.
+ */
+ data2 = AE_SELP24_LL(p0, p1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ data2 = AE_SELP24_HH(p0, p1);
+ AE_MULAAFP24S_HH_LL(a1, data2, coef2);
+
+ /* Repeat for next two taps */
+ coef2 = AE_LP24X2F_I(coefp, 0);
+ coefp++;
+ AE_LP24X2F_C(p0, dp, inc); /* r2, l2 */
+ AE_LP24X2F_C(p1, dp, inc); /* r3, l3 */
+ data2 = AE_SELP24_LL(p0, p1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ data2 = AE_SELP24_HH(p0, p1);
+ AE_MULAAFP24S_HH_LL(a1, data2, coef2);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate
+ * to Q1.31, and store 32 bit output. The a0 result goes to
+ * wp and the a1 result to the following 32 bit word.
+ */
+ AE_SQ32F_I(AE_ROUNDSQ32SYM(AE_SRAAQ56(a0, shift)), wp, 0);
+ AE_SQ32F_I(AE_ROUNDSQ32SYM(AE_SRAAQ56(a1, shift)), wp,
+ sizeof(int32_t));
+ return;
+ }
+
+ for (j = 0; j < nch; j++) {
+ /* Copy pointer and advance to next ch with dummy load */
+ dp0 = dp;
+ AE_LP24F_C(p0, dp, -sizeof(ae_p24f));
+
+ /* Reset coefficient pointer and clear accumulator */
+ coefp = (ae_p24x2f *)cp;
+ a0 = AE_ZEROQ56();
+
+ /* Compute FIR filter for current channel with four
+ * taps per every loop iteration. Two coefficients
+ * are loaded simultaneously. Data is read
+ * from interleaved buffer with stride of channels
+ * count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load two coefficients */
+ coef2 = *coefp++;
+
+ /* Load two data samples and place them to L and H of
+ * data2.
+ * NOTE(review): the pointer step inc is
+ * sizeof(ae_p24x2f) and does not depend on nch.
+ * Confirm this stride is intended when nch != 2.
+ */
+ AE_LP24F_C(p0, dp0, inc);
+ AE_LP24F_C(p1, dp0, inc);
+ data2 = AE_SELP24_LH(p0, p1);
+
+ /* Accumulate to a0
+ * data2_h * coef2_h + data2_l * coef2_l. The Q1.31
+ * data and coefficients are used as the most
+ * significant 24 bits as Q1.23 values.
+ */
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+
+ /* Repeat for next two filter taps */
+ coef2 = *coefp++;
+ AE_LP24F_C(p0, dp0, inc);
+ AE_LP24F_C(p1, dp0, inc);
+ data2 = AE_SELP24_LH(p0, p1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate
+ * to Q1.31, and store 32 bit output. Advance write
+ * pointer to next sample.
+ */
+ AE_SQ32F_I(AE_ROUNDSQ32SYM(AE_SRAAQ56(a0, shift)), wp, 0);
+ wp++;
+ }
+}
+#endif /* 32bit coefficients version */
+
+void src_polyphase_stage_cir(struct src_stage_prm *s)
+{
+ /* Run the polyphase FIR stage s->times times. Each round
+ * 1) copies blk_in_words input words from the x buffer to the
+ * FIR delay line with circular stores, address decreasing,
+ * 2) computes cfg->num_of_subfilters sub-filter outputs with
+ * fir_filter() into the out delay buffer,
+ * 3) copies blk_out_words words from the out delay buffer to
+ * the y buffer.
+ * Samples are copied as such without shifts.
+ *
+ * This function uses
+ * 1x 56 bit registers Q,
+ * 0x 48 bit registers P,
+ * 16x integers
+ * 7x address pointers,
+ */
+ ae_q56s q;
+ ae_q32s *rp;
+ ae_q32s *wp;
+ int i;
+ int n;
+ int m;
+ int n_wrap_buf;
+ int n_min;
+ struct src_state *fir = s->state;
+ struct src_stage *cfg = s->stage;
+ int32_t *fir_end = &fir->fir_delay[fir->fir_delay_size];
+ int32_t *out_delay_end = &fir->out_delay[fir->out_delay_size];
+ const void *cp; /* Can be int32_t or int16_t */
+ const size_t out_size = fir->out_delay_size * sizeof(int32_t);
+ const int nch = s->nch;
+ const int nch_x_odm = cfg->odm * nch;
+ const int blk_in_words = nch * cfg->blk_in;
+ const int blk_out_words = nch * cfg->num_of_subfilters;
+ const int sz = sizeof(int32_t);
+ const int n_sz = -sizeof(int32_t);
+ const int rewind_sz = sz * (nch * (cfg->blk_in
+ + (cfg->num_of_subfilters - 1) * cfg->idm) - nch);
+ const int nch_x_idm_sz = -nch * cfg->idm * sizeof(int32_t);
+ const int taps_div_4 = cfg->subfilter_length >> 2;
+
+#if SRC_SHORT
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int16_t);
+#else
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int32_t);
+#endif
+
+ for (n = 0; n < s->times; n++) {
+ /* Input data to filter */
+ m = blk_in_words;
+
+ /* Setup circular buffer for FIR input data delay */
+ AE_SETCBEGIN0(fir->fir_delay);
+ AE_SETCEND0(fir_end);
+
+ while (m > 0) {
+ /* Number of words until circular wrap */
+ n_wrap_buf = s->x_end_addr - s->x_rptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Load 32 bits sample to accumulator */
+ q = AE_LQ32F_I((ae_q32s *)s->x_rptr++, 0);
+
+ /* Store to circular buffer, advance pointer */
+ AE_SQ32F_C(q, (ae_q32s *)fir->fir_wp, n_sz);
+ }
+
+ /* Check for wrap */
+ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+ }
+
+ /* Do filter */
+ cp = cfg->coefs; /* Reset to 1st coefficient */
+ rp = (ae_q32s *)fir->fir_wp;
+
+ /* Do circular modification to pointer rp by amount of
+ * rewind to data start. Loaded value q is discarded.
+ */
+ AE_LQ32F_C(q, (ae_q32s *)rp, rewind_sz);
+
+ /* Reset FIR output write pointer and compute all polyphase
+ * sub-filters.
+ */
+ wp = (ae_q32s *)fir->out_rp;
+ for (i = 0; i < cfg->num_of_subfilters; i++) {
+ fir_filter(rp, cp, wp, taps_div_4, cfg->shift, nch);
+ wp += nch_x_odm;
+ cp += subfilter_size; /* Coefficients of next sub-filter */
+ src_circ_inc_wrap((int32_t **)&wp, out_delay_end,
+ out_size);
+
+ /* Circular advance pointer rp by number of
+ * channels x input delay multiplier. Loaded value q
+ * is discarded.
+ */
+ AE_LQ32F_C(q, rp, nch_x_idm_sz);
+ }
+
+ /* Output */
+
+ /* Setup circular buffer for SRC out delay access */
+ AE_SETCBEGIN0(fir->out_delay);
+ AE_SETCEND0(out_delay_end);
+ m = blk_out_words;
+ while (m > 0) {
+ n_wrap_buf = s->y_end_addr - s->y_wptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Circular load followed by linear store */
+ AE_LQ32F_C(q, (ae_q32s *)fir->out_rp, sz);
+ AE_SQ32F_I(q, (ae_q32s *)s->y_wptr, 0);
+ s->y_wptr++;
+ }
+ /* Check wrap */
+ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+ }
+ }
+}
+
+void src_polyphase_stage_cir_s24(struct src_stage_prm *s)
+{
+ /* Run the polyphase FIR stage s->times times. Each round
+ * 1) copies blk_in_words input words from the x buffer to the
+ * FIR delay line with circular stores, address decreasing.
+ * Every input word is shifted left by 8 before the store,
+ * 2) computes cfg->num_of_subfilters sub-filter outputs with
+ * fir_filter() into the out delay buffer,
+ * 3) copies blk_out_words words from the out delay buffer to
+ * the y buffer. Every output word is shifted right by 8
+ * before the store.
+ *
+ * This function uses
+ * 1x 56 bit registers Q,
+ * 0x 48 bit registers P,
+ * 16x integers
+ * 7x address pointers,
+ */
+ ae_q56s q;
+ ae_q32s *rp;
+ ae_q32s *wp;
+ int i;
+ int n;
+ int m;
+ int n_wrap_buf;
+ int n_min;
+ struct src_state *fir = s->state;
+ struct src_stage *cfg = s->stage;
+ int32_t *fir_end = &fir->fir_delay[fir->fir_delay_size];
+ int32_t *out_delay_end = &fir->out_delay[fir->out_delay_size];
+ const void *cp; /* Can be int32_t or int16_t */
+ const size_t out_size = fir->out_delay_size * sizeof(int32_t);
+ const int nch = s->nch;
+ const int nch_x_odm = cfg->odm * nch;
+ const int blk_in_words = nch * cfg->blk_in;
+ const int blk_out_words = nch * cfg->num_of_subfilters;
+ const int sz = sizeof(int32_t);
+ const int n_sz = -sizeof(int32_t);
+ const int rewind_sz = sz * (nch * (cfg->blk_in
+ + (cfg->num_of_subfilters - 1) * cfg->idm) - nch);
+ const int nch_x_idm_sz = -nch * cfg->idm * sizeof(int32_t);
+ const int taps_div_4 = cfg->subfilter_length >> 2;
+
+#if SRC_SHORT
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int16_t);
+#else
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int32_t);
+#endif
+
+ for (n = 0; n < s->times; n++) {
+ /* Input data to filter */
+ m = blk_in_words;
+
+ /* Setup circular buffer for FIR input data delay */
+ AE_SETCBEGIN0(fir->fir_delay);
+ AE_SETCEND0(fir_end);
+
+ while (m > 0) {
+ /* Number of words without circular wrap */
+ n_wrap_buf = s->x_end_addr - s->x_rptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Load 32 bits sample to accumulator
+ * and left shift by 8, advance read
+ * pointer.
+ */
+ q = AE_SLLIQ56(AE_LQ32F_I(
+ (ae_q32s *)s->x_rptr++, 0), 8);
+
+ /* Store to circular buffer, advance
+ * write pointer.
+ */
+ AE_SQ32F_C(q, (ae_q32s *)fir->fir_wp, n_sz);
+ }
+
+ /* Check for wrap */
+ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+ }
+
+ /* Do filter */
+ cp = cfg->coefs; /* Reset to 1st coefficient */
+ rp = (ae_q32s *)fir->fir_wp;
+
+ /* Do circular modification to pointer rp by amount of
+ * rewind to data start. Loaded value q is discarded.
+ */
+ AE_LQ32F_C(q, (ae_q32s *)rp, rewind_sz);
+
+ /* Reset FIR output write pointer and compute all polyphase
+ * sub-filters.
+ */
+ wp = (ae_q32s *)fir->out_rp;
+ for (i = 0; i < cfg->num_of_subfilters; i++) {
+ fir_filter(rp, cp, wp, taps_div_4, cfg->shift, nch);
+ wp += nch_x_odm;
+ cp += subfilter_size; /* Coefficients of next sub-filter */
+ src_circ_inc_wrap((int32_t **)&wp, out_delay_end,
+ out_size);
+
+ /* Circular advance pointer rp by number of
+ * channels x input delay multiplier. Loaded value q
+ * is discarded.
+ */
+ AE_LQ32F_C(q, rp, nch_x_idm_sz);
+ }
+
+ /* Output */
+
+ /* Setup circular buffer for SRC out delay access */
+ AE_SETCBEGIN0(fir->out_delay);
+ AE_SETCEND0(out_delay_end);
+ m = blk_out_words;
+ while (m > 0) {
+ n_wrap_buf = s->y_end_addr - s->y_wptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Circular load for 32 bit sample,
+ * advance pointer.
+ */
+ AE_LQ32F_C(q, (ae_q32s *)fir->out_rp, sz);
+
+ /* Store value as shifted right by 8 for
+ * sign extended 24 bit value, advance pointer.
+ */
+ AE_SQ32F_I(AE_SRAIQ56(q, 8),
+ (ae_q32s *)s->y_wptr, 0);
+ s->y_wptr++;
+ }
+ /* Check wrap */
+ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+ }
+ }
+}
+
+#endif
diff --git a/src/audio/src_hifi3.c b/src/audio/src_hifi3.c
new file mode 100644
index 0000000..96d3c99
--- /dev/null
+++ b/src/audio/src_hifi3.c
@@ -0,0 +1,567 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the Intel Corporation nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Seppo Ingalsuo <seppo.ingalsuo(a)linux.intel.com>
+ *
+ */
+
+/* HiFi3 optimized code parts for SRC */
+
+#include <stdint.h>
+#include <reef/alloc.h>
+#include <reef/audio/format.h>
+#include <reef/math/numbers.h>
+
+#include "src_config.h"
+#include "src.h"
+
+#if SRC_HIFI3
+
+#include <xtensa/config/defs.h>
+#include <xtensa/tie/xt_hifi3.h>
+
+/* HiFi3 has
+ * 16x 64 bit registers in register file AE_DR
+ */
+
+#if SRC_SHORT /* 16 bit coefficients version */
+
+static inline void fir_filter(ae_f32 *rp, const void *cp, ae_f32 *wp0,
+ const int taps_div_4, const int shift, const int nch)
+{
+ /* Compute one FIR sub-filter output sample per channel from
+ * interleaved data, 16 bit coefficients version.
+ *
+ * rp: read position in the data buffer. Data is read with
+ * circular (_XC) loads; this function does not set the
+ * circular buffer begin/end itself.
+ * cp: coefficients, read four 16 bit values at a time.
+ * wp0: where the nch output samples are stored.
+ * taps_div_4: loop count, four taps are consumed per iteration.
+ * shift: right shift applied to the accumulator before it is
+ * rounded to the 32 bit output.
+ * nch: number of interleaved channels.
+ *
+ * This function uses
+ * 6x 64 bit registers
+ * 3x integers
+ * 5x address pointers,
+ */
+ ae_f64 a0;
+ ae_f64 a1;
+ ae_valign u;
+ ae_f16x4 coef4;
+ ae_f32x2 d0;
+ ae_f32x2 d1;
+ ae_f32x2 data2;
+ ae_f16x4 *coefp;
+ ae_f32x2 *dp;
+ ae_f32 *dp0;
+ ae_f32 *dp1;
+ int i;
+ int j;
+ ae_f32 *wp = wp0;
+ const int inc = nch * sizeof(int32_t);
+
+ if (nch == 2) {
+ /* Move data pointer back by one sample to start from right
+ * channel sample. Discard read value d0.
+ */
+ dp = (ae_f32x2 *)rp;
+ AE_L32_XC(d0, (ae_f32 *)dp, -sizeof(ae_f32));
+
+ /* Reset coefficient pointer and clear accumulators */
+ coefp = (ae_f16x4 *)cp;
+ a0 = AE_ZERO64();
+ a1 = AE_ZERO64();
+
+ /* Compute FIR filter for both channels with four
+ * taps per every loop iteration. Four coefficients
+ * are loaded simultaneously. Data is read
+ * from interleaved buffer with stride of channels
+ * count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load four coefficients.
+ * NOTE(review): the alignment register u is never
+ * assigned in this function before this load.
+ * Presumably it needs priming (e.g. AE_LA64_PP)
+ * after coefp is reset - confirm.
+ */
+ AE_LA16X4_IP(coef4, u, coefp);
+
+ /* Load two data samples from two channels */
+ AE_L32X2_XC(d0, dp, inc); /* r0, l0 */
+ AE_L32X2_XC(d1, dp, inc); /* r1, l1 */
+
+ /* Select to data2 sequential samples from a channel
+ * and then accumulate to a0 and a1
+ * data2_h * coef4_3 + data2_l * coef4_2.
+ * The data is 32 bits Q1.31 and coefficient 16 bits
+ * Q1.15. The accumulators are Q17.47.
+ */
+ data2 = AE_SEL32_LL(d0, d1); /* l0, l1 */
+ AE_MULAAFD32X16_H3_L2(a0, data2, coef4);
+ data2 = AE_SEL32_HH(d0, d1); /* r0, r1 */
+ AE_MULAAFD32X16_H3_L2(a1, data2, coef4);
+
+ /* Load two data samples from two channels */
+ AE_L32X2_XC(d0, dp, inc); /* r2, l2 */
+ AE_L32X2_XC(d1, dp, inc); /* r3, l3 */
+
+ /* Accumulate
+ * data2_h * coef4_1 + data2_l * coef4_0.
+ */
+ data2 = AE_SEL32_LL(d0, d1); /* l2, l3 */
+ AE_MULAAFD32X16_H1_L0(a0, data2, coef4);
+ data2 = AE_SEL32_HH(d0, d1); /* r2, r3 */
+ AE_MULAAFD32X16_H1_L0(a1, data2, coef4);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate
+ * to Q1.31, and store 32 bit output. The a0 result is
+ * stored first and the a1 result to the next 32 bit word.
+ */
+ AE_S32_L_XP(AE_ROUND32F48SSYM(AE_SRAA64(a0, shift)), wp,
+ sizeof(int32_t));
+ AE_S32_L_XP(AE_ROUND32F48SSYM(AE_SRAA64(a1, shift)), wp,
+ sizeof(int32_t));
+
+ return;
+ }
+
+ dp1 = (ae_f32 *)rp;
+ for (j = 0; j < nch; j++) {
+ /* Copy pointer and advance to next ch with dummy load */
+ dp0 = dp1;
+ AE_L32_XC(d0, dp1, -sizeof(ae_f32));
+
+ /* Reset coefficient pointer and clear accumulator */
+ coefp = (ae_f16x4 *)cp;
+ a0 = AE_ZERO64();
+
+ /* Compute FIR filter for current channel with four
+ * taps per every loop iteration. Data is read from
+ * interleaved buffer with stride of channels count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load four coefficients.
+ * NOTE(review): u is used here without being
+ * assigned first, see if priming is needed.
+ */
+ AE_LA16X4_IP(coef4, u, coefp);
+
+ /* Load two data samples, place to high and
+ * low of data2.
+ */
+ AE_L32_XC(d0, dp0, inc);
+ AE_L32_XC(d1, dp0, inc);
+ data2 = AE_SEL32_LL(d0, d1);
+
+ /* Accumulate
+ * data2_h * coef4_3 + data2_l* coef4_2.
+ * The data is 32 bits Q1.31 and coefficient 16 bits
+ * Q1.15. The accumulator is Q17.47.
+ */
+ AE_MULAAFD32X16_H3_L2(a0, data2, coef4);
+
+ /* Repeat with next two samples */
+ AE_L32_XC(d0, dp0, inc);
+ AE_L32_XC(d1, dp0, inc);
+ data2 = AE_SEL32_LL(d0, d1);
+
+ /* Accumulate
+ * data2_h * coef4_1 + data2_l * coef4_0.
+ */
+ AE_MULAAFD32X16_H1_L0(a0, data2, coef4);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate Q17.47
+ * to Q1.31, and store 32 bit output. Advance write
+ * pointer to next sample.
+ */
+ AE_S32_L_XP(AE_ROUND32F48SSYM(AE_SRAA64(a0, shift)), wp,
+ sizeof(int32_t));
+ }
+}
+
+#else /* 32bit coefficients version */
+
+static inline void fir_filter(ae_f32 *rp, const void *cp, ae_f32 *wp0,
+ const int taps_div_4, const int shift, const int nch)
+{
+ /* Compute one FIR sub-filter output sample per channel from
+ * interleaved data, 32 bit coefficients version.
+ *
+ * rp: read position in the data buffer. Data is read with
+ * circular (_XC) loads; this function does not set the
+ * circular buffer begin/end itself.
+ * cp: coefficients, read as pairs of 32 bit words.
+ * wp0: where the nch output samples are stored.
+ * taps_div_4: loop count, four taps are consumed per iteration.
+ * shift: right shift applied to the accumulator before it is
+ * rounded to the 32 bit output.
+ * nch: number of interleaved channels.
+ *
+ * This function uses
+ * 6x 64 bit registers
+ * 3x integers
+ * 5x address pointers,
+ */
+ ae_f64 a0;
+ ae_f64 a1;
+ ae_f24x2 data2;
+ ae_f24x2 coef2;
+ ae_f24x2 d0;
+ ae_f24x2 d1;
+ ae_f24x2 *coefp;
+ ae_f24x2 *dp;
+ ae_f24 *dp1;
+ ae_f24 *dp0;
+ int i;
+ int j;
+ ae_f32 *wp = wp0;
+ const int inc = nch * sizeof(int32_t);
+
+ if (nch == 2) {
+ /* Move data pointer back by one sample to start from right
+ * channel sample. Discard read value d0.
+ */
+ dp = (ae_f24x2 *)rp;
+ AE_L32F24_XC(d0, (ae_f24 *)dp, -sizeof(ae_f24));
+
+ /* Reset coefficient pointer and clear accumulators */
+ coefp = (ae_f24x2 *)cp;
+ a0 = AE_ZERO64();
+ a1 = AE_ZERO64();
+
+ /* Compute FIR filter for both channels with four
+ * taps per every loop iteration. Two coefficients
+ * are loaded simultaneously. Data is read
+ * from interleaved buffer with stride of channels
+ * count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load two coefficients. Coef2_h contains tap *coefp
+ * and coef2_l contains the next tap.
+ */
+ /* TODO: Ensure coefficients are 64 bits aligned */
+ AE_L32X2F24_IP(coef2, coefp, sizeof(ae_f24x2));
+
+ /* Load two data samples from two channels */
+ AE_L32X2F24_XC(d0, dp, inc); /* r0, l0 */
+ AE_L32X2F24_XC(d1, dp, inc); /* r1, l1 */
+
+ /* Select to data2 the low halves of d0 and d1 for
+ * accumulator a0, then the high halves of d0 and d1
+ * for accumulator a1. Then accumulate
+ * to a0 and a1
+ * data2_h * coef2_h + data2_l * coef2_l. The 32 bit
+ * data and coefficient words are loaded with 24 bit
+ * fractional loads.
+ */
+ data2 = AE_SELP24_LL(d0, d1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ data2 = AE_SELP24_HH(d0, d1);
+ AE_MULAAFP24S_HH_LL(a1, data2, coef2);
+
+ /* Repeat for next two taps */
+ AE_L32X2F24_IP(coef2, coefp, sizeof(ae_f24x2));
+ AE_L32X2F24_XC(d0, dp, inc); /* r2, l2 */
+ AE_L32X2F24_XC(d1, dp, inc); /* r3, l3 */
+ data2 = AE_SELP24_LL(d0, d1);
+ AE_MULAAFP24S_HH_LL(a0, data2, coef2);
+ data2 = AE_SELP24_HH(d0, d1);
+ AE_MULAAFP24S_HH_LL(a1, data2, coef2);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate
+ * to Q1.31, and store 32 bit output. The a0 result is
+ * stored first and the a1 result to the next 32 bit word.
+ */
+ AE_S32_L_XP(AE_ROUND32F48SSYM(AE_SRAA64(a0, shift)), wp,
+ sizeof(int32_t));
+ AE_S32_L_XP(AE_ROUND32F48SSYM(AE_SRAA64(a1, shift)), wp,
+ sizeof(int32_t));
+
+ return;
+ }
+
+ dp1 = (ae_f24 *)rp;
+ for (j = 0; j < nch; j++) {
+ /* Copy pointer and advance to next ch with dummy load.
+ * The value loaded to data2 is discarded.
+ */
+ dp0 = dp1;
+ AE_L32F24_XC(data2, dp1, -sizeof(ae_f24));
+
+ /* Reset coefficient pointer and clear accumulator */
+ coefp = (ae_f24x2 *)cp;
+ a0 = AE_ZERO64();
+
+ /* Compute FIR filter for current channel with four
+ * taps per every loop iteration. Data is read from
+ * interleaved buffer with stride of channels count.
+ */
+ for (i = 0; i < taps_div_4; i++) {
+ /* Load two coefficients */
+ coef2 = *coefp++;
+
+ /* Load two data samples, place to high and
+ * low of data2.
+ */
+ AE_L32F24_XC(d0, dp0, inc);
+ AE_L32F24_XC(d1, dp0, inc);
+ data2 = AE_SELP24_LL(d0, d1);
+
+ /* Accumulate to a0 data2_h * coef2_h +
+ * data2_l*coef2_l. The Q1.31 bit data is used
+ * as Q1.23 from MSB side bits of the 32 bit
+ * word. The accumulator a0 is Q17.47.
+ */
+ AE_MULAAFD24_HH_LL(a0, data2, coef2);
+
+ /* Repeat the same for next two filter taps */
+ coef2 = *coefp++;
+ AE_L32F24_XC(d0, dp0, inc);
+ AE_L32F24_XC(d1, dp0, inc);
+ data2 = AE_SELP24_LL(d0, d1);
+ AE_MULAAFD24_HH_LL(a0, data2, coef2);
+ }
+
+ /* Scale FIR output with right shifts, round/saturate Q17.47
+ * to Q1.31, and store 32 bit output. Advance write
+ * pointer to next sample.
+ */
+ AE_S32_L_XP(AE_ROUND32F48SSYM(AE_SRAA64(a0, shift)), wp,
+ sizeof(int32_t));
+ }
+}
+
+#endif /* 32bit coefficients version */
+
+void src_polyphase_stage_cir(struct src_stage_prm *s)
+{
+ /* Run the polyphase FIR stage s->times times. Each round
+ * 1) copies blk_in_words input words from the x buffer to the
+ * FIR delay line with circular stores, address decreasing,
+ * 2) computes cfg->num_of_subfilters sub-filter outputs with
+ * fir_filter() into the out delay buffer,
+ * 3) copies blk_out_words words from the out delay buffer to
+ * the y buffer.
+ * Samples are copied as such without shifts.
+ *
+ * This function uses
+ * 1x 64 bit registers
+ * 16x integers
+ * 7x address pointers,
+ */
+ ae_int32x2 q;
+ ae_f32 *rp;
+ ae_f32 *wp;
+ int i;
+ int n;
+ int m;
+ int n_wrap_buf;
+ int n_min;
+ struct src_state *fir = s->state;
+ struct src_stage *cfg = s->stage;
+ int32_t *fir_end = &fir->fir_delay[fir->fir_delay_size];
+ int32_t *out_delay_end = &fir->out_delay[fir->out_delay_size];
+ const void *cp; /* Can be int32_t or int16_t */
+ const size_t out_size = fir->out_delay_size * sizeof(int32_t);
+ const int nch = s->nch;
+ const int nch_x_odm = cfg->odm * nch;
+ const int blk_in_words = nch * cfg->blk_in;
+ const int blk_out_words = nch * cfg->num_of_subfilters;
+ const int sz = sizeof(int32_t);
+ const int n_sz = -sizeof(int32_t);
+ const int rewind_sz = sz * (nch * (cfg->blk_in
+ + (cfg->num_of_subfilters - 1) * cfg->idm) - nch);
+ const int nch_x_idm_sz = -nch * cfg->idm * sizeof(int32_t);
+ const int taps_div_4 = cfg->subfilter_length >> 2;
+
+#if SRC_SHORT
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int16_t);
+#else
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int32_t);
+#endif
+
+ for (n = 0; n < s->times; n++) {
+ /* Input data to filter */
+ m = blk_in_words;
+
+ /* Setup circular buffer for FIR input data delay */
+ AE_SETCBEGIN0(fir->fir_delay);
+ AE_SETCEND0(fir_end);
+
+ while (m > 0) {
+ /* Number of words until circular wrap */
+ n_wrap_buf = s->x_end_addr - s->x_rptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Load 32 bits sample to accumulator,
+ * advance pointer.
+ */
+ AE_L32_XP(q, (ae_int32 *)s->x_rptr, sz);
+
+ /* Store to circular buffer, advance pointer */
+ AE_S32_L_XC(q, (ae_int32 *)fir->fir_wp, n_sz);
+ }
+
+ /* Check for wrap */
+ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+ }
+
+ /* Do filter */
+ cp = cfg->coefs; /* Reset to 1st coefficient */
+ rp = (ae_f32 *)fir->fir_wp;
+
+ /* Do circular modification to pointer rp by amount of
+ * rewind to data start. Loaded value q is discarded.
+ */
+ AE_L32_XC(q, rp, rewind_sz);
+
+ /* Reset FIR output write pointer and compute all polyphase
+ * sub-filters.
+ */
+ wp = (ae_f32 *)fir->out_rp;
+ for (i = 0; i < cfg->num_of_subfilters; i++) {
+ fir_filter(rp, cp, wp, taps_div_4, cfg->shift, nch);
+ wp += nch_x_odm;
+ cp += subfilter_size; /* Coefficients of next sub-filter */
+ src_circ_inc_wrap((int32_t **)&wp, out_delay_end,
+ out_size);
+
+ /* Circular advance pointer rp by number of
+ * channels x input delay multiplier. Loaded value q
+ * is discarded.
+ */
+ AE_L32_XC(q, rp, nch_x_idm_sz);
+ }
+
+ /* Output */
+
+ /* Setup circular buffer for SRC out delay access */
+ AE_SETCBEGIN0(fir->out_delay);
+ AE_SETCEND0(out_delay_end);
+ m = blk_out_words;
+ while (m > 0) {
+ n_wrap_buf = s->y_end_addr - s->y_wptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Circular load followed by linear store,
+ * advance read and write pointers.
+ */
+ AE_L32_XC(q, (ae_int32 *)fir->out_rp, sz);
+ AE_S32_L_XP(q, (ae_int32 *)s->y_wptr, sz);
+ }
+
+ /* Check wrap */
+ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+ }
+ }
+}
+
+void src_polyphase_stage_cir_s24(struct src_stage_prm *s)
+{
+ /* Run the polyphase FIR stage s->times times. Each round
+ * 1) copies blk_in_words input words from the x buffer to the
+ * FIR delay line with circular stores, address decreasing.
+ * Every input word is shifted left by 8 before the store,
+ * 2) computes cfg->num_of_subfilters sub-filter outputs with
+ * fir_filter() into the out delay buffer,
+ * 3) copies blk_out_words words from the out delay buffer to
+ * the y buffer. Every output word is shifted right by 8
+ * before the store.
+ *
+ * This function uses
+ * 1x 64 bit registers
+ * 16x integers
+ * 7x address pointers,
+ */
+ ae_int32x2 q;
+ ae_f32 *rp;
+ ae_f32 *wp;
+ int i;
+ int n;
+ int m;
+ int n_wrap_buf;
+ int n_min;
+
+ struct src_state *fir = s->state;
+ struct src_stage *cfg = s->stage;
+ int32_t *fir_end = &fir->fir_delay[fir->fir_delay_size];
+ int32_t *out_delay_end = &fir->out_delay[fir->out_delay_size];
+ const void *cp; /* Can be int32_t or int16_t */
+ const size_t out_size = fir->out_delay_size * sizeof(int32_t);
+ const int nch = s->nch;
+ const int nch_x_odm = cfg->odm * nch;
+ const int blk_in_words = nch * cfg->blk_in;
+ const int blk_out_words = nch * cfg->num_of_subfilters;
+ const int sz = sizeof(int32_t);
+ const int n_sz = -sizeof(int32_t);
+ const int rewind_sz = sz * (nch * (cfg->blk_in
+ + (cfg->num_of_subfilters - 1) * cfg->idm) - nch);
+ const int nch_x_idm_sz = -nch * cfg->idm * sizeof(int32_t);
+ const int taps_div_4 = cfg->subfilter_length >> 2;
+
+#if SRC_SHORT
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int16_t);
+#else
+ const size_t subfilter_size = cfg->subfilter_length * sizeof(int32_t);
+#endif
+
+ for (n = 0; n < s->times; n++) {
+ /* Input data */
+ m = blk_in_words;
+
+ /* Setup circular buffer for FIR input data delay */
+ AE_SETCBEGIN0(fir->fir_delay);
+ AE_SETCEND0(fir_end);
+
+ while (m > 0) {
+ /* Number of words without circular wrap */
+ n_wrap_buf = s->x_end_addr - s->x_rptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Load 32 bits sample to accumulator and
+ * advance read pointer. Store to circular
+ * buffer as shifted left by 8, advance
+ * write pointer.
+ */
+ AE_L32_XP(q, (ae_int32 *)s->x_rptr, sz);
+ AE_S32_L_XC(AE_SLAI32(q, 8),
+ (ae_int32 *)fir->fir_wp, n_sz);
+ }
+
+ /* Check for wrap */
+ src_circ_inc_wrap(&s->x_rptr, s->x_end_addr, s->x_size);
+ }
+
+ /* Do filter */
+ cp = cfg->coefs; /* Reset to 1st coefficient */
+ rp = (ae_f32 *)fir->fir_wp;
+
+ /* Do circular modification to pointer rp by amount of
+ * rewind to data start. Loaded value q is discarded.
+ */
+ AE_L32_XC(q, rp, rewind_sz);
+
+ /* Reset FIR output write pointer and compute all polyphase
+ * sub-filters.
+ */
+ wp = (ae_f32 *)fir->out_rp;
+ for (i = 0; i < cfg->num_of_subfilters; i++) {
+ fir_filter(rp, cp, wp, taps_div_4, cfg->shift, nch);
+ wp += nch_x_odm;
+ cp += subfilter_size; /* Coefficients of next sub-filter */
+ src_circ_inc_wrap((int32_t **)&wp, out_delay_end,
+ out_size);
+
+ /* Circular advance pointer rp by number of
+ * channels x input delay multiplier. Loaded value q
+ * is discarded.
+ */
+ AE_L32_XC(q, rp, nch_x_idm_sz);
+ }
+
+ /* Output */
+
+ /* Setup circular buffer for SRC out delay access */
+ AE_SETCBEGIN0(fir->out_delay);
+ AE_SETCEND0(out_delay_end);
+ m = blk_out_words;
+ while (m > 0) {
+ n_wrap_buf = s->y_end_addr - s->y_wptr;
+ n_min = (m < n_wrap_buf) ? m : n_wrap_buf;
+ m -= n_min;
+ for (i = 0; i < n_min; i++) {
+ /* Circular load for 32 bit sample,
+ * advance read pointer.
+ */
+ AE_L32_XC(q, (ae_int32 *)fir->out_rp, sz);
+
+ /* Store value as shifted right by 8
+ * for sign extended 24 bit value,
+ * advance write pointer.
+ */
+ AE_S32_L_XP(AE_SRAI32(q, 8),
+ (ae_int32 *)s->y_wptr, sz);
+ }
+
+ /* Check wrap */
+ src_circ_inc_wrap(&s->y_wptr, s->y_end_addr, s->y_size);
+ }
+ }
+}
+
+#endif
--
2.14.1
3
9
09 Mar '18
From: Yan Wang <yan.wang(a)linux.intel.com>
The channel numbers of playback and capture are reversed on CNL.
The channel number of playback is 9.
The channel number of capture is 7.
Signed-off-by: Yan Wang <yan.wang(a)linux.intel.com>
---
src/platform/cannonlake/dma.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/platform/cannonlake/dma.c b/src/platform/cannonlake/dma.c
index 4c488c1..59e0fc7 100644
--- a/src/platform/cannonlake/dma.c
+++ b/src/platform/cannonlake/dma.c
@@ -134,7 +134,7 @@ static struct dma dma[] = {
.plat_data = {
.id = DMA_HOST_IN_DMAC,
.base = GTW_HOST_IN_STREAM_BASE(0),
- .channels = 9,
+ .channels = 7,
.irq = IRQ_EXT_HOST_DMA_IN_LVL3(0, 0),
},
.ops = &hda_host_dma_ops,
@@ -143,7 +143,7 @@ static struct dma dma[] = {
.plat_data = {
.id = DMA_HOST_OUT_DMAC,
.base = GTW_HOST_OUT_STREAM_BASE(0),
- .channels = 7,
+ .channels = 9,
.irq = IRQ_EXT_HOST_DMA_OUT_LVL3(0, 0),
},
.ops = &hda_host_dma_ops,
--
2.14.3
3
3
From: Yan Wang <yan.wang(a)linux.intel.com>
Work queue cannot work correctly because it uses the wrong timer setting.
Based on Keyon's comments, use SSP CLK instead of CPU CLK.
It is confirmed on my CNL platform.
Signed-off-by: Yan Wang <yan.wang(a)linux.intel.com>
---
src/platform/cannonlake/platform.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/platform/cannonlake/platform.c b/src/platform/cannonlake/platform.c
index c49dcb8..c9449ed 100644
--- a/src/platform/cannonlake/platform.c
+++ b/src/platform/cannonlake/platform.c
@@ -129,8 +129,8 @@ static struct work_queue_timesource platform_generic_queue = {
.id = TIMER3, /* external timer */
.irq = IRQ_EXT_TSTAMP0_LVL2(0),
},
- .clk = CLK_CPU,
- .notifier = NOTIFIER_ID_CPU_FREQ,
+ .clk = CLK_SSP,
+ .notifier = NOTIFIER_ID_SSP_FREQ,
.timer_set = platform_timer_set,
.timer_clear = platform_timer_clear,
.timer_get = platform_timer_get,
--
2.14.3
4
3
09 Mar '18
missing assignment of 'ret' variable, fix
Detected with sonarcloud.
Fixes: 4252c49d537 ('pipeline: Add XRUN state and handler within pipeline')
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
---
src/audio/pipeline.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/audio/pipeline.c b/src/audio/pipeline.c
index 728b64d..c7f8aed 100644
--- a/src/audio/pipeline.c
+++ b/src/audio/pipeline.c
@@ -1063,14 +1063,14 @@ static int pipeline_xrun_recover(struct pipeline *p)
p->xrun_bytes = 0;
/* prepare the pipeline */
- pipeline_prepare(p, p->source_comp);
+ ret = pipeline_prepare(p, p->source_comp);
if (ret < 0) {
trace_pipe_error("px1");
return ret;
}
/* restart pipeline comps */
- pipeline_cmd(p, p->source_comp, COMP_CMD_START, NULL);
+ ret = pipeline_cmd(p, p->source_comp, COMP_CMD_START, NULL);
if (ret < 0) {
trace_pipe_error("px2");
return ret;
--
2.14.1
2
1
[Sound-open-firmware] [PATCH 2/2] cnl: dma: refine dma interrupt processing on cnl
by Rander Wang 09 Mar '18
by Rander Wang 09 Mar '18
09 Mar '18
On CNL, all the DMA controllers share the same interrupt pin, so
every DMA controller needs to be checked in the interrupt
handler.
Signed-off-by: Rander Wang <rander.wang(a)linux.intel.com>
---
src/drivers/dw-dma.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/src/drivers/dw-dma.c b/src/drivers/dw-dma.c
index f736679..a06087e 100644
--- a/src/drivers/dw-dma.c
+++ b/src/drivers/dw-dma.c
@@ -1136,6 +1136,26 @@ static void dw_dma_irq_handler(void *data)
}
}
+/*All the dma share the same interrupt on CNL, so check each dma*/
+/*controller when interrupt coming, then do the work */
+static void dw_dma_irq_cnl(void *data)
+{
+ struct dma *dma;
+ uint32_t status_intr;
+
+ /*check interrupt status of DMA controller 0*/
+ dma = dma_get(DMA_GP_LP_DMAC0);
+ status_intr = dw_read(dma, DW_INTR_STATUS);
+ if (status_intr)
+ dw_dma_irq_handler(dma);
+
+ /*check interrupt status of DMA controller 1*/
+ dma = dma_get(DMA_GP_LP_DMAC1);
+ status_intr = dw_read(dma, DW_INTR_STATUS);
+ if (status_intr)
+ dw_dma_irq_handler(dma);
+}
+
static int dw_dma_probe(struct dma *dma)
{
struct dma_pdata *dw_pdata;
@@ -1157,7 +1177,11 @@ static int dw_dma_probe(struct dma *dma)
}
/* register our IRQ handler */
+#ifdef CONFIG_CANNONLAKE
+ interrupt_register(dma_irq(dma), dw_dma_irq_cnl, dma);
+#else
interrupt_register(dma_irq(dma), dw_dma_irq_handler, dma);
+#endif
interrupt_enable(dma_irq(dma));
return 0;
--
2.14.1
3
8
[Sound-open-firmware] [PATCH] clk: add missing break in switch block
by Pierre-Louis Bossart 09 Mar '18
by Pierre-Louis Bossart 09 Mar '18
09 Mar '18
Make static analyzers happy by adding a break.
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
---
src/platform/baytrail/clk.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/platform/baytrail/clk.c b/src/platform/baytrail/clk.c
index fc1cd9d..fc535ca 100644
--- a/src/platform/baytrail/clk.c
+++ b/src/platform/baytrail/clk.c
@@ -214,6 +214,7 @@ uint32_t clock_set_freq(int clock, uint32_t hz)
/* tell anyone interested we have now changed CPU freq */
notifier_event(NOTIFIER_ID_SSP_FREQ, CLOCK_NOTIFY_POST,
&notify_data);
+ break;
default:
break;
--
2.14.1
1
0