[Sound-open-firmware] [PATCH v2] volume: support 8-channel feature
add the 8-channel feature for the capture function for apl-gpmrb platform.
Signed-off-by: Wu Zhigang zhigang.wu@linux.intel.com Reviewed-by: Keyon Jie yang.jie@linux.intel.com
--- v2: rebase the code and make patch.
--- test with: apl-gpmrb with tdf8532 codec
linux topic/sof-dev: commit 0d51a5ed28c5 sof master: commit 48d2a1c551d7 soft master: commit 2cc3ad2a9287
pass the regression test already: Minnowboard Turbo UP^2 CNL-RVP --- src/audio/volume_generic.c | 363 +++++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+)
diff --git a/src/audio/volume_generic.c b/src/audio/volume_generic.c index 200cf12..fa23240 100644 --- a/src/audio/volume_generic.c +++ b/src/audio/volume_generic.c @@ -466,6 +466,358 @@ static void vol_s24_to_s24_4ch(struct comp_dev *dev, struct comp_buffer *sink, } }
+#ifdef CONFIG_APOLLOLAKE +/* volume scaling functions for 8-channel input */ + +/* copy and scale volume from 16 bit source buffer to 32 bit dest buffer */ +static void vol_s16_to_s32_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int16_t *src = (int16_t *)source->r_ptr; + int32_t *dest = (int32_t *)sink->w_ptr; + int32_t i; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.15 --> Q1.31 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = (int32_t)src[i] * cd->volume[0]; + dest[i + 1] = (int32_t)src[i + 1] * cd->volume[1]; + dest[i + 2] = (int32_t)src[i + 2] * cd->volume[2]; + dest[i + 3] = (int32_t)src[i + 3] * cd->volume[3]; + dest[i + 4] = (int32_t)src[i + 4] * cd->volume[4]; + dest[i + 5] = (int32_t)src[i + 5] * cd->volume[5]; + dest[i + 6] = (int32_t)src[i + 6] * cd->volume[6]; + dest[i + 7] = (int32_t)src[i + 7] * cd->volume[7]; + } +} + +/* copy and scale volume from 32 bit source buffer to 16 bit dest buffer */ +static void vol_s32_to_s16_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int32_t *src = (int32_t *)source->r_ptr; + int16_t *dest = (int16_t *)sink->w_ptr; + int32_t i; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.31 --> Q1.15 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = (int16_t)q_multsr_sat_32x32(src[i], cd->volume[0], + Q_SHIFT_BITS_64(31, 16, + 15)); + dest[i + 1] = (int16_t)q_multsr_sat_32x32(src[i + 1], + cd->volume[1], + Q_SHIFT_BITS_64(31, + 16, + 15)); + dest[i + 2] = (int16_t)q_multsr_sat_32x32(src[i + 2], + cd->volume[2], + Q_SHIFT_BITS_64(31, + 16, + 15)); + dest[i + 3] = (int16_t)q_multsr_sat_32x32(src[i + 3], + cd->volume[3], + Q_SHIFT_BITS_64(31, + 16, + 15)); + dest[i + 4] = (int16_t)q_multsr_sat_32x32(src[i + 4], + cd->volume[4], + 
Q_SHIFT_BITS_64(31, + 16, + 15)); + dest[i + 5] = (int16_t)q_multsr_sat_32x32(src[i + 5], + cd->volume[5], + Q_SHIFT_BITS_64(31, + 16, + 15)); + dest[i + 6] = (int16_t)q_multsr_sat_32x32(src[i + 6], + cd->volume[6], + Q_SHIFT_BITS_64(31, + 16, + 15)); + dest[i + 7] = (int16_t)q_multsr_sat_32x32(src[i + 7], + cd->volume[7], + Q_SHIFT_BITS_64(31, + 16, + 15)); + } +} + +/* copy and scale volume from 32 bit source buffer to 32 bit dest buffer */ +static void vol_s32_to_s32_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int32_t *src = (int32_t *)source->r_ptr; + int32_t *dest = (int32_t *)sink->w_ptr; + int32_t i; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.31 --> Q1.31 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0], + Q_SHIFT_BITS_64(31, 16, 31)); + dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1], + Q_SHIFT_BITS_64(31, 16, 31)); + dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2], + Q_SHIFT_BITS_64(31, 16, 31)); + dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3], + Q_SHIFT_BITS_64(31, 16, 31)); + dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4], + Q_SHIFT_BITS_64(31, 16, 31)); + dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5], + Q_SHIFT_BITS_64(31, 16, 31)); + dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6], + Q_SHIFT_BITS_64(31, 16, 31)); + dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7], + Q_SHIFT_BITS_64(31, 16, 31)); + } +} + +/* copy and scale volume from 16 bit source buffer to 16 bit dest buffer */ +static void vol_s16_to_s16_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int16_t *src = (int16_t *)source->r_ptr; + int16_t *dest = (int16_t *)sink->w_ptr; + int32_t i; + + /* buffer sizes are always divisible 
by period frames */ + /* Samples are Q1.15 --> Q1.15 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = q_multsr_sat_16x16(src[i], cd->volume[0], + Q_SHIFT_BITS_32(15, 16, 15)); + dest[i + 1] = q_multsr_sat_16x16(src[i + 1], cd->volume[1], + Q_SHIFT_BITS_32(15, 16, 15)); + dest[i + 2] = q_multsr_sat_16x16(src[i + 2], cd->volume[2], + Q_SHIFT_BITS_32(15, 16, 15)); + dest[i + 3] = q_multsr_sat_16x16(src[i + 3], cd->volume[3], + Q_SHIFT_BITS_32(15, 16, 15)); + dest[i + 4] = q_multsr_sat_16x16(src[i + 4], cd->volume[4], + Q_SHIFT_BITS_32(15, 16, 15)); + dest[i + 5] = q_multsr_sat_16x16(src[i + 5], cd->volume[5], + Q_SHIFT_BITS_32(15, 16, 15)); + dest[i + 6] = q_multsr_sat_16x16(src[i + 6], cd->volume[6], + Q_SHIFT_BITS_32(15, 16, 15)); + dest[i + 7] = q_multsr_sat_16x16(src[i + 7], cd->volume[7], + Q_SHIFT_BITS_32(15, 16, 15)); + } +} + +/* copy and scale volume from 16 bit source buffer to 24 bit + * on 32 bit boundary buffer + */ +static void vol_s16_to_s24_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int16_t *src = (int16_t *)source->r_ptr; + int32_t *dest = (int32_t *)sink->w_ptr; + int32_t i; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.15 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0], + Q_SHIFT_BITS_64(15, 16, 23)); + dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1], + Q_SHIFT_BITS_64(15, 16, 23)); + dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2], + Q_SHIFT_BITS_64(15, 16, 23)); + dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3], + Q_SHIFT_BITS_64(15, 16, 23)); + dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4], + Q_SHIFT_BITS_64(15, 16, 23)); + dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5], + Q_SHIFT_BITS_64(15, 16, 23)); + dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6], 
+ Q_SHIFT_BITS_64(15, 16, 23)); + dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7], + Q_SHIFT_BITS_64(15, 16, 23)); + } +} + +/* copy and scale volume from 16 bit source buffer to 24 bit + * on 32 bit boundary dest buffer + */ +static void vol_s24_to_s16_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int32_t *src = (int32_t *)source->r_ptr; + int16_t *dest = (int16_t *)sink->w_ptr; + int32_t i, sample; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.23 --> Q1.15 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + sample = sign_extend_s24(src[i]); + dest[i] = (int16_t)q_multsr_sat_32x32(sample, cd->volume[0], + Q_SHIFT_BITS_64(23, 16, + 15)); + sample = sign_extend_s24(src[i + 1]); + dest[i + 1] = (int16_t)q_multsr_sat_32x32(sample, + cd->volume[1], + Q_SHIFT_BITS_64(23, + 16, + 15)); + sample = sign_extend_s24(src[i + 2]); + dest[i + 2] = (int16_t)q_multsr_sat_32x32(sample, + cd->volume[2], + Q_SHIFT_BITS_64(23, + 16, + 15)); + sample = sign_extend_s24(src[i + 3]); + dest[i + 3] = (int16_t)q_multsr_sat_32x32(sample, + cd->volume[3], + Q_SHIFT_BITS_64(23, + 16, + 15)); + sample = sign_extend_s24(src[i + 4]); + dest[i + 4] = (int16_t)q_multsr_sat_32x32(sample, + cd->volume[4], + Q_SHIFT_BITS_64(23, + 16, + 15)); + sample = sign_extend_s24(src[i + 5]); + dest[i + 5] = (int16_t)q_multsr_sat_32x32(sample, + cd->volume[5], + Q_SHIFT_BITS_64(23, + 16, + 15)); + sample = sign_extend_s24(src[i + 6]); + dest[i + 6] = (int16_t)q_multsr_sat_32x32(sample, + cd->volume[6], + Q_SHIFT_BITS_64(23, + 16, + 15)); + sample = sign_extend_s24(src[i + 7]); + dest[i + 7] = (int16_t)q_multsr_sat_32x32(sample, + cd->volume[7], + Q_SHIFT_BITS_64(23, + 16, + 15)); + } +} + +/* copy and scale volume from 32 bit source buffer to 24 bit + * on 32 bit boundary dest buffer + */ +static void vol_s32_to_s24_8ch(struct comp_dev *dev, struct comp_buffer 
*sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int32_t *src = (int32_t *)source->r_ptr; + int32_t *dest = (int32_t *)sink->w_ptr; + int32_t i; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.31 --> Q1.23 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0], + Q_SHIFT_BITS_64(31, 16, 23)); + dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1], + Q_SHIFT_BITS_64(31, 16, 23)); + dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2], + Q_SHIFT_BITS_64(31, 16, 23)); + dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3], + Q_SHIFT_BITS_64(31, 16, 23)); + dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4], + Q_SHIFT_BITS_64(31, 16, 23)); + dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5], + Q_SHIFT_BITS_64(31, 16, 23)); + dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6], + Q_SHIFT_BITS_64(31, 16, 23)); + dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7], + Q_SHIFT_BITS_64(31, 16, 23)); + } +} + +/* copy and scale volume from 16 bit source buffer to 24 bit + * on 32 bit boundary dest buffer + */ +static void vol_s24_to_s32_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int32_t *src = (int32_t *)source->r_ptr; + int32_t *dest = (int32_t *)sink->w_ptr; + int32_t i; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.23 --> Q1.31 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = q_multsr_sat_32x32(sign_extend_s24(src[i]), + cd->volume[0], + Q_SHIFT_BITS_64(23, 16, 31)); + dest[i + 1] = q_multsr_sat_32x32(sign_extend_s24(src[i + 1]), + cd->volume[1], + Q_SHIFT_BITS_64(23, 16, 31)); + dest[i + 2] = q_multsr_sat_32x32(sign_extend_s24(src[i + 2]), + cd->volume[2], + Q_SHIFT_BITS_64(23, 16, 31)); + dest[i + 3] = 
q_multsr_sat_32x32(sign_extend_s24(src[i + 3]), + cd->volume[3], + Q_SHIFT_BITS_64(23, 16, 31)); + dest[i + 4] = q_multsr_sat_32x32(sign_extend_s24(src[i + 4]), + cd->volume[4], + Q_SHIFT_BITS_64(23, 16, 31)); + dest[i + 5] = q_multsr_sat_32x32(sign_extend_s24(src[i + 5]), + cd->volume[5], + Q_SHIFT_BITS_64(23, 16, 31)); + dest[i + 6] = q_multsr_sat_32x32(sign_extend_s24(src[i + 6]), + cd->volume[6], + Q_SHIFT_BITS_64(23, 16, 31)); + dest[i + 7] = q_multsr_sat_32x32(sign_extend_s24(src[i + 7]), + cd->volume[7], + Q_SHIFT_BITS_64(23, 16, 31)); + } +} + +/* Copy and scale volume from 24 bit source buffer to 24 bit on 32 bit boundary + * dest buffer. + */ +static void vol_s24_to_s24_8ch(struct comp_dev *dev, struct comp_buffer *sink, + struct comp_buffer *source) +{ + struct comp_data *cd = comp_get_drvdata(dev); + int32_t i, *src = (int32_t *)source->r_ptr; + int32_t *dest = (int32_t *)sink->w_ptr; + + /* buffer sizes are always divisible by period frames */ + /* Samples are Q1.23 --> Q1.23 and volume is Q1.16 */ + for (i = 0; i < dev->frames * 8; i += 8) { + dest[i] = q_multsr_sat_32x32(sign_extend_s24(src[i]), + cd->volume[0], + Q_SHIFT_BITS_64(23, 16, 23)); + dest[i + 1] = q_multsr_sat_32x32(sign_extend_s24(src[i + 1]), + cd->volume[1], + Q_SHIFT_BITS_64(23, 16, 23)); + dest[i + 2] = q_multsr_sat_32x32(sign_extend_s24(src[i + 2]), + cd->volume[2], + Q_SHIFT_BITS_64(23, 16, 23)); + dest[i + 3] = q_multsr_sat_32x32(sign_extend_s24(src[i + 3]), + cd->volume[3], + Q_SHIFT_BITS_64(23, 16, 23)); + dest[i + 4] = q_multsr_sat_32x32(sign_extend_s24(src[i + 4]), + cd->volume[4], + Q_SHIFT_BITS_64(23, 16, 23)); + dest[i + 5] = q_multsr_sat_32x32(sign_extend_s24(src[i + 5]), + cd->volume[5], + Q_SHIFT_BITS_64(23, 16, 23)); + dest[i + 6] = q_multsr_sat_32x32(sign_extend_s24(src[i + 6]), + cd->volume[6], + Q_SHIFT_BITS_64(23, 16, 23)); + dest[i + 7] = q_multsr_sat_32x32(sign_extend_s24(src[i + 7]), + cd->volume[7], + Q_SHIFT_BITS_64(23, 16, 23)); + } +} +#endif + const struct 
comp_func_map func_map[] = { {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 2, vol_s16_to_s16_2ch}, {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 2, vol_s16_to_s32_2ch}, @@ -485,6 +837,17 @@ const struct comp_func_map func_map[] = { {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s32_to_s24_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 4, vol_s24_to_s32_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s24_to_s24_4ch}, +#ifdef CONFIG_APOLLOLAKE + {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s16_to_s16_8ch}, + {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s16_to_s32_8ch}, + {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s32_to_s16_8ch}, + {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s32_to_s32_8ch}, + {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s16_to_s24_8ch}, + {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S16_LE, 8, vol_s24_to_s16_8ch}, + {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s32_to_s24_8ch}, + {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 8, vol_s24_to_s32_8ch}, + {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s24_to_s24_8ch}, +#endif };
scale_vol vol_get_processing_function(struct comp_dev *dev)
On 19.06.2018 09:10, Wu Zhigang wrote:
const struct comp_func_map func_map[] = { {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 2, vol_s16_to_s16_2ch}, {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 2, vol_s16_to_s32_2ch}, @@ -485,6 +837,17 @@ const struct comp_func_map func_map[] = { {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s32_to_s24_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 4, vol_s24_to_s32_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s24_to_s24_4ch}, +#ifdef CONFIG_APOLLOLAKE
- {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s16_to_s16_8ch},
- {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s16_to_s32_8ch},
- {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s32_to_s16_8ch},
- {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s32_to_s32_8ch},
- {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s16_to_s24_8ch},
- {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S16_LE, 8, vol_s24_to_s16_8ch},
- {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s32_to_s24_8ch},
- {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 8, vol_s24_to_s32_8ch},
- {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s24_to_s24_8ch},
The code size gets quite big with all the channel-count versions of the generic C volume control. This is probably OK for now, since it is for the larger APL DSP, but it should be possible to handle all volume variants efficiently with handler versions for channel counts divisible by 1/2/4, and with intrinsics code maybe just 1/2. In the future there will also be float data, so this approach suffers from a combinatorial explosion.
Thanks, Seppo
On 2018年06月19日 15:44, Seppo Ingalsuo wrote:
On 19.06.2018 09:10, Wu Zhigang wrote:
const struct comp_func_map func_map[] = { {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 2, vol_s16_to_s16_2ch}, {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 2, vol_s16_to_s32_2ch}, @@ -485,6 +837,17 @@ const struct comp_func_map func_map[] = { {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s32_to_s24_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 4, vol_s24_to_s32_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s24_to_s24_4ch}, +#ifdef CONFIG_APOLLOLAKE + {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s16_to_s16_8ch}, + {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s16_to_s32_8ch}, + {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s32_to_s16_8ch}, + {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s32_to_s32_8ch}, + {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s16_to_s24_8ch}, + {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S16_LE, 8, vol_s24_to_s16_8ch}, + {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s32_to_s24_8ch}, + {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 8, vol_s24_to_s32_8ch}, + {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s24_to_s24_8ch},
The code size gets quite big with all the channel-count versions of the generic C volume control. This is probably OK for now, since it is for the larger APL DSP, but it should be possible to handle all volume variants efficiently with handler versions for channel counts divisible by 1/2/4, and with intrinsics code maybe just 1/2. In the future there will also be float data, so this approach suffers from a combinatorial explosion.
Thanks, Seppo
I definitely agree with you. Intrinsic instructions are good for performance, but they are bad for portability. If we do not care about portability, we could also try compiler options; that is another choice. We will have to do some experiments in the future to balance code size and performance. thanks ~zhigang
Sound-open-firmware mailing list Sound-open-firmware@alsa-project.org http://mailman.alsa-project.org/mailman/listinfo/sound-open-firmware
On Tue, 2018-06-19 at 14:10 +0800, Wu Zhigang wrote:
add the 8-channel feature for the capture function for apl-gpmrb platform.
Signed-off-by: Wu Zhigang zhigang.wu@linux.intel.com Reviewed-by: Keyon Jie yang.jie@linux.intel.com
v2: rebase the code and make patch.
test with: apl-gpmrb with tdf8532 codec
linux topic/sof-dev: commit 0d51a5ed28c5 sof master: commit 48d2a1c551d7 soft master: commit 2cc3ad2a9287
pass the regression test already: Minnowboard Turbo UP^2 CNL-RVP
src/audio/volume_generic.c | 363 +++++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+)
diff --git a/src/audio/volume_generic.c b/src/audio/volume_generic.c index 200cf12..fa23240 100644 --- a/src/audio/volume_generic.c +++ b/src/audio/volume_generic.c @@ -466,6 +466,358 @@ static void vol_s24_to_s24_4ch(struct comp_dev *dev, struct comp_buffer *sink, } }
+#ifdef CONFIG_APOLLOLAKE
Please make this a platform generic macro.
#define PLATFORM_MAX_CHANNELS 8
#if PLATFORM_MAX_CHANNELS >= 8
You can do likewise for 4 channels too.
+/* volume scaling functions for 8-channel input */
+/* copy and scale volume from 16 bit source buffer to 32 bit dest buffer */ +static void vol_s16_to_s32_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int16_t *src = (int16_t *)source->r_ptr;
- int32_t *dest = (int32_t *)sink->w_ptr;
- int32_t i;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.15 --> Q1.31 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = (int32_t)src[i] * cd->volume[0];
dest[i + 1] = (int32_t)src[i + 1] * cd->volume[1];
dest[i + 2] = (int32_t)src[i + 2] * cd->volume[2];
dest[i + 3] = (int32_t)src[i + 3] * cd->volume[3];
dest[i + 4] = (int32_t)src[i + 4] * cd->volume[4];
dest[i + 5] = (int32_t)src[i + 5] * cd->volume[5];
dest[i + 6] = (int32_t)src[i + 6] * cd->volume[6];
dest[i + 7] = (int32_t)src[i + 7] * cd->volume[7];
- }
+}
+/* copy and scale volume from 32 bit source buffer to 16 bit dest buffer */ +static void vol_s32_to_s16_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int32_t *src = (int32_t *)source->r_ptr;
- int16_t *dest = (int16_t *)sink->w_ptr;
- int32_t i;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.31 --> Q1.15 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = (int16_t)q_multsr_sat_32x32(src[i], cd->volume[0],
Q_SHIFT_BITS_64(31, 16,
15));
dest[i + 1] = (int16_t)q_multsr_sat_32x32(src[i + 1],
cd->volume[1],
Q_SHIFT_BITS_64(31,
16,
15)
);
dest[i + 2] = (int16_t)q_multsr_sat_32x32(src[i + 2],
cd->volume[2],
Q_SHIFT_BITS_64(31,
16,
15)
);
dest[i + 3] = (int16_t)q_multsr_sat_32x32(src[i + 3],
cd->volume[3],
Q_SHIFT_BITS_64(31,
16,
15)
);
dest[i + 4] = (int16_t)q_multsr_sat_32x32(src[i + 4],
cd->volume[4],
Q_SHIFT_BITS_64(31,
16,
15)
);
dest[i + 5] = (int16_t)q_multsr_sat_32x32(src[i + 5],
cd->volume[5],
Q_SHIFT_BITS_64(31,
16,
15)
);
dest[i + 6] = (int16_t)q_multsr_sat_32x32(src[i + 6],
cd->volume[6],
Q_SHIFT_BITS_64(31,
16,
15)
);
dest[i + 7] = (int16_t)q_multsr_sat_32x32(src[i + 7],
cd->volume[7],
Q_SHIFT_BITS_64(31,
16,
15)
);
- }
+}
Doing this is fine for smaller, less complex conversions that will easily vectorise, but this is doing a multiply, two shifts and an addition for each channel. Let's revisit the compiler output here after 1.2 and see what it looks like on xtensa and IA (where AVX2 can cope with 8*32 bit SIMD).
+/* copy and scale volume from 32 bit source buffer to 32 bit dest buffer */ +static void vol_s32_to_s32_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int32_t *src = (int32_t *)source->r_ptr;
- int32_t *dest = (int32_t *)sink->w_ptr;
- int32_t i;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.31 --> Q1.31 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0],
Q_SHIFT_BITS_64(31, 16, 31));
dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1],
Q_SHIFT_BITS_64(31, 16,
31));
dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2],
Q_SHIFT_BITS_64(31, 16,
31));
dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3],
Q_SHIFT_BITS_64(31, 16,
31));
dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4],
Q_SHIFT_BITS_64(31, 16,
31));
dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5],
Q_SHIFT_BITS_64(31, 16,
31));
dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6],
Q_SHIFT_BITS_64(31, 16,
31));
dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7],
Q_SHIFT_BITS_64(31, 16,
31));
- }
+}
+/* copy and scale volume from 16 bit source buffer to 16 bit dest buffer */ +static void vol_s16_to_s16_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int16_t *src = (int16_t *)source->r_ptr;
- int16_t *dest = (int16_t *)sink->w_ptr;
- int32_t i;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.15 --> Q1.15 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = q_multsr_sat_16x16(src[i], cd->volume[0],
Q_SHIFT_BITS_32(15, 16, 15));
dest[i + 1] = q_multsr_sat_16x16(src[i + 1], cd->volume[1],
Q_SHIFT_BITS_32(15, 16,
15));
dest[i + 2] = q_multsr_sat_16x16(src[i + 2], cd->volume[2],
Q_SHIFT_BITS_32(15, 16,
15));
dest[i + 3] = q_multsr_sat_16x16(src[i + 3], cd->volume[3],
Q_SHIFT_BITS_32(15, 16,
15));
dest[i + 4] = q_multsr_sat_16x16(src[i + 4], cd->volume[4],
Q_SHIFT_BITS_32(15, 16,
15));
dest[i + 5] = q_multsr_sat_16x16(src[i + 5], cd->volume[5],
Q_SHIFT_BITS_32(15, 16,
15));
dest[i + 6] = q_multsr_sat_16x16(src[i + 6], cd->volume[6],
Q_SHIFT_BITS_32(15, 16,
15));
dest[i + 7] = q_multsr_sat_16x16(src[i + 7], cd->volume[7],
Q_SHIFT_BITS_32(15, 16,
15));
- }
+}
+/* copy and scale volume from 16 bit source buffer to 24 bit
- on 32 bit boundary buffer
- */
+static void vol_s16_to_s24_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int16_t *src = (int16_t *)source->r_ptr;
- int32_t *dest = (int32_t *)sink->w_ptr;
- int32_t i;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.15 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0],
Q_SHIFT_BITS_64(15, 16, 23));
dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1],
Q_SHIFT_BITS_64(15, 16,
23));
dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2],
Q_SHIFT_BITS_64(15, 16,
23));
dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3],
Q_SHIFT_BITS_64(15, 16,
23));
dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4],
Q_SHIFT_BITS_64(15, 16,
23));
dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5],
Q_SHIFT_BITS_64(15, 16,
23));
dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6],
Q_SHIFT_BITS_64(15, 16,
23));
dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7],
Q_SHIFT_BITS_64(15, 16,
23));
- }
+}
+/* copy and scale volume from 24 bit on 32 bit boundary source buffer
+ * to 16 bit dest buffer
+ */
+static void vol_s24_to_s16_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int32_t *src = (int32_t *)source->r_ptr;
- int16_t *dest = (int16_t *)sink->w_ptr;
- int32_t i, sample;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.23 --> Q1.15 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
sample = sign_extend_s24(src[i]);
dest[i] = (int16_t)q_multsr_sat_32x32(sample, cd->volume[0],
Q_SHIFT_BITS_64(23, 16,
15));
sample = sign_extend_s24(src[i + 1]);
dest[i + 1] = (int16_t)q_multsr_sat_32x32(sample,
cd->volume[1],
Q_SHIFT_BITS_64(23,
16,
15)
);
sample = sign_extend_s24(src[i + 2]);
dest[i + 2] = (int16_t)q_multsr_sat_32x32(sample,
cd->volume[2],
Q_SHIFT_BITS_64(23,
16,
15)
);
sample = sign_extend_s24(src[i + 3]);
dest[i + 3] = (int16_t)q_multsr_sat_32x32(sample,
cd->volume[3],
Q_SHIFT_BITS_64(23,
16,
15)
);
sample = sign_extend_s24(src[i + 4]);
dest[i + 4] = (int16_t)q_multsr_sat_32x32(sample,
cd->volume[4],
Q_SHIFT_BITS_64(23,
16,
15)
);
sample = sign_extend_s24(src[i + 5]);
dest[i + 5] = (int16_t)q_multsr_sat_32x32(sample,
cd->volume[5],
Q_SHIFT_BITS_64(23,
16,
15)
);
sample = sign_extend_s24(src[i + 6]);
dest[i + 6] = (int16_t)q_multsr_sat_32x32(sample,
cd->volume[6],
Q_SHIFT_BITS_64(23,
16,
15)
);
sample = sign_extend_s24(src[i + 7]);
dest[i + 7] = (int16_t)q_multsr_sat_32x32(sample,
cd->volume[7],
Q_SHIFT_BITS_64(23,
16,
15)
);
- }
+}
+/* copy and scale volume from 32 bit source buffer to 24 bit
- on 32 bit boundary dest buffer
- */
+static void vol_s32_to_s24_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int32_t *src = (int32_t *)source->r_ptr;
- int32_t *dest = (int32_t *)sink->w_ptr;
- int32_t i;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.31 --> Q1.23 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0],
Q_SHIFT_BITS_64(31, 16, 23));
dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1],
Q_SHIFT_BITS_64(31, 16,
23));
dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2],
Q_SHIFT_BITS_64(31, 16,
23));
dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3],
Q_SHIFT_BITS_64(31, 16,
23));
dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4],
Q_SHIFT_BITS_64(31, 16,
23));
dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5],
Q_SHIFT_BITS_64(31, 16,
23));
dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6],
Q_SHIFT_BITS_64(31, 16,
23));
dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7],
Q_SHIFT_BITS_64(31, 16,
23));
- }
+}
+/* copy and scale volume from 24 bit on 32 bit boundary source buffer
+ * to 32 bit dest buffer
+ */
+static void vol_s24_to_s32_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int32_t *src = (int32_t *)source->r_ptr;
- int32_t *dest = (int32_t *)sink->w_ptr;
- int32_t i;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.23 --> Q1.31 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = q_multsr_sat_32x32(sign_extend_s24(src[i]),
cd->volume[0],
Q_SHIFT_BITS_64(23, 16, 31));
dest[i + 1] = q_multsr_sat_32x32(sign_extend_s24(src[i + 1]),
cd->volume[1],
Q_SHIFT_BITS_64(23, 16,
31));
dest[i + 2] = q_multsr_sat_32x32(sign_extend_s24(src[i + 2]),
cd->volume[2],
Q_SHIFT_BITS_64(23, 16,
31));
dest[i + 3] = q_multsr_sat_32x32(sign_extend_s24(src[i + 3]),
cd->volume[3],
Q_SHIFT_BITS_64(23, 16,
31));
dest[i + 4] = q_multsr_sat_32x32(sign_extend_s24(src[i + 4]),
cd->volume[4],
Q_SHIFT_BITS_64(23, 16,
31));
dest[i + 5] = q_multsr_sat_32x32(sign_extend_s24(src[i + 5]),
cd->volume[5],
Q_SHIFT_BITS_64(23, 16,
31));
dest[i + 6] = q_multsr_sat_32x32(sign_extend_s24(src[i + 6]),
cd->volume[6],
Q_SHIFT_BITS_64(23, 16,
31));
dest[i + 7] = q_multsr_sat_32x32(sign_extend_s24(src[i + 7]),
cd->volume[7],
Q_SHIFT_BITS_64(23, 16,
31));
- }
+}
+/* Copy and scale volume from 24 bit source buffer to 24 bit on 32 bit boundary
- dest buffer.
- */
+static void vol_s24_to_s24_8ch(struct comp_dev *dev, struct comp_buffer *sink,
struct comp_buffer *source)
+{
- struct comp_data *cd = comp_get_drvdata(dev);
- int32_t i, *src = (int32_t *)source->r_ptr;
- int32_t *dest = (int32_t *)sink->w_ptr;
- /* buffer sizes are always divisible by period frames */
- /* Samples are Q1.23 --> Q1.23 and volume is Q1.16 */
- for (i = 0; i < dev->frames * 8; i += 8) {
dest[i] = q_multsr_sat_32x32(sign_extend_s24(src[i]),
cd->volume[0],
Q_SHIFT_BITS_64(23, 16, 23));
dest[i + 1] = q_multsr_sat_32x32(sign_extend_s24(src[i + 1]),
cd->volume[1],
Q_SHIFT_BITS_64(23, 16,
23));
dest[i + 2] = q_multsr_sat_32x32(sign_extend_s24(src[i + 2]),
cd->volume[2],
Q_SHIFT_BITS_64(23, 16,
23));
dest[i + 3] = q_multsr_sat_32x32(sign_extend_s24(src[i + 3]),
cd->volume[3],
Q_SHIFT_BITS_64(23, 16,
23));
dest[i + 4] = q_multsr_sat_32x32(sign_extend_s24(src[i + 4]),
cd->volume[4],
Q_SHIFT_BITS_64(23, 16,
23));
dest[i + 5] = q_multsr_sat_32x32(sign_extend_s24(src[i + 5]),
cd->volume[5],
Q_SHIFT_BITS_64(23, 16,
23));
dest[i + 6] = q_multsr_sat_32x32(sign_extend_s24(src[i + 6]),
cd->volume[6],
Q_SHIFT_BITS_64(23, 16,
23));
dest[i + 7] = q_multsr_sat_32x32(sign_extend_s24(src[i + 7]),
cd->volume[7],
Q_SHIFT_BITS_64(23, 16,
23));
- }
+} +#endif
const struct comp_func_map func_map[] = { {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 2, vol_s16_to_s16_2ch}, {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 2, vol_s16_to_s32_2ch}, @@ -485,6 +837,17 @@ const struct comp_func_map func_map[] = { {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s32_to_s24_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 4, vol_s24_to_s32_4ch}, {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s24_to_s24_4ch}, +#ifdef CONFIG_APOLLOLAKE
- {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s16_to_s16_8ch},
- {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s16_to_s32_8ch},
- {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s32_to_s16_8ch},
- {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s32_to_s32_8ch},
- {SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s16_to_s24_8ch},
- {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S16_LE, 8, vol_s24_to_s16_8ch},
- {SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s32_to_s24_8ch},
- {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 8, vol_s24_to_s32_8ch},
- {SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 8,
vol_s24_to_s24_8ch}, +#endif };
scale_vol vol_get_processing_function(struct comp_dev *dev)
Liam
participants (4)
-
Liam Girdwood
-
Seppo Ingalsuo
-
Wu Zhigang
-
zhigangw