[Sound-open-firmware] [PATCH v2] volume: support 8-channel feature

Tue Jun 19 12:37:53 CEST 2018

On Tue, 2018-06-19 at 14:10 +0800, Wu Zhigang wrote:
> add the 8-channel feature for the capture function
> for apl-gpmrb platform.
> 
> Signed-off-by: Wu Zhigang <zhigang.wu at linux.intel.com>
> Reviewed-by: Keyon Jie <yang.jie at linux.intel.com>
> 
> ---
> v2: rebase the code and make patch.
> 
> ---
> test with:
> apl-gpmrb with tdf8532 codec
> 
> linux topic/sof-dev: commit 0d51a5ed28c5
> sof master: commit 48d2a1c551d7
> soft master: commit 2cc3ad2a9287
> 
> pass the regression test already:
> Minnowboard Turbo
> UP^2
> CNL-RVP
> ---
>  src/audio/volume_generic.c | 363 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 363 insertions(+)
> 
> diff --git a/src/audio/volume_generic.c b/src/audio/volume_generic.c
> index 200cf12..fa23240 100644
> --- a/src/audio/volume_generic.c
> +++ b/src/audio/volume_generic.c
> @@ -466,6 +466,358 @@ static void vol_s24_to_s24_4ch(struct comp_dev *dev,
> struct comp_buffer *sink,
>  	}
>  }
>  
> +#ifdef CONFIG_APOLLOLAKE

Please make this a platform generic macro.

#define PLATFORM_MAX_CHANNELS	8

#if PLATFORM_MAX_CHANNELS >= 8

You can do likewise for 4 channels too.

> +/* volume scaling functions for 8-channel input */
> +
> +/* copy and scale volume from 16 bit source buffer to 32 bit dest buffer */
> +static void vol_s16_to_s32_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int16_t *src = (int16_t *)source->r_ptr;
> +	int32_t *dest = (int32_t *)sink->w_ptr;
> +	int32_t i;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.15 --> Q1.31 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = (int32_t)src[i] * cd->volume[0];
> +		dest[i + 1] = (int32_t)src[i + 1] * cd->volume[1];
> +		dest[i + 2] = (int32_t)src[i + 2] * cd->volume[2];
> +		dest[i + 3] = (int32_t)src[i + 3] * cd->volume[3];
> +		dest[i + 4] = (int32_t)src[i + 4] * cd->volume[4];
> +		dest[i + 5] = (int32_t)src[i + 5] * cd->volume[5];
> +		dest[i + 6] = (int32_t)src[i + 6] * cd->volume[6];
> +		dest[i + 7] = (int32_t)src[i + 7] * cd->volume[7];
> +	}
> +}
> +
> +/* copy and scale volume from 32 bit source buffer to 16 bit dest buffer */
> +static void vol_s32_to_s16_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int32_t *src = (int32_t *)source->r_ptr;
> +	int16_t *dest = (int16_t *)sink->w_ptr;
> +	int32_t i;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.31 --> Q1.15 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = (int16_t)q_multsr_sat_32x32(src[i], cd->volume[0],
> +						      Q_SHIFT_BITS_64(31, 16,
> +								      15));
> +		dest[i + 1] = (int16_t)q_multsr_sat_32x32(src[i + 1],
> +							  cd->volume[1],
> +							  Q_SHIFT_BITS_64(31,
> +									  16,
> +									  15)
> );
> +		dest[i + 2] = (int16_t)q_multsr_sat_32x32(src[i + 2],
> +							  cd->volume[2],
> +							  Q_SHIFT_BITS_64(31,
> +									  16,
> +									  15)
> );
> +		dest[i + 3] = (int16_t)q_multsr_sat_32x32(src[i + 3],
> +							  cd->volume[3],
> +							  Q_SHIFT_BITS_64(31,
> +									  16,
> +									  15)
> );
> +		dest[i + 4] = (int16_t)q_multsr_sat_32x32(src[i + 4],
> +							  cd->volume[4],
> +							  Q_SHIFT_BITS_64(31,
> +									  16,
> +									  15)
> );
> +		dest[i + 5] = (int16_t)q_multsr_sat_32x32(src[i + 5],
> +							  cd->volume[5],
> +							  Q_SHIFT_BITS_64(31,
> +									  16,
> +									  15)
> );
> +		dest[i + 6] = (int16_t)q_multsr_sat_32x32(src[i + 6],
> +							  cd->volume[6],
> +							  Q_SHIFT_BITS_64(31,
> +									  16,
> +									  15)
> );
> +		dest[i + 7] = (int16_t)q_multsr_sat_32x32(src[i + 7],
> +							  cd->volume[7],
> +							  Q_SHIFT_BITS_64(31,
> +									  16,
> +									  15)
> );
> +	}
> +}
> +

Doing this is fine for smaller, less complex conversions that will easily
vectorise, but this does a multiply, two shifts and an addition for each
channel. Let's revisit the compiler output here after 1.2 and see what it
looks like on xtensa and IA (where AVX2 can cope with 8 x 32-bit SIMD).

> +/* copy and scale volume from 32 bit source buffer to 32 bit dest buffer */
> +static void vol_s32_to_s32_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int32_t *src = (int32_t *)source->r_ptr;
> +	int32_t *dest = (int32_t *)sink->w_ptr;
> +	int32_t i;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.31 --> Q1.31 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0],
> +					     Q_SHIFT_BITS_64(31, 16, 31));
> +		dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1],
> +						 Q_SHIFT_BITS_64(31, 16,
> 31));
> +		dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2],
> +						 Q_SHIFT_BITS_64(31, 16,
> 31));
> +		dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3],
> +						 Q_SHIFT_BITS_64(31, 16,
> 31));
> +		dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4],
> +						 Q_SHIFT_BITS_64(31, 16,
> 31));
> +		dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5],
> +						 Q_SHIFT_BITS_64(31, 16,
> 31));
> +		dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6],
> +						 Q_SHIFT_BITS_64(31, 16,
> 31));
> +		dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7],
> +						 Q_SHIFT_BITS_64(31, 16,
> 31));
> +	}
> +}
> +
> +/* copy and scale volume from 16 bit source buffer to 16 bit dest buffer */
> +static void vol_s16_to_s16_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int16_t *src = (int16_t *)source->r_ptr;
> +	int16_t *dest = (int16_t *)sink->w_ptr;
> +	int32_t i;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.15 --> Q1.15 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = q_multsr_sat_16x16(src[i], cd->volume[0],
> +					     Q_SHIFT_BITS_32(15, 16, 15));
> +		dest[i + 1] = q_multsr_sat_16x16(src[i + 1], cd->volume[1],
> +						 Q_SHIFT_BITS_32(15, 16,
> 15));
> +		dest[i + 2] = q_multsr_sat_16x16(src[i + 2], cd->volume[2],
> +						 Q_SHIFT_BITS_32(15, 16,
> 15));
> +		dest[i + 3] = q_multsr_sat_16x16(src[i + 3], cd->volume[3],
> +						 Q_SHIFT_BITS_32(15, 16,
> 15));
> +		dest[i + 4] = q_multsr_sat_16x16(src[i + 4], cd->volume[4],
> +						 Q_SHIFT_BITS_32(15, 16,
> 15));
> +		dest[i + 5] = q_multsr_sat_16x16(src[i + 5], cd->volume[5],
> +						 Q_SHIFT_BITS_32(15, 16,
> 15));
> +		dest[i + 6] = q_multsr_sat_16x16(src[i + 6], cd->volume[6],
> +						 Q_SHIFT_BITS_32(15, 16,
> 15));
> +		dest[i + 7] = q_multsr_sat_16x16(src[i + 7], cd->volume[7],
> +						 Q_SHIFT_BITS_32(15, 16,
> 15));
> +	}
> +}
> +
> +/* copy and scale volume from 16 bit source buffer to 24 bit
> + * on 32 bit boundary buffer
> + */
> +static void vol_s16_to_s24_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int16_t *src = (int16_t *)source->r_ptr;
> +	int32_t *dest = (int32_t *)sink->w_ptr;
> +	int32_t i;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.15 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0],
> +					     Q_SHIFT_BITS_64(15, 16, 23));
> +		dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1],
> +						 Q_SHIFT_BITS_64(15, 16,
> 23));
> +		dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2],
> +						 Q_SHIFT_BITS_64(15, 16,
> 23));
> +		dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3],
> +						 Q_SHIFT_BITS_64(15, 16,
> 23));
> +		dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4],
> +						 Q_SHIFT_BITS_64(15, 16,
> 23));
> +		dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5],
> +						 Q_SHIFT_BITS_64(15, 16,
> 23));
> +		dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6],
> +						 Q_SHIFT_BITS_64(15, 16,
> 23));
> +		dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7],
> +						 Q_SHIFT_BITS_64(15, 16,
> 23));
> +	}
> +}
> +
> +/* copy and scale volume from 24 bit on 32 bit boundary source buffer
> + * to 16 bit dest buffer
> + */
> +static void vol_s24_to_s16_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int32_t *src = (int32_t *)source->r_ptr;
> +	int16_t *dest = (int16_t *)sink->w_ptr;
> +	int32_t i, sample;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.23 --> Q1.15 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		sample = sign_extend_s24(src[i]);
> +		dest[i] = (int16_t)q_multsr_sat_32x32(sample, cd->volume[0],
> +						      Q_SHIFT_BITS_64(23, 16,
> +								      15));
> +		sample = sign_extend_s24(src[i + 1]);
> +		dest[i + 1] = (int16_t)q_multsr_sat_32x32(sample,
> +							  cd->volume[1],
> +							  Q_SHIFT_BITS_64(23,
> +									  16,
> +									  15)
> );
> +		sample = sign_extend_s24(src[i + 2]);
> +		dest[i + 2] = (int16_t)q_multsr_sat_32x32(sample,
> +							  cd->volume[2],
> +							  Q_SHIFT_BITS_64(23,
> +									  16,
> +									  15)
> );
> +		sample = sign_extend_s24(src[i + 3]);
> +		dest[i + 3] = (int16_t)q_multsr_sat_32x32(sample,
> +							  cd->volume[3],
> +							  Q_SHIFT_BITS_64(23,
> +									  16,
> +									  15)
> );
> +		sample = sign_extend_s24(src[i + 4]);
> +		dest[i + 4] = (int16_t)q_multsr_sat_32x32(sample,
> +							  cd->volume[4],
> +							  Q_SHIFT_BITS_64(23,
> +									  16,
> +									  15)
> );
> +		sample = sign_extend_s24(src[i + 5]);
> +		dest[i + 5] = (int16_t)q_multsr_sat_32x32(sample,
> +							  cd->volume[5],
> +							  Q_SHIFT_BITS_64(23,
> +									  16,
> +									  15)
> );
> +		sample = sign_extend_s24(src[i + 6]);
> +		dest[i + 6] = (int16_t)q_multsr_sat_32x32(sample,
> +							  cd->volume[6],
> +							  Q_SHIFT_BITS_64(23,
> +									  16,
> +									  15)
> );
> +		sample = sign_extend_s24(src[i + 7]);
> +		dest[i + 7] = (int16_t)q_multsr_sat_32x32(sample,
> +							  cd->volume[7],
> +							  Q_SHIFT_BITS_64(23,
> +									  16,
> +									  15)
> );
> +	}
> +}
> +
> +/* copy and scale volume from 32 bit source buffer to 24 bit
> + * on 32 bit boundary dest buffer
> + */
> +static void vol_s32_to_s24_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int32_t *src = (int32_t *)source->r_ptr;
> +	int32_t *dest = (int32_t *)sink->w_ptr;
> +	int32_t i;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.31 --> Q1.23 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = q_multsr_sat_32x32(src[i], cd->volume[0],
> +					     Q_SHIFT_BITS_64(31, 16, 23));
> +		dest[i + 1] = q_multsr_sat_32x32(src[i + 1], cd->volume[1],
> +						 Q_SHIFT_BITS_64(31, 16,
> 23));
> +		dest[i + 2] = q_multsr_sat_32x32(src[i + 2], cd->volume[2],
> +						 Q_SHIFT_BITS_64(31, 16,
> 23));
> +		dest[i + 3] = q_multsr_sat_32x32(src[i + 3], cd->volume[3],
> +						 Q_SHIFT_BITS_64(31, 16,
> 23));
> +		dest[i + 4] = q_multsr_sat_32x32(src[i + 4], cd->volume[4],
> +						 Q_SHIFT_BITS_64(31, 16,
> 23));
> +		dest[i + 5] = q_multsr_sat_32x32(src[i + 5], cd->volume[5],
> +						 Q_SHIFT_BITS_64(31, 16,
> 23));
> +		dest[i + 6] = q_multsr_sat_32x32(src[i + 6], cd->volume[6],
> +						 Q_SHIFT_BITS_64(31, 16,
> 23));
> +		dest[i + 7] = q_multsr_sat_32x32(src[i + 7], cd->volume[7],
> +						 Q_SHIFT_BITS_64(31, 16,
> 23));
> +	}
> +}
> +
> +/* copy and scale volume from 24 bit on 32 bit boundary source buffer
> + * to 32 bit dest buffer
> + */
> +static void vol_s24_to_s32_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int32_t *src = (int32_t *)source->r_ptr;
> +	int32_t *dest = (int32_t *)sink->w_ptr;
> +	int32_t i;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.23 --> Q1.31 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = q_multsr_sat_32x32(sign_extend_s24(src[i]),
> +					     cd->volume[0],
> +					     Q_SHIFT_BITS_64(23, 16, 31));
> +		dest[i + 1] = q_multsr_sat_32x32(sign_extend_s24(src[i + 1]),
> +						 cd->volume[1],
> +						 Q_SHIFT_BITS_64(23, 16,
> 31));
> +		dest[i + 2] = q_multsr_sat_32x32(sign_extend_s24(src[i + 2]),
> +						 cd->volume[2],
> +						 Q_SHIFT_BITS_64(23, 16,
> 31));
> +		dest[i + 3] = q_multsr_sat_32x32(sign_extend_s24(src[i + 3]),
> +						 cd->volume[3],
> +						 Q_SHIFT_BITS_64(23, 16,
> 31));
> +		dest[i + 4] = q_multsr_sat_32x32(sign_extend_s24(src[i + 4]),
> +						 cd->volume[4],
> +						 Q_SHIFT_BITS_64(23, 16,
> 31));
> +		dest[i + 5] = q_multsr_sat_32x32(sign_extend_s24(src[i + 5]),
> +						 cd->volume[5],
> +						 Q_SHIFT_BITS_64(23, 16,
> 31));
> +		dest[i + 6] = q_multsr_sat_32x32(sign_extend_s24(src[i + 6]),
> +						 cd->volume[6],
> +						 Q_SHIFT_BITS_64(23, 16,
> 31));
> +		dest[i + 7] = q_multsr_sat_32x32(sign_extend_s24(src[i + 7]),
> +						 cd->volume[7],
> +						 Q_SHIFT_BITS_64(23, 16,
> 31));
> +	}
> +}
> +
> +/* Copy and scale volume from 24 bit source buffer to 24 bit on 32 bit
> boundary
> + * dest buffer.
> + */
> +static void vol_s24_to_s24_8ch(struct comp_dev *dev, struct comp_buffer
> *sink,
> +			       struct comp_buffer *source)
> +{
> +	struct comp_data *cd = comp_get_drvdata(dev);
> +	int32_t i, *src = (int32_t *)source->r_ptr;
> +	int32_t *dest = (int32_t *)sink->w_ptr;
> +
> +	/* buffer sizes are always divisible by period frames */
> +	/* Samples are Q1.23 --> Q1.23 and volume is Q1.16 */
> +	for (i = 0; i < dev->frames * 8; i += 8) {
> +		dest[i] = q_multsr_sat_32x32(sign_extend_s24(src[i]),
> +					     cd->volume[0],
> +					     Q_SHIFT_BITS_64(23, 16, 23));
> +		dest[i + 1] = q_multsr_sat_32x32(sign_extend_s24(src[i + 1]),
> +						 cd->volume[1],
> +						 Q_SHIFT_BITS_64(23, 16,
> 23));
> +		dest[i + 2] = q_multsr_sat_32x32(sign_extend_s24(src[i + 2]),
> +						 cd->volume[2],
> +						 Q_SHIFT_BITS_64(23, 16,
> 23));
> +		dest[i + 3] = q_multsr_sat_32x32(sign_extend_s24(src[i + 3]),
> +						 cd->volume[3],
> +						 Q_SHIFT_BITS_64(23, 16,
> 23));
> +		dest[i + 4] = q_multsr_sat_32x32(sign_extend_s24(src[i + 4]),
> +						 cd->volume[4],
> +						 Q_SHIFT_BITS_64(23, 16,
> 23));
> +		dest[i + 5] = q_multsr_sat_32x32(sign_extend_s24(src[i + 5]),
> +						 cd->volume[5],
> +						 Q_SHIFT_BITS_64(23, 16,
> 23));
> +		dest[i + 6] = q_multsr_sat_32x32(sign_extend_s24(src[i + 6]),
> +						 cd->volume[6],
> +						 Q_SHIFT_BITS_64(23, 16,
> 23));
> +		dest[i + 7] = q_multsr_sat_32x32(sign_extend_s24(src[i + 7]),
> +						 cd->volume[7],
> +						 Q_SHIFT_BITS_64(23, 16,
> 23));
> +	}
> +}
> +#endif
> +
>  const struct comp_func_map func_map[] = {
>  	{SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 2, vol_s16_to_s16_2ch},
>  	{SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 2, vol_s16_to_s32_2ch},
> @@ -485,6 +837,17 @@ const struct comp_func_map func_map[] = {
>  	{SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 4, vol_s32_to_s24_4ch},
>  	{SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 4, vol_s24_to_s32_4ch},
>  	{SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 4,
> vol_s24_to_s24_4ch},
> +#ifdef CONFIG_APOLLOLAKE
> +	{SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s16_to_s16_8ch},
> +	{SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s16_to_s32_8ch},
> +	{SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S16_LE, 8, vol_s32_to_s16_8ch},
> +	{SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S32_LE, 8, vol_s32_to_s32_8ch},
> +	{SOF_IPC_FRAME_S16_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s16_to_s24_8ch},
> +	{SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S16_LE, 8, vol_s24_to_s16_8ch},
> +	{SOF_IPC_FRAME_S32_LE, SOF_IPC_FRAME_S24_4LE, 8, vol_s32_to_s24_8ch},
> +	{SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S32_LE, 8, vol_s24_to_s32_8ch},
> +	{SOF_IPC_FRAME_S24_4LE, SOF_IPC_FRAME_S24_4LE, 8,
> vol_s24_to_s24_8ch},
> +#endif
>  };
>  
>  scale_vol vol_get_processing_function(struct comp_dev *dev)

Liam