diff --git a/src/audio/mixin_mixout/mixin_mixout.c b/src/audio/mixin_mixout/mixin_mixout.c index 9c1abe30386c..576b46bb31f7 100644 --- a/src/audio/mixin_mixout/mixin_mixout.c +++ b/src/audio/mixin_mixout/mixin_mixout.c @@ -75,6 +75,7 @@ struct mixin_sink_config { /* mixin component private data */ struct mixin_data { mix_func mix; + mix_func gain_mix; struct mixin_sink_config sink_config[MIXIN_MAX_SINKS]; }; @@ -205,8 +206,13 @@ static int mix(struct comp_dev *dev, const struct mixin_data *mixin_data, sink_config = &mixin_data->sink_config[sink_index]; - mixin_data->mix(sink, start_sample, mixed_samples, + if (sink_config->gain == IPC4_MIXIN_UNITY_GAIN) { + mixin_data->mix(sink, start_sample, mixed_samples, source, sample_count, sink_config->gain); + } else { + mixin_data->gain_mix(sink, start_sample, mixed_samples, + source, sample_count, sink_config->gain); + } return 0; } @@ -555,6 +561,7 @@ static int mixin_reset(struct processing_module *mod) comp_dbg(dev, "mixin_reset()"); mixin_data->mix = NULL; + mixin_data->gain_mix = NULL; return 0; } @@ -685,15 +692,15 @@ static int mixin_prepare(struct processing_module *mod, case SOF_IPC_FRAME_S16_LE: case SOF_IPC_FRAME_S24_4LE: case SOF_IPC_FRAME_S32_LE: - md->mix = mixin_get_processing_function(fmt); + mixin_get_processing_functions(fmt, &md->mix, &md->gain_mix); break; default: comp_err(dev, "unsupported data format %d", fmt); return -EINVAL; } - if (!md->mix) { - comp_err(dev, "have not found the suitable processing function"); + if (!md->mix || !md->gain_mix) { + comp_err(dev, "have not found suitable processing functions"); return -EINVAL; } diff --git a/src/audio/mixin_mixout/mixin_mixout.h b/src/audio/mixin_mixout/mixin_mixout.h index 085187e0e743..d7519ca80fd8 100644 --- a/src/audio/mixin_mixout/mixin_mixout.h +++ b/src/audio/mixin_mixout/mixin_mixout.h @@ -110,7 +110,8 @@ typedef void (*mix_func)(struct cir_buf_ptr *sink, int32_t start_sample, */ struct mix_func_map { uint16_t frame_fmt; /* frame format */ - mix_func func; /* mixin processing function */ + mix_func mix; /* faster mixing func without gain support */ + mix_func gain_mix; /* slower mixing func with gain support */ }; extern const struct mix_func_map mix_func_map[]; @@ -119,17 +120,23 @@ extern const size_t mix_count; * \brief Retrievies mixin processing function. * \param[in] fmt stream PCM frame format */ -static inline mix_func mixin_get_processing_function(int fmt) +static inline bool mixin_get_processing_functions(int fmt, mix_func *mix, mix_func *gain_mix) { int i; + *mix = NULL; + *gain_mix = NULL; + /* map mixin processing function for source and sink buffers */ for (i = 0; i < mix_count; i++) { - if (fmt == mix_func_map[i].frame_fmt) - return mix_func_map[i].func; + if (fmt == mix_func_map[i].frame_fmt) { + *mix = mix_func_map[i].mix; + *gain_mix = mix_func_map[i].gain_mix; + return true; + } } - return NULL; + return false; } #endif /* __SOF_IPC4_MIXIN_MIXOUT_H__ */ diff --git a/src/audio/mixin_mixout/mixin_mixout_generic.c b/src/audio/mixin_mixout/mixin_mixout_generic.c index 60a9f8211b05..af9dac8394af 100644 --- a/src/audio/mixin_mixout/mixin_mixout_generic.c +++ b/src/audio/mixin_mixout/mixin_mixout_generic.c @@ -23,6 +23,8 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe int16_t *dst = (int16_t *)sink->ptr + start_sample; int16_t *src = source->ptr; + ARG_UNUSED(gain); + assert(mixed_samples >= start_sample); samples_to_mix = mixed_samples - start_sample; samples_to_mix = MIN(samples_to_mix, sample_count); @@ -54,6 +56,54 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe src += n; } } + +static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples, + const struct cir_buf_ptr *source, + int32_t sample_count, uint16_t gain) +{ + int32_t samples_to_mix, samples_to_copy, left_samples; + int32_t n, nmax, i; + + /* cir_buf_wrap() is required and is done below in a loop */ + int16_t *dst = (int16_t *)sink->ptr + start_sample; + int16_t *src = source->ptr; + + assert(mixed_samples >= start_sample); + samples_to_mix = mixed_samples - start_sample; + samples_to_mix = MIN(samples_to_mix, sample_count); + samples_to_copy = sample_count - samples_to_mix; + + for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (int16_t *)source->buf_end - src; + n = MIN(left_samples, nmax); + nmax = (int16_t *)sink->buf_end - dst; + n = MIN(n, nmax); + for (i = 0; i < n; i++) { + *dst = sat_int16((int32_t)*dst + + q_mults_16x16(*src, gain, IPC4_MIXIN_GAIN_SHIFT)); + src++; + dst++; + } + } + + for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst, sink->buf_start, sink->buf_end); + nmax = (int16_t *)source->buf_end - src; + n = MIN(left_samples, nmax); + nmax = (int16_t *)sink->buf_end - dst; + n = MIN(n, nmax); + + for (i = 0; i < n; i++) { + *dst = q_mults_16x16(*src, gain, IPC4_MIXIN_GAIN_SHIFT); + src++; + dst++; + } + } +} #endif /* CONFIG_FORMAT_S16LE */ #if CONFIG_FORMAT_S24LE @@ -67,6 +117,8 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe int32_t *dst = (int32_t *)sink->ptr + start_sample; int32_t *src = source->ptr; + ARG_UNUSED(gain); + assert(mixed_samples >= start_sample); samples_to_mix = mixed_samples - start_sample; samples_to_mix = MIN(samples_to_mix, sample_count); @@ -99,6 +151,52 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe } } +static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples, + const struct cir_buf_ptr *source, + int32_t sample_count, uint16_t gain) +{ + int32_t samples_to_mix, samples_to_copy, left_samples; + int32_t n, nmax, i; + /* cir_buf_wrap() is required and is done below in a loop */ + int32_t *dst = (int32_t *)sink->ptr + start_sample; + int32_t *src = source->ptr; + + assert(mixed_samples >= start_sample); + samples_to_mix = mixed_samples - start_sample; + samples_to_mix = MIN(samples_to_mix, sample_count); + samples_to_copy = sample_count - samples_to_mix; + + for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (int32_t *)source->buf_end - src; + n = MIN(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = MIN(n, nmax); + for (i = 0; i < n; i++) { + *dst = sat_int24(sign_extend_s24(*dst) + + (int32_t)q_mults_32x32(sign_extend_s24(*src), + gain, IPC4_MIXIN_GAIN_SHIFT)); + src++; + dst++; + } + } + + for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst, sink->buf_start, sink->buf_end); + nmax = (int32_t *)source->buf_end - src; + n = MIN(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = MIN(n, nmax); + for (i = 0; i < n; i++) { + *dst = q_mults_32x32(sign_extend_s24(*src), gain, IPC4_MIXIN_GAIN_SHIFT); + src++; + dst++; + } + } +} #endif /* CONFIG_FORMAT_S24LE */ #if CONFIG_FORMAT_S32LE @@ -111,6 +209,8 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe int32_t *dst = (int32_t *)sink->ptr + start_sample; int32_t *src = source->ptr; + ARG_UNUSED(gain); + assert(mixed_samples >= start_sample); samples_to_mix = mixed_samples - start_sample; samples_to_mix = MIN(samples_to_mix, sample_count); @@ -143,17 +243,61 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe } } +static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples, + const struct cir_buf_ptr *source, + int32_t sample_count, uint16_t gain) +{ + int32_t samples_to_mix, samples_to_copy, left_samples; + int32_t n, nmax, i; + int32_t *dst = (int32_t *)sink->ptr + start_sample; + int32_t *src = source->ptr; + + assert(mixed_samples >= start_sample); + samples_to_mix = mixed_samples - start_sample; + samples_to_mix = MIN(samples_to_mix, sample_count); + samples_to_copy = sample_count - samples_to_mix; + + for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (int32_t *)source->buf_end - src; + n = MIN(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = MIN(n, nmax); + for (i = 0; i < n; i++) { + *dst = sat_int32((int64_t)*dst + + q_mults_32x32(*src, gain, IPC4_MIXIN_GAIN_SHIFT)); + src++; + dst++; + } + } + + for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst, sink->buf_start, sink->buf_end); + nmax = (int32_t *)source->buf_end - src; + n = MIN(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = MIN(n, nmax); + for (i = 0; i < n; i++) { + *dst = q_mults_32x32(*src, gain, IPC4_MIXIN_GAIN_SHIFT); + src++; + dst++; + } + } +} #endif /* CONFIG_FORMAT_S32LE */ const struct mix_func_map mix_func_map[] = { #if CONFIG_FORMAT_S16LE - { SOF_IPC_FRAME_S16_LE, mix_s16 }, + { SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16_gain }, #endif #if CONFIG_FORMAT_S24LE - { SOF_IPC_FRAME_S24_4LE, mix_s24 }, + { SOF_IPC_FRAME_S24_4LE, mix_s24, mix_s24_gain }, #endif #if CONFIG_FORMAT_S32LE - { SOF_IPC_FRAME_S32_LE, mix_s32 } + { SOF_IPC_FRAME_S32_LE, mix_s32, mix_s32_gain } #endif }; diff --git a/src/audio/mixin_mixout/mixin_mixout_hifi3.c b/src/audio/mixin_mixout/mixin_mixout_hifi3.c index 568cbed5202b..a0f1eb651285 100644 --- a/src/audio/mixin_mixout/mixin_mixout_hifi3.c +++ b/src/audio/mixin_mixout/mixin_mixout_hifi3.c @@ -28,6 +28,99 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample; ae_int16 *src = source->ptr; + ARG_UNUSED(gain); + + assert(mixed_samples >= start_sample); + samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count); + samples_to_copy = sample_count - samples_to_mix; + n = 0; + + for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src + n, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (ae_int16 *)source->buf_end - src; + n = AE_MIN_32_signed(left_samples, nmax); + nmax = (ae_int16 *)sink->buf_end - dst; + n = AE_MIN_32_signed(n, nmax); + in = (ae_int16x4 *)src; + out = (ae_int16x4 *)dst; + inu = AE_LA64_PP(in); + outu1 = AE_LA64_PP(out); + m = n >> 2; + left = n & 0x03; + /* process 4 frames per loop */ + for (i = 0; i < m; i++) { + AE_LA16X4_IP(in_sample, inu, in); + AE_LA16X4_IP(out_sample, outu1, out); + out--; + out_sample = AE_ADD16S(in_sample, out_sample); + AE_SA16X4_IP(out_sample, outu2, out); + } + AE_SA64POS_FP(outu2, out); + + /* process the left samples that less than 4 + * one by one to avoid memory access overrun + */ + for (i = 0; i < left ; i++) { + AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16)); + AE_L16_IP(out_sample, (ae_int16 *)out, 0); + out_sample = AE_ADD16S(in_sample, out_sample); + AE_S16_0_IP(out_sample, (ae_int16 *)out, sizeof(ae_int16)); + } + } + + for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src + n, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (ae_int16 *)source->buf_end - src; + n = AE_MIN_32_signed(left_samples, nmax); + nmax = (ae_int16 *)sink->buf_end - dst; + n = AE_MIN_32_signed(n, nmax); + in = (ae_int16x4 *)src; + out = (ae_int16x4 *)dst; + inu = AE_LA64_PP(in); + m = n >> 2; + left = n & 0x03; + /* process 4 frames per loop */ + for (i = 0; i < m; i++) { + AE_LA16X4_IP(in_sample, inu, in); + AE_SA16X4_IP(in_sample, outu2, out); + } + AE_SA64POS_FP(outu2, out); + + /* process the left samples that less than 4 + * one by one to avoid memory access overrun + */ + for (i = 0; i < left ; i++) { + AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16)); + AE_S16_0_IP(in_sample, (ae_int16 *)out, sizeof(ae_int16)); + } + } +} + +static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples, + const struct cir_buf_ptr *source, + int32_t sample_count, uint16_t gain) +{ + int samples_to_mix, samples_to_copy, left_samples; + int n, nmax, i, m, left; + ae_int16x4 in_sample; + ae_int16x4 out_sample; + ae_int16x4 *in; + ae_int16x4 *out; + ae_valign inu = AE_ZALIGN64(); + ae_valign outu1 = AE_ZALIGN64(); + ae_valign outu2 = AE_ZALIGN64(); + /* cir_buf_wrap() is required and is done below in a loop */ + ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample; + ae_int16 *src = source->ptr; + ae_int16x4 gain_vec; + ae_int32x2 tmpl, tmph; + + gain_vec = AE_L16_I((ae_int16 *)&gain, 0); + assert(mixed_samples >= start_sample); samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count); samples_to_copy = sample_count - samples_to_mix; @@ -50,6 +143,13 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe /* process 4 frames per loop */ for (i = 0; i < m; i++) { AE_LA16X4_IP(in_sample, inu, in); + + /* apply gain to in_sample */ + AE_MUL16X4(tmph, tmpl, in_sample, gain_vec); + tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT); + tmph = AE_SRAI32(tmph, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_CVT16X4(tmph, tmpl); + AE_LA16X4_IP(out_sample, outu1, out); out--; out_sample = AE_ADD16S(in_sample, out_sample); @@ -62,6 +162,11 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe */ for (i = 0; i < left ; i++) { AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16)); + + AE_MUL16X4(tmph, tmpl, in_sample, gain_vec); + tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_CVT16X4(tmpl, tmpl); + AE_L16_IP(out_sample, (ae_int16 *)out, 0); out_sample = AE_ADD16S(in_sample, out_sample); AE_S16_0_IP(out_sample, (ae_int16 *)out, sizeof(ae_int16)); @@ -84,6 +189,12 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe /* process 4 frames per loop */ for (i = 0; i < m; i++) { AE_LA16X4_IP(in_sample, inu, in); + + AE_MUL16X4(tmph, tmpl, in_sample, gain_vec); + tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT); + tmph = AE_SRAI32(tmph, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_CVT16X4(tmph, tmpl); + AE_SA16X4_IP(in_sample, outu2, out); } AE_SA64POS_FP(outu2, out); @@ -93,6 +204,11 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe */ for (i = 0; i < left ; i++) { AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16)); + + AE_MUL16X4(tmph, tmpl, in_sample, gain_vec); + tmpl = AE_SRAI32(tmpl, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_CVT16X4(tmpl, tmpl); + AE_S16_0_IP(in_sample, (ae_int16 *)out, sizeof(ae_int16)); } } @@ -117,6 +233,92 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe int32_t *dst = (int32_t *)sink->ptr + start_sample; int32_t *src = source->ptr; + ARG_UNUSED(gain); + + assert(mixed_samples >= start_sample); + samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count); + samples_to_copy = sample_count - samples_to_mix; + n = 0; + + for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src + n, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (int32_t *)source->buf_end - src; + n = AE_MIN_32_signed(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = AE_MIN_32_signed(n, nmax); + in = (ae_int32x2 *)src; + out = (ae_int32x2 *)dst; + inu = AE_LA64_PP(in); + outu1 = AE_LA64_PP(out); + m = n >> 1; + left = n & 1; + /* process 2 samples per time */ + for (i = 0; i < m; i++) { + AE_LA32X2_IP(in_sample, inu, in); + AE_LA32X2_IP(out_sample, outu1, out); + out--; + out_sample = AE_ADD24S(in_sample, out_sample); + AE_SA32X2_IP(out_sample, outu2, out); + } + AE_SA64POS_FP(outu2, out); + + /* process the left sample to avoid memory access overrun */ + if (left) { + AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + AE_L32_IP(out_sample, (ae_int32 *)out, 0); + out_sample = AE_ADD24S(in_sample, out_sample); + AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32)); + } + } + + for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src + n, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end); + nmax = (int32_t *)source->buf_end - src; + n = AE_MIN_32_signed(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = AE_MIN_32_signed(n, nmax); + in = (ae_int32x2 *)src; + out = (ae_int32x2 *)dst; + inu = AE_LA64_PP(in); + m = n >> 1; + left = n & 1; + for (i = 0; i < m; i++) { + AE_LA32X2_IP(in_sample, inu, in); + AE_SA32X2_IP(in_sample, outu2, out); + } + AE_SA64POS_FP(outu2, out); + /* process the left sample to avoid memory access overrun */ + if (left) { + AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32)); + } + } +} + +static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples, + const struct cir_buf_ptr *source, + int32_t sample_count, uint16_t gain) +{ + int samples_to_mix, samples_to_copy, left_samples; + int n, nmax, i, m, left; + ae_int32x2 in_sample, in_sample32; + ae_int32x2 out_sample; + ae_int32x2 *in; + ae_int32x2 *out; + ae_valign inu = AE_ZALIGN64(); + ae_valign outu1 = AE_ZALIGN64(); + ae_valign outu2 = AE_ZALIGN64(); + /* cir_buf_wrap() is required and is done below in a loop */ + int32_t *dst = (int32_t *)sink->ptr + start_sample; + int32_t *src = source->ptr; + ae_int16x4 gain_vec; + ae_int64 tmph, tmpl; + + gain_vec = AE_L16_I((ae_int16 *)&gain, 0); + assert(mixed_samples >= start_sample); samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count); samples_to_copy = sample_count - samples_to_mix; @@ -139,6 +341,16 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe /* process 2 samples per time */ for (i = 0; i < m; i++) { AE_LA32X2_IP(in_sample, inu, in); + + /* apply gain to in_sample */ + in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */ + tmpl = AE_MUL32X16_L0(in_sample32, gain_vec); + tmph = AE_MUL32X16_H0(in_sample32, gain_vec); + tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT); + tmph = AE_SRAI64(tmph, 8 + IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph), + AE_MOVINT32X2_FROMINT64(tmpl)); + AE_LA32X2_IP(out_sample, outu1, out); out--; out_sample = AE_ADD24S(in_sample, out_sample); @@ -149,6 +361,12 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe /* process the left sample to avoid memory access overrun */ if (left) { AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + + in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */ + tmpl = AE_MUL32X16_L0(in_sample32, gain_vec); + tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_MOVINT32X2_FROMINT64(tmpl); + AE_L32_IP(out_sample, (ae_int32 *)out, 0); out_sample = AE_ADD24S(in_sample, out_sample); AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32)); @@ -169,12 +387,27 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe left = n & 1; for (i = 0; i < m; i++) { AE_LA32X2_IP(in_sample, inu, in); + + in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */ + tmpl = AE_MUL32X16_L0(in_sample32, gain_vec); + tmph = AE_MUL32X16_H0(in_sample32, gain_vec); + tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT); + tmph = AE_SRAI64(tmph, 8 + IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph), + AE_MOVINT32X2_FROMINT64(tmpl)); + AE_SA32X2_IP(in_sample, outu2, out); } AE_SA64POS_FP(outu2, out); /* process the left sample to avoid memory access overrun */ if (left) { AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + + in_sample32 = AE_SLAI32(in_sample, 8); /* sign extension */ + tmpl = AE_MUL32X16_L0(in_sample32, gain_vec); + tmpl = AE_SRAI64(tmpl, 8 + IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_MOVINT32X2_FROMINT64(tmpl); + AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32)); } } @@ -200,6 +433,93 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe int32_t *dst = (int32_t *)sink->ptr + start_sample; int32_t *src = source->ptr; + ARG_UNUSED(gain); + + assert(mixed_samples >= start_sample); + samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count); + samples_to_copy = sample_count - samples_to_mix; + n = 0; + + for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src + n, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (int32_t *)source->buf_end - src; + n = AE_MIN_32_signed(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = AE_MIN_32_signed(n, nmax); + in = (ae_int32x2 *)src; + out = (ae_int32x2 *)dst; + inu = AE_LA64_PP(in); + outu1 = AE_LA64_PP(out); + m = n >> 1; + left = n & 1; + for (i = 0; i < m; i++) { + AE_LA32X2_IP(in_sample, inu, in); + AE_LA32X2_IP(out_sample, outu1, out); + out--; + out_sample = AE_ADD32S(in_sample, out_sample); + AE_SA32X2_IP(out_sample, outu2, out); + } + AE_SA64POS_FP(outu2, out); + + /* process the left sample to avoid memory access overrun */ + if (left) { + AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + AE_L32_IP(out_sample, (ae_int32 *)out, 0); + out_sample = AE_ADD32S(in_sample, out_sample); + AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32)); + } + } + + for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) { + src = cir_buf_wrap(src + n, source->buf_start, source->buf_end); + dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end); + /* calculate the remaining samples*/ + nmax = (int32_t *)source->buf_end - src; + n = AE_MIN_32_signed(left_samples, nmax); + nmax = (int32_t *)sink->buf_end - dst; + n = AE_MIN_32_signed(n, nmax); + in = (ae_int32x2 *)src; + out = (ae_int32x2 *)dst; + inu = AE_LA64_PP(in); + m = n >> 1; + left = n & 1; + for (i = 0; i < m; i++) { + AE_LA32X2_IP(in_sample, inu, in); + AE_SA32X2_IP(in_sample, outu2, out); + } + AE_SA64POS_FP(outu2, out); + + /* process the left sample to avoid memory access overrun */ + if (left) { + AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32)); + } + } +} + +static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples, + const struct cir_buf_ptr *source, + int32_t sample_count, uint16_t gain) +{ + int samples_to_mix, samples_to_copy, left_samples; + int n, nmax, i, m, left; + ae_int32x2 in_sample; + ae_int32x2 out_sample; + ae_int32x2 *in; + ae_int32x2 *out; + ae_valign inu = AE_ZALIGN64(); + ae_valign outu1 = AE_ZALIGN64(); + ae_valign outu2 = AE_ZALIGN64(); + /* cir_buf_wrap() is required and is done below in a loop */ + int32_t *dst = (int32_t *)sink->ptr + start_sample; + int32_t *src = source->ptr; + ae_int16x4 gain_vec; + ae_int64 tmpl, tmph; + + gain_vec = AE_L16_I((ae_int16 *)&gain, 0); + assert(mixed_samples >= start_sample); samples_to_mix = AE_MIN_32_signed(mixed_samples - start_sample, sample_count); samples_to_copy = sample_count - samples_to_mix; @@ -221,6 +541,15 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe left = n & 1; for (i = 0; i < m; i++) { AE_LA32X2_IP(in_sample, inu, in); + + /* apply gain to in_sample */ + tmpl = AE_MUL32X16_L0(in_sample, gain_vec); + tmph = AE_MUL32X16_H0(in_sample, gain_vec); + tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT); + tmph = AE_SRAI64(tmph, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph), + AE_MOVINT32X2_FROMINT64(tmpl)); + AE_LA32X2_IP(out_sample, outu1, out); out--; out_sample = AE_ADD32S(in_sample, out_sample); @@ -231,6 +560,11 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe /* process the left sample to avoid memory access overrun */ if (left) { AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + + tmpl = AE_MUL32X16_L0(in_sample, gain_vec); + tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_MOVINT32X2_FROMINT64(tmpl); + AE_L32_IP(out_sample, (ae_int32 *)out, 0); out_sample = AE_ADD32S(in_sample, out_sample); AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32)); @@ -252,6 +586,14 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe left = n & 1; for (i = 0; i < m; i++) { AE_LA32X2_IP(in_sample, inu, in); + + tmpl = AE_MUL32X16_L0(in_sample, gain_vec); + tmph = AE_MUL32X16_H0(in_sample, gain_vec); + tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT); + tmph = AE_SRAI64(tmph, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_SEL32_LL(AE_MOVINT32X2_FROMINT64(tmph), + AE_MOVINT32X2_FROMINT64(tmpl)); + AE_SA32X2_IP(in_sample, outu2, out); } AE_SA64POS_FP(outu2, out); @@ -259,6 +601,11 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe /* process the left sample to avoid memory access overrun */ if (left) { AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32)); + + tmpl = AE_MUL32X16_L0(in_sample, gain_vec); + tmpl = AE_SRAI64(tmpl, IPC4_MIXIN_GAIN_SHIFT); + in_sample = AE_MOVINT32X2_FROMINT64(tmpl); + AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32)); } } @@ -268,13 +615,13 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe const struct mix_func_map mix_func_map[] = { #if CONFIG_FORMAT_S16LE - { SOF_IPC_FRAME_S16_LE, mix_s16 }, + { SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16_gain }, #endif #if CONFIG_FORMAT_S24LE - { SOF_IPC_FRAME_S24_4LE, mix_s24 }, + { SOF_IPC_FRAME_S24_4LE, mix_s24, mix_s24_gain }, #endif #if CONFIG_FORMAT_S32LE - { SOF_IPC_FRAME_S32_LE, mix_s32 } + { SOF_IPC_FRAME_S32_LE, mix_s32, mix_s32_gain } #endif }; diff --git a/src/audio/mixin_mixout/mixin_mixout_hifi5.c b/src/audio/mixin_mixout/mixin_mixout_hifi5.c index 6d82ec54d50d..31f238b5ca70 100644 --- a/src/audio/mixin_mixout/mixin_mixout_hifi5.c +++ b/src/audio/mixin_mixout/mixin_mixout_hifi5.c @@ -28,6 +28,8 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample; ae_int16 *src = source->ptr; + ARG_UNUSED(gain); + assert(mixed_samples >= start_sample); samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count); samples_to_copy = sample_count - samples_to_mix; @@ -118,6 +120,8 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe int32_t *dst = (int32_t *)sink->ptr + start_sample; int32_t *src = source->ptr; + ARG_UNUSED(gain); + assert(mixed_samples >= start_sample); samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count); samples_to_copy = sample_count - samples_to_mix; @@ -202,6 +206,8 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe int32_t *dst = (int32_t *)sink->ptr + start_sample; int32_t *src = source->ptr; + ARG_UNUSED(gain); + assert(mixed_samples >= start_sample); samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count); samples_to_copy = sample_count - samples_to_mix; @@ -268,15 +274,16 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixe #endif /* CONFIG_FORMAT_S32LE */ +/* TODO: implement mixing functions with gain support!*/ const struct mix_func_map mix_func_map[] = { #if CONFIG_FORMAT_S16LE - { SOF_IPC_FRAME_S16_LE, mix_s16 }, + { SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16 }, #endif #if CONFIG_FORMAT_S24LE - { SOF_IPC_FRAME_S24_4LE, mix_s24 }, + { SOF_IPC_FRAME_S24_4LE, mix_s24, mix_s24 }, #endif #if CONFIG_FORMAT_S32LE - { SOF_IPC_FRAME_S32_LE, mix_s32 } + { SOF_IPC_FRAME_S32_LE, mix_s32, mix_s32 } #endif };