ipc4: Add cross-core binding support
Implements binding of two pipelines from different cores so that a stream can
travel cross-core.

The feature is disabled by default; set CONFIG_CROSS_CORE_STREAM=y to
enable it.

Signed-off-by: Serhiy Katsyuba <[email protected]>
serhiy-katsyuba-intel committed Oct 17, 2023
1 parent e08b2c2 commit 6281fec
Showing 4 changed files with 214 additions and 23 deletions.
25 changes: 25 additions & 0 deletions src/include/sof/schedule/ll_schedule_domain.h
@@ -44,6 +44,17 @@ struct ll_schedule_domain_ops {
struct task *task, uint32_t num_tasks);
void (*domain_enable)(struct ll_schedule_domain *domain, int core);
void (*domain_disable)(struct ll_schedule_domain *domain, int core);
#if CONFIG_CROSS_CORE_STREAM
/*
* Unlike domain_disable(), these are intended to temporarily block LL from
* starting its next cycle. Triggering (e.g., by means of a timer interrupt)
* is still enabled and registered, but execution of the next cycle is blocked.
* Once unblocked, if a trigger was registered while in the blocked state,
* the next cycle may start immediately.
*/
void (*domain_block)(struct ll_schedule_domain *domain);
void (*domain_unblock)(struct ll_schedule_domain *domain);
#endif
void (*domain_set)(struct ll_schedule_domain *domain, uint64_t start);
void (*domain_clear)(struct ll_schedule_domain *domain);
bool (*domain_is_pending)(struct ll_schedule_domain *domain,
@@ -192,6 +203,20 @@ static inline void domain_disable(struct ll_schedule_domain *domain, int core)
}
}

#if CONFIG_CROSS_CORE_STREAM
static inline void domain_block(struct ll_schedule_domain *domain)
{
if (domain->ops->domain_block)
domain->ops->domain_block(domain);
}

static inline void domain_unblock(struct ll_schedule_domain *domain)
{
if (domain->ops->domain_unblock)
domain->ops->domain_unblock(domain);
}
#endif

static inline bool domain_is_pending(struct ll_schedule_domain *domain,
struct task *task, struct comp_dev **comp)
{
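For illustration, a minimal sketch of how a caller might use the new ops through the inline wrappers above. This is a hypothetical caller, not part of this commit (the function name is made up; sof_get()->platform_timer_domain is the domain the commit itself uses); the real usage added by this commit is in src/ipc/ipc4/helper.c below.

/* Hypothetical caller sketch: keep the timer domain from starting its next
 * LL cycle while touching state shared with LL tasks on another core.
 */
static void reconfigure_with_ll_blocked(void)
{
	struct ll_schedule_domain *domain = sof_get()->platform_timer_domain;

	domain_block(domain);	/* the next LL cycle will not start until unblocked */

	/* ... modify cross-core shared state here ... */

	domain_unblock(domain);	/* a pending trigger may start the next cycle immediately */
}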
155 changes: 132 additions & 23 deletions src/ipc/ipc4/helper.c
@@ -337,10 +337,78 @@ static struct comp_buffer *ipc4_create_buffer(struct comp_dev *src, bool is_shar
ipc_buf.size = buf_size;
ipc_buf.comp.id = IPC4_COMP_ID(src_queue, dst_queue);
ipc_buf.comp.pipeline_id = src->ipc_config.pipeline_id;
ipc_buf.comp.core = src->ipc_config.core;
ipc_buf.comp.core = cpu_get_id();
return buffer_new(&ipc_buf, is_shared);
}

#if CONFIG_CROSS_CORE_STREAM
/*
* Disabling interrupts to block the next LL cycle is much faster than using
* a condition variable and mutex. Since same-core binding is the most typical
* case, the slower cond_var blocking mechanism is used only for the less
* common cross-core binding.
*
* Note that disabling interrupts to block LL does not work for the cross-core
* binding case, because the .bind() handlers are called on the corresponding
* cores via IDC tasks, and IDC requires interrupts to be enabled. Disabling
* only the timer interrupt instead of all interrupts might work. However, as
* the CPU could enter a power-down mode while waiting for the blocking IDC
* call response, it is not clear how safe it is to assume the CPU can wake up
* without the timer interrupt; that depends on the blocking IDC wait
* implementation. That is why the additional cond_var mechanism for blocking
* LL, which does not disable any interrupts, was introduced.
*/

#define ll_block(cross_core_bind) \
do { \
if (cross_core_bind) \
domain_block(sof_get()->platform_timer_domain); \
else \
irq_local_disable(flags); \
} while (0)

#define ll_unblock(cross_core_bind) \
do { \
if (cross_core_bind) \
domain_unblock(sof_get()->platform_timer_domain); \
else \
irq_local_enable(flags); \
} while (0)

/* Calling both ll_block() and ll_wait_finished_on_core() makes sure LL will not start its
* next cycle and that its current cycle on the specified core has finished.
*/
static int ll_wait_finished_on_core(struct comp_dev *dev)
{
/* To make sure (blocked) LL has finished its current cycle, it is
* enough to send any blocking IDC to the core. Since the IDC task has lower
* priority than the LL thread and cannot preempt it, execution of the IDC task
* happens when the LL thread is not active, i.e. while it waits for its next cycle.
*/

int ret;
struct ipc4_base_module_cfg dummy;

if (cpu_is_me(dev->ipc_config.core))
return 0;

/* Any blocking IDC that does not change component state could be utilized */
ret = comp_ipc4_get_attribute_remote(dev, COMP_ATTR_BASE_CONFIG, &dummy);
if (ret < 0) {
tr_err(&ipc_tr, "comp_ipc4_get_attribute_remote() failed for module %#x",
dev_comp_id(dev));
return ret;
}

return 0;
}

#else

#define ll_block(cross_core_bind) irq_local_disable(flags)
#define ll_unblock(cross_core_bind) irq_local_enable(flags)

#endif

int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
{
struct ipc4_module_bind_unbind *bu;
@@ -364,14 +432,15 @@ int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
return IPC4_INVALID_RESOURCE_ID;
}

bool is_shared = source->ipc_config.core != sink->ipc_config.core;
bool cross_core_bind = source->ipc_config.core != sink->ipc_config.core;

/* Pass IPC to target core if the buffer won't be shared and will be used
* on different core
/* If both components are on the same core -- process IPC on that core,
* otherwise stay on core 0
*/
if (!cpu_is_me(source->ipc_config.core) && !is_shared)
if (!cpu_is_me(source->ipc_config.core) && !cross_core_bind)
return ipc4_process_on_core(source->ipc_config.core, false);

/* these might call comp_ipc4_get_attribute_remote() if necessary */
ret = comp_get_attribute(source, COMP_ATTR_BASE_CONFIG, &source_src_cfg);
if (ret < 0) {
tr_err(&ipc_tr, "failed to get base config for module %#x", dev_comp_id(source));
@@ -397,7 +466,7 @@ int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
else
buf_size = sink_src_cfg.ibs * 2;

buffer = ipc4_create_buffer(source, is_shared, buf_size, bu->extension.r.src_queue,
buffer = ipc4_create_buffer(source, cross_core_bind, buf_size, bu->extension.r.src_queue,
bu->extension.r.dst_queue);
if (!buffer) {
tr_err(&ipc_tr, "failed to allocate buffer to bind %d to %d", src_id, sink_id);
@@ -418,12 +487,26 @@ int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
source_set_min_available(audio_stream_get_source(&buffer->stream), sink_src_cfg.ibs);

/*
* Connect and bind the buffer to both source and sink components with the interrupts
* disabled to prevent the IPC task getting preempted which could result in buffers being
* only half connected when a pipeline task gets executed. A spinlock isn't required
* because all connected pipelines need to be on the same core.
* Connect and bind the buffer to both source and sink components with LL processing
* blocked on the corresponding core(s) to prevent the IPC or IDC task from getting preempted,
* which could result in buffers being only half connected when a pipeline task gets executed.
*/
irq_local_disable(flags);
ll_block(cross_core_bind);

if (cross_core_bind) {
#if CONFIG_CROSS_CORE_STREAM
/* Make sure LL has finished on both cores */
if (!cpu_is_me(source->ipc_config.core))
if (ll_wait_finished_on_core(source) < 0)
goto free;
if (!cpu_is_me(sink->ipc_config.core))
if (ll_wait_finished_on_core(sink) < 0)
goto free;
#else
tr_err(&ipc_tr, "Cross-core binding is disabled");
goto free;
#endif
}

ret = comp_buffer_connect(source, source->ipc_config.core, buffer,
PPL_CONN_DIR_COMP_TO_BUFFER);
@@ -432,15 +515,14 @@ int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
goto free;
}


ret = comp_buffer_connect(sink, sink->ipc_config.core, buffer,
PPL_CONN_DIR_BUFFER_TO_COMP);
if (ret < 0) {
tr_err(&ipc_tr, "failed to connect internal buffer to sink %d", sink_id);
goto e_sink_connect;
}


/* these might call comp_ipc4_bind_remote() if necessary */
ret = comp_bind(source, bu);
if (ret < 0)
goto e_src_bind;
@@ -461,7 +543,7 @@ int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
source->direction_set = true;
}

irq_local_enable(flags);
ll_unblock(cross_core_bind);

return IPC4_SUCCESS;

@@ -472,7 +554,7 @@ int ipc_comp_connect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
e_sink_connect:
pipeline_disconnect(source, buffer, PPL_CONN_DIR_COMP_TO_BUFFER);
free:
irq_local_enable(flags);
ll_unblock(cross_core_bind);
buffer_free(buffer);
return IPC4_INVALID_RESOURCE_STATE;
}
@@ -491,6 +573,7 @@ int ipc_comp_disconnect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
uint32_t src_id, sink_id, buffer_id;
uint32_t flags;
int ret, ret1;
bool cross_core_unbind;

bu = (struct ipc4_module_bind_unbind *)_connect;
src_id = IPC4_COMP_ID(bu->primary.r.module_id, bu->primary.r.instance_id);
@@ -507,8 +590,12 @@ int ipc_comp_disconnect(struct ipc *ipc, ipc_pipe_comp_connect *_connect)
return 0;
}

/* Pass IPC to target core if both modules has the same target core */
if (!cpu_is_me(src->ipc_config.core) && src->ipc_config.core == sink->ipc_config.core)
cross_core_unbind = src->ipc_config.core != sink->ipc_config.core;

/* Pass IPC to target core if both modules have the same target core,
* otherwise stay on core 0
*/
if (!cpu_is_me(src->ipc_config.core) && !cross_core_unbind)
return ipc4_process_on_core(src->ipc_config.core, false);

buffer_id = IPC4_COMP_ID(bu->extension.r.src_queue, bu->extension.r.dst_queue);
@@ -527,17 +614,39 @@

/*
* Disconnect and unbind buffer from source/sink components and continue to free the buffer
* even in case of errors. Disable interrupts during disconnect and unbinding to prevent
* the IPC task getting preempted which could result in buffers being only half connected
* when a pipeline task gets executed. A spinlock isn't required because all connected
* pipelines need to be on the same core.
* even in case of errors. Block LL processing during disconnect and unbinding to prevent
* the IPC or IDC task from getting preempted, which could result in buffers being only half
* connected when a pipeline task gets executed.
*/
irq_local_disable(flags);
ll_block(cross_core_unbind);

if (cross_core_unbind) {
#if CONFIG_CROSS_CORE_STREAM
/* Make sure LL has finished on both cores */
if (!cpu_is_me(src->ipc_config.core))
if (ll_wait_finished_on_core(src) < 0) {
ll_unblock(cross_core_unbind);
return IPC4_FAILURE;
}
if (!cpu_is_me(sink->ipc_config.core))
if (ll_wait_finished_on_core(sink) < 0) {
ll_unblock(cross_core_unbind);
return IPC4_FAILURE;
}
#else
tr_err(&ipc_tr, "Cross-core binding is disabled");
ll_unblock(cross_core_unbind);
return IPC4_FAILURE;
#endif
}

pipeline_disconnect(src, buffer, PPL_CONN_DIR_COMP_TO_BUFFER);
pipeline_disconnect(sink, buffer, PPL_CONN_DIR_BUFFER_TO_COMP);
/* these might call comp_ipc4_bind_remote() if necessary */
ret = comp_unbind(src, bu);
ret1 = comp_unbind(sink, bu);
irq_local_enable(flags);

ll_unblock(cross_core_unbind);

buffer_free(buffer);

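To summarize the flow added above, here is a simplified sketch of the cross-core bind sequence in ipc_comp_connect(). The wrapper function name is hypothetical and cleanup of partially connected state is omitted; all callee names, arguments, and ordering are taken from the diff above, so this is a reading aid rather than a replacement for it.

/* Hypothetical reading aid (not part of this commit): the order of operations
 * ipc_comp_connect() follows for a cross-core bind, with the same-core fast
 * path and error cleanup omitted.
 */
static int cross_core_bind_sequence(struct comp_dev *source, struct comp_dev *sink,
				    struct comp_buffer *buffer,
				    struct ipc4_module_bind_unbind *bu)
{
	const bool cross_core_bind = true;
	uint32_t flags;	/* only touched by ll_block()/ll_unblock() in the same-core case */
	int ret;

	ll_block(cross_core_bind);		/* gate the next LL cycle on the timer domain */

	ret = ll_wait_finished_on_core(source);	/* blocking IDC: current cycle done on source core */
	if (ret < 0)
		goto out;
	ret = ll_wait_finished_on_core(sink);	/* ...and on the sink core */
	if (ret < 0)
		goto out;

	ret = comp_buffer_connect(source, source->ipc_config.core, buffer,
				  PPL_CONN_DIR_COMP_TO_BUFFER);
	if (ret < 0)
		goto out;
	ret = comp_buffer_connect(sink, sink->ipc_config.core, buffer,
				  PPL_CONN_DIR_BUFFER_TO_COMP);
	if (ret < 0)
		goto out;

	ret = comp_bind(source, bu);		/* may call comp_ipc4_bind_remote() over IDC */
	if (ret < 0)
		goto out;
	ret = comp_bind(sink, bu);

out:
	ll_unblock(cross_core_bind);		/* LL cycles resume on both cores */
	return ret;
}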
46 changes: 46 additions & 0 deletions src/schedule/zephyr_domain.c
@@ -50,6 +50,11 @@ struct zephyr_domain {
struct k_timer timer;
struct zephyr_domain_thread domain_thread[CONFIG_CORE_COUNT];
struct ll_schedule_domain *ll_domain;
#if CONFIG_CROSS_CORE_STREAM
atomic_t block;
struct k_mutex block_mutex;
struct k_condvar block_condvar;
#endif
};

/* perf measurement windows size 2^x */
@@ -67,6 +72,16 @@ static void zephyr_domain_thread_fn(void *p1, void *p2, void *p3)
/* immediately go to sleep, waiting to be woken up by the timer */
k_sem_take(&dt->sem, K_FOREVER);

#if CONFIG_CROSS_CORE_STREAM
if (atomic_get(&zephyr_domain->block)) {
k_mutex_lock(&zephyr_domain->block_mutex, K_FOREVER);
if (atomic_get(&zephyr_domain->block))
k_condvar_wait(&zephyr_domain->block_condvar,
&zephyr_domain->block_mutex, K_FOREVER);
k_mutex_unlock(&zephyr_domain->block_mutex);
}
#endif

cycles0 = k_cycle_get_32();
dt->handler(dt->arg);
cycles1 = k_cycle_get_32();
@@ -221,9 +236,34 @@ static int zephyr_domain_unregister(struct ll_schedule_domain *domain,
return 0;
}

#if CONFIG_CROSS_CORE_STREAM
static void zephyr_domain_block(struct ll_schedule_domain *domain)
{
struct zephyr_domain *zephyr_domain = ll_sch_domain_get_pdata(domain);

k_mutex_lock(&zephyr_domain->block_mutex, K_FOREVER);
atomic_set(&zephyr_domain->block, 1);
k_mutex_unlock(&zephyr_domain->block_mutex);
}

static void zephyr_domain_unblock(struct ll_schedule_domain *domain)
{
struct zephyr_domain *zephyr_domain = ll_sch_domain_get_pdata(domain);

k_mutex_lock(&zephyr_domain->block_mutex, K_FOREVER);
atomic_set(&zephyr_domain->block, 0);
k_condvar_broadcast(&zephyr_domain->block_condvar);
k_mutex_unlock(&zephyr_domain->block_mutex);
}
#endif

static const struct ll_schedule_domain_ops zephyr_domain_ops = {
.domain_register = zephyr_domain_register,
.domain_unregister = zephyr_domain_unregister,
#if CONFIG_CROSS_CORE_STREAM
.domain_block = zephyr_domain_block,
.domain_unblock = zephyr_domain_unblock,
#endif
};

struct ll_schedule_domain *zephyr_domain_init(int clk)
@@ -239,6 +279,12 @@ struct ll_schedule_domain *zephyr_domain_init(int clk)

zephyr_domain->ll_domain = domain;

#if CONFIG_CROSS_CORE_STREAM
atomic_set(&zephyr_domain->block, 0);
k_mutex_init(&zephyr_domain->block_mutex);
k_condvar_init(&zephyr_domain->block_condvar);
#endif

ll_sch_domain_set_pdata(domain, zephyr_domain);

return domain;
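The block/unblock handshake above follows the standard condition-variable gate pattern. Below is a minimal standalone sketch of the same pattern using plain Zephyr kernel APIs; all names are hypothetical and it is not part of this commit. It shows why the flag is re-checked after taking the mutex, which is the key detail in zephyr_domain_thread_fn() above.

#include <zephyr/kernel.h>

K_MUTEX_DEFINE(gate_mutex);
K_CONDVAR_DEFINE(gate_cv);
static atomic_t gate_blocked = ATOMIC_INIT(0);

/* Worker side (the LL thread): called at the top of every cycle. */
static void gate_wait_if_blocked(void)
{
	if (atomic_get(&gate_blocked)) {	/* lock-free fast path */
		k_mutex_lock(&gate_mutex, K_FOREVER);
		/* Re-check under the mutex: the controller may have cleared the
		 * flag and broadcast between the check above and taking the lock.
		 * Waiting without re-checking could miss that wakeup and stall.
		 */
		if (atomic_get(&gate_blocked))
			k_condvar_wait(&gate_cv, &gate_mutex, K_FOREVER);
		k_mutex_unlock(&gate_mutex);
	}
}

/* Controller side: flipping the flag under the same mutex guarantees the
 * worker either sees the new value before waiting or receives the broadcast.
 */
static void gate_close(void)
{
	k_mutex_lock(&gate_mutex, K_FOREVER);
	atomic_set(&gate_blocked, 1);
	k_mutex_unlock(&gate_mutex);
}

static void gate_open(void)
{
	k_mutex_lock(&gate_mutex, K_FOREVER);
	atomic_set(&gate_blocked, 0);
	k_condvar_broadcast(&gate_cv);
	k_mutex_unlock(&gate_mutex);
}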
11 changes: 11 additions & 0 deletions zephyr/Kconfig
@@ -56,4 +56,15 @@ config ZEPHYR_DP_SCHEDULER
DP modules can be located on different cores than LL pipeline modules, and may have
a different tick (e.g. 300 ms for speech recognition, etc.)

config CROSS_CORE_STREAM
bool "Enable cross-core connected pipelines"
default y if IPC_MAJOR_4
help
Enables support for connecting together pipelines that run on
different cores, so a stream can travel from one core to another.
Note that this is different from "multicore" support: in SOF,
"multicore" support means different streams can be processed on
different cores, whereas each stream is still processed entirely
on a single core.

endif
