StarPU Internal Handbook
|
Data Structures | |
struct | _starpu_worker |
struct | _starpu_combined_worker |
struct | _starpu_worker_set |
struct | _starpu_machine_topology |
struct | _starpu_machine_config |
struct | _starpu_machine_config.bindid_workers |
Enumerations | |
enum | initialization { UNINITIALIZED, CHANGING, INITIALIZED } |
Functions | |
void | _starpu_set_argc_argv (int *argc, char ***argv) |
int * | _starpu_get_argc () |
char *** | _starpu_get_argv () |
void | _starpu_conf_check_environment (struct starpu_conf *conf) |
void | _starpu_may_pause (void) |
static unsigned | _starpu_machine_is_running (void) |
void | _starpu_worker_init (struct _starpu_worker *workerarg, struct _starpu_machine_config *pconfig) |
uint32_t | _starpu_worker_exists (struct starpu_task *) |
uint32_t | _starpu_can_submit_cuda_task (void) |
uint32_t | _starpu_can_submit_cpu_task (void) |
uint32_t | _starpu_can_submit_opencl_task (void) |
unsigned | _starpu_worker_can_block (unsigned memnode, struct _starpu_worker *worker) |
void | _starpu_block_worker (int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) |
void | _starpu_driver_start (struct _starpu_worker *worker, unsigned fut_key, unsigned sync) |
void | _starpu_worker_start (struct _starpu_worker *worker, unsigned fut_key, unsigned sync) |
static unsigned | _starpu_worker_get_count (void) |
static void | _starpu_set_local_worker_key (struct _starpu_worker *worker) |
static struct _starpu_worker * | _starpu_get_local_worker_key (void) |
static void | _starpu_set_local_worker_set_key (struct _starpu_worker_set *worker) |
static struct _starpu_worker_set * | _starpu_get_local_worker_set_key (void) |
static struct _starpu_worker * | _starpu_get_worker_struct (unsigned id) |
static struct _starpu_sched_ctx * | _starpu_get_sched_ctx_struct (unsigned id) |
struct _starpu_combined_worker * | _starpu_get_combined_worker_struct (unsigned id) |
static struct _starpu_machine_config * | _starpu_get_machine_config (void) |
static int | _starpu_get_disable_kernels (void) |
static enum _starpu_worker_status | _starpu_worker_get_status (int workerid) |
static void | _starpu_worker_set_status (int workerid, enum _starpu_worker_status status) |
static struct _starpu_sched_ctx * | _starpu_get_initial_sched_ctx (void) |
int | starpu_worker_get_nids_by_type (enum starpu_worker_archtype type, int *workerids, int maxsize) |
int | starpu_worker_get_nids_ctx_free_by_type (enum starpu_worker_archtype type, int *workerids, int maxsize) |
static unsigned | _starpu_worker_mutex_is_sched_mutex (int workerid, starpu_pthread_mutex_t *mutex) |
static int | _starpu_worker_get_nsched_ctxs (int workerid) |
static unsigned | _starpu_get_nsched_ctxs (void) |
static int | _starpu_worker_get_id (void) |
static unsigned | __starpu_worker_get_id_check (const char *f, int l) |
enum starpu_node_kind | _starpu_worker_get_node_kind (enum starpu_worker_archtype type) |
void | _starpu_worker_set_stream_ctx (unsigned workerid, struct _starpu_sched_ctx *sched_ctx) |
struct _starpu_sched_ctx * | _starpu_worker_get_ctx_stream (unsigned stream_workerid) |
static void | _starpu_worker_request_blocking_in_parallel (struct _starpu_worker *const worker) |
static void | _starpu_worker_request_unblocking_in_parallel (struct _starpu_worker *const worker) |
static void | _starpu_worker_process_block_in_parallel_requests (struct _starpu_worker *const worker) |
static void | _starpu_worker_enter_sched_op (struct _starpu_worker *const worker) |
void | _starpu_worker_apply_deferred_ctx_changes (void) |
static void | _starpu_worker_leave_sched_op (struct _starpu_worker *const worker) |
static int | _starpu_worker_sched_op_pending (void) |
static void | _starpu_worker_enter_changing_ctx_op (struct _starpu_worker *const worker) |
static void | _starpu_worker_leave_changing_ctx_op (struct _starpu_worker *const worker) |
static void | _starpu_worker_relax_on (void) |
static void | _starpu_worker_relax_on_locked (struct _starpu_worker *worker) |
static void | _starpu_worker_relax_off (void) |
static void | _starpu_worker_relax_off_locked (void) |
static int | _starpu_worker_get_relax_state (void) |
static void | _starpu_worker_lock (int workerid) |
static int | _starpu_worker_trylock (int workerid) |
static void | _starpu_worker_unlock (int workerid) |
static void | _starpu_worker_lock_self (void) |
static void | _starpu_worker_unlock_self (void) |
static int | _starpu_wake_worker_relax (int workerid) |
int | starpu_wake_worker_relax_light (int workerid) |
void | _starpu_worker_refuse_task (struct _starpu_worker *worker, struct starpu_task *task) |
Variables | |
int | _starpu_worker_parallel_blocks |
struct _starpu_machine_config _starpu_config | STARPU_ATTRIBUTE_INTERNAL |
struct _starpu_worker |
This is initialized by _starpu_worker_init()
Data Fields | ||
---|---|---|
struct _starpu_machine_config * | config | |
starpu_pthread_mutex_t | mutex | |
enum starpu_worker_archtype | arch |
what is the type of worker ? |
uint32_t | worker_mask |
what is the type of worker ? |
struct starpu_perfmodel_arch | perf_arch |
in case there are different models of the same arch |
starpu_pthread_t | worker_thread |
the thread which runs the worker |
unsigned | devid |
which cpu/gpu/etc is controlled by the worker ? |
unsigned | subworkerid |
which sub-worker this one is for the cpu/gpu |
int | bindid |
which cpu is the driver bound to ? (logical index) |
int | workerid |
uniquely identify the worker among all processing units types |
int | combined_workerid |
combined worker currently using this worker |
int | current_rank |
current rank in case the worker is used in a parallel fashion |
int | worker_size |
size of the worker in case we use a combined worker |
starpu_pthread_cond_t | started_cond |
indicate when the worker is ready |
starpu_pthread_cond_t | ready_cond |
indicate when the worker is ready |
unsigned | memory_node |
which memory node is the worker associated with ? |
unsigned | numa_memory_node |
which numa memory node is the worker associated with? (logical index) |
starpu_pthread_cond_t | sched_cond |
condition variable used for passive waiting operations on the worker. STARPU_PTHREAD_COND_BROADCAST must be used instead of STARPU_PTHREAD_COND_SIGNAL, since the condition is shared for multiple purposes |
starpu_pthread_mutex_t | sched_mutex |
mutex protecting sched_cond |
unsigned | state_relax_refcnt |
mark scheduling sections where other workers can safely access the worker state |
unsigned | state_sched_op_pending |
a task pop is ongoing even though sched_mutex may temporarily be unlocked |
unsigned | state_changing_ctx_waiting |
a thread is waiting for operations such as pop to complete before acquiring sched_mutex and modifying the worker ctx |
unsigned | state_changing_ctx_notice |
the worker ctx is about to change or being changed, wait for flag to be cleared before starting new scheduling operations |
unsigned | state_blocked_in_parallel |
worker is currently blocked on a parallel section |
unsigned | state_blocked_in_parallel_observed |
the blocked state of the worker has been observed by another worker during a relaxed section |
unsigned | state_block_in_parallel_req |
a request for state transition from unblocked to blocked is pending |
unsigned | state_block_in_parallel_ack |
a block request has been honored |
unsigned | state_unblock_in_parallel_req |
a request for state transition from blocked to unblocked is pending |
unsigned | state_unblock_in_parallel_ack |
an unblock request has been honored |
unsigned | block_in_parallel_ref_count |
cumulative blocking depth
|
starpu_pthread_t | thread_changing_ctx |
thread currently changing a sched_ctx containing the worker |
struct _starpu_ctx_change_list | ctx_change_list |
list of deferred context changes. When the current thread is a worker, _and_ this worker is in a scheduling operation, new ctx changes are queued to this list for subsequent processing once the worker completes the ongoing scheduling operation |
struct starpu_task_list | local_tasks |
this queue contains tasks that have been explicitly submitted to that queue |
struct starpu_task ** | local_ordered_tasks |
this queue contains tasks that have been explicitly submitted to that queue with an explicit order |
unsigned | local_ordered_tasks_size |
this records the size of local_ordered_tasks |
unsigned | current_ordered_task |
this records the index (within local_ordered_tasks) of the next ordered task to be executed |
unsigned | current_ordered_task_order |
this records the order of the next ordered task to be executed |
struct starpu_task * | current_task |
task currently executed by this worker (non-pipelined version) |
struct starpu_task * | current_tasks[STARPU_MAX_PIPELINE] |
tasks currently executed by this worker (pipelined version) |
starpu_pthread_wait_t | wait | |
struct timespec | cl_start |
Codelet start time of the task currently running |
struct timespec | cl_end |
Codelet end time of the last task running |
unsigned char | first_task |
Index of first task in the pipeline |
unsigned char | ntasks |
number of tasks in the pipeline |
unsigned char | pipeline_length |
number of tasks to be put in the pipeline |
unsigned char | pipeline_stuck |
whether a task prevents us from pipelining |
struct _starpu_worker_set * | set |
in case this worker belongs to a set |
unsigned | worker_is_running | |
unsigned | worker_is_initialized | |
enum _starpu_worker_status | status |
what is the worker doing now ? (eg. CALLBACK) |
unsigned | state_keep_awake |
!0 if a task has been pushed to the worker and the task has not yet been seen by the worker; the worker should not go to sleep before processing this task |
char | name[128] | |
char | short_name[32] | |
unsigned | run_by_starpu |
Is this run by StarPU or directly by the application ? |
struct _starpu_driver_ops * | driver_ops | |
struct _starpu_sched_ctx_list * | sched_ctx_list | |
int | tmp_sched_ctx | |
unsigned | nsched_ctxs |
the number of contexts a worker belongs to |
struct _starpu_barrier_counter | tasks_barrier |
wait for the tasks submitted |
unsigned | has_prev_init |
has already been initialized in another ctx |
unsigned | removed_from_ctx[STARPU_NMAX_SCHED_CTXS+1] | |
unsigned | spinning_backoff |
number of cycles to pause when spinning |
unsigned | nb_buffers_transferred |
number of pieces of data already sent to the worker |
unsigned | nb_buffers_totransfer |
number of pieces of data to be sent to the worker |
struct starpu_task * | task_transferring |
The buffers of this task are being sent |
unsigned | shares_tasks_lists[STARPU_NMAX_SCHED_CTXS+1] |
indicates whether the worker shares task lists with other workers; in this case, when removing it from a context, it disappears instantly |
unsigned | poped_in_ctx[STARPU_NMAX_SCHED_CTXS+1] |
boolean to choose the next ctx a worker will pop into |
unsigned | reverse_phase[2] |
boolean indicating at which moment we checked all ctxs and change phase for the boolean poped_in_ctx; one for each of the 2 priorities |
unsigned | pop_ctx_priority |
indicate which priority of ctx is currently active: the values are 0 or 1 |
unsigned | is_slave_somewhere |
bool to indicate if the worker is slave in a ctx |
struct _starpu_sched_ctx * | stream_ctx | |
hwloc_bitmap_t | hwloc_cpu_set | |
hwloc_obj_t | hwloc_obj | |
char | padding[STARPU_CACHELINE_SIZE] |
Keep this last, to make sure to separate worker data in separate cache lines. |
struct _starpu_combined_worker |
Data Fields | ||
---|---|---|
struct starpu_perfmodel_arch | perf_arch |
in case there are different models of the same arch |
uint32_t | worker_mask |
what is the type of workers ? |
int | worker_size | |
unsigned | memory_node |
which memory node is that worker associated with ? |
int | combined_workerid[STARPU_NMAXWORKERS] | |
hwloc_bitmap_t | hwloc_cpu_set | |
char | padding[STARPU_CACHELINE_SIZE] |
Keep this last, to make sure to separate worker data in separate cache lines. |
struct _starpu_worker_set |
in case a single CPU worker may control multiple accelerators
Data Fields | ||
---|---|---|
starpu_pthread_mutex_t | mutex | |
starpu_pthread_t | worker_thread |
the thread which runs the worker |
unsigned | nworkers | |
unsigned | started |
Only one thread for the whole set |
void * | retval | |
struct _starpu_worker * | workers | |
starpu_pthread_cond_t | ready_cond |
indicate when the set is ready |
unsigned | set_is_initialized |
struct _starpu_machine_topology |
Data Fields | ||
---|---|---|
unsigned | nworkers |
Total number of workers. |
unsigned | ncombinedworkers |
Total number of combined workers. |
unsigned | nsched_ctxs | |
hwloc_topology_t | hwtopology |
Topology as detected by hwloc. |
struct starpu_tree * | tree |
custom hwloc tree |
unsigned | nhwcpus |
Total number of CPU cores, as detected by the topology code. May be different from the actual number of CPU workers. |
unsigned | nhwpus |
Total number of PUs (i.e. threads), as detected by the topology code. May be different from the actual number of PU workers. |
unsigned | nhwcudagpus |
Total number of CUDA devices, as detected. May be different from the actual number of CUDA workers. |
unsigned | nhwopenclgpus |
Total number of OpenCL devices, as detected. May be different from the actual number of OpenCL workers. |
unsigned | nhwmpi |
Total number of MPI nodes, as detected. May be different from the actual number of node workers. |
unsigned | ncpus |
Actual number of CPU workers used by StarPU. |
unsigned | ncudagpus |
Actual number of CUDA GPUs used by StarPU. |
unsigned | nworkerpercuda | |
int | cuda_th_per_stream | |
int | cuda_th_per_dev | |
unsigned | nopenclgpus |
Actual number of OpenCL workers used by StarPU. |
unsigned | nmpidevices |
Actual number of MPI workers used by StarPU. |
unsigned | nhwmpidevices | |
unsigned | nhwmpicores[STARPU_MAXMPIDEVS] |
Each MPI node has its set of cores. |
unsigned | nmpicores[STARPU_MAXMPIDEVS] | |
unsigned | nhwmicdevices |
Topology of MP nodes (MIC) as well as necessary objects to communicate with them. |
unsigned | nmicdevices | |
unsigned | nhwmiccores[STARPU_MAXMICDEVS] |
Each MIC node has its set of cores. |
unsigned | nmiccores[STARPU_MAXMICDEVS] | |
unsigned | workers_bindid[STARPU_NMAXWORKERS] |
Indicates the successive logical PU identifier that should be used to bind the workers. It is either filled according to the user's explicit parameters (from starpu_conf) or according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a round-robin policy is used to distribute the workers over the cores. |
unsigned | workers_cuda_gpuid[STARPU_NMAXWORKERS] |
Indicates the successive CUDA identifier that should be used by the CUDA driver. It is either filled according to the user's explicit parameters (from starpu_conf) or according to the STARPU_WORKERS_CUDAID env. variable. Otherwise, they are taken in ID order. |
unsigned | workers_opencl_gpuid[STARPU_NMAXWORKERS] |
Indicates the successive OpenCL identifier that should be used by the OpenCL driver. It is either filled according to the user's explicit parameters (from starpu_conf) or according to the STARPU_WORKERS_OPENCLID env. variable. Otherwise, they are taken in ID order. |
unsigned | workers_mpi_ms_deviceid[STARPU_NMAXWORKERS] |
TODO unsigned workers_mic_deviceid[STARPU_NMAXWORKERS]; |
struct _starpu_machine_config |
Data Fields | ||
---|---|---|
struct _starpu_machine_topology | topology | |
int | cpu_depth | |
int | pu_depth | |
int | current_bindid |
Where to bind next worker ? |
char | currently_bound[STARPU_NMAXWORKERS] | |
char | currently_shared[STARPU_NMAXWORKERS] | |
int | current_cuda_gpuid |
Which GPU(s) do we use for CUDA ? |
int | current_opencl_gpuid |
Which GPU(s) do we use for OpenCL ? |
int | current_mic_deviceid |
Which MIC do we use? |
int | current_mpi_deviceid |
Which MPI do we use? |
int | cpus_nodeid |
Memory node for cpus, if only one |
int | cuda_nodeid |
Memory node for CUDA, if only one |
int | opencl_nodeid |
Memory node for OpenCL, if only one |
int | mic_nodeid |
Memory node for MIC, if only one |
int | mpi_nodeid |
Memory node for MPI, if only one |
char | padding1[STARPU_CACHELINE_SIZE] |
Separate out previous variables from per-worker data. |
struct _starpu_worker | workers[STARPU_NMAXWORKERS] |
Basic workers: each of these workers is running its own driver and can be combined with other basic workers. |
struct _starpu_combined_worker | combined_workers[STARPU_NMAX_COMBINEDWORKERS] |
Combined workers: these workers are a combination of basic workers that can run parallel tasks together. |
starpu_pthread_mutex_t | submitted_mutex | |
char | padding2[STARPU_CACHELINE_SIZE] |
Separate out previous mutex from the rest of the data. |
struct _starpu_machine_config | bindid_workers |
Translation table from bindid to worker IDs |
unsigned | nbindid |
size of bindid_workers |
uint32_t | worker_mask |
This bitmask indicates which kinds of worker are available. For instance it is possible to test if there is a CUDA worker with the result of (worker_mask & STARPU_CUDA). |
struct starpu_conf | conf |
either the user given configuration passed to starpu_init or a default configuration |
unsigned | running |
this flag is set until the runtime is stopped |
int | disable_kernels | |
int | pause_depth |
Number of calls to starpu_pause() - calls to starpu_resume(). When >0, StarPU should pause. |
struct _starpu_sched_ctx | sched_ctxs[STARPU_NMAX_SCHED_CTXS+1] |
all the sched ctx of the current instance of starpu |
unsigned | submitting |
this flag is set until the application is finished submitting tasks |
int | watchdog_ok |
struct _starpu_machine_config.bindid_workers |
void _starpu_set_argc_argv | ( | int * | argc, |
char *** | argv | ||
) |
Three functions to manage argv, argc
void _starpu_conf_check_environment | ( | struct starpu_conf * | conf | ) |
Fill conf with environment variables
void _starpu_may_pause | ( | void | ) |
Called by the driver when it is ready to pause
|
inlinestatic |
Has starpu_shutdown already been called ?
void _starpu_worker_init | ( | struct _starpu_worker * | workerarg, |
struct _starpu_machine_config * | pconfig | ||
) |
initialise a worker
uint32_t _starpu_worker_exists | ( | struct starpu_task * | ) |
Check if there is a worker that may execute the task.
uint32_t _starpu_can_submit_cuda_task | ( | void | ) |
Is there a worker that can execute CUDA code ?
uint32_t _starpu_can_submit_cpu_task | ( | void | ) |
Is there a worker that can execute CPU code ?
uint32_t _starpu_can_submit_opencl_task | ( | void | ) |
Is there a worker that can execute OpenCL code ?
unsigned _starpu_worker_can_block | ( | unsigned | memnode, |
struct _starpu_worker * | worker | ||
) |
Check whether there is anything that the worker should do instead of sleeping (waiting on something to happen).
void _starpu_block_worker | ( | int | workerid, |
starpu_pthread_cond_t * | cond, | ||
starpu_pthread_mutex_t * | mutex | ||
) |
This function must be called to block a worker. It puts the worker in a sleeping state until there is some event that forces the worker to wake up.
void _starpu_driver_start | ( | struct _starpu_worker * | worker, |
unsigned | fut_key, | ||
unsigned | sync | ||
) |
This function initializes the current driver for the given worker
void _starpu_worker_start | ( | struct _starpu_worker * | worker, |
unsigned | fut_key, | ||
unsigned | sync | ||
) |
This function initializes the current thread for the given worker
|
inlinestatic |
The _starpu_worker structure describes all the state of a StarPU worker. This function sets the pthread key which stores a pointer to this structure.
|
static |
Returns the _starpu_worker structure that describes the state of the current worker.
|
inlinestatic |
The _starpu_worker_set structure describes all the state of a StarPU worker_set. This function sets the pthread key which stores a pointer to this structure.
|
static |
Returns the _starpu_worker_set structure that describes the state of the current worker_set.
|
static |
Returns the _starpu_worker structure that describes the state of the specified worker.
|
static |
Returns the _starpu_sched_ctx structure that describes the state of the specified ctx
|
static |
Returns the structure that describes the overall machine configuration (eg. all workers and topology).
|
inlinestatic |
Return whether kernels should be run (<=0) or not (>0)
|
inlinestatic |
Retrieve the status which indicates what the worker is currently doing.
|
inlinestatic |
Change the status of the worker which indicates what the worker is currently doing (eg. executing a callback).
|
static |
We keep an initial sched ctx which might be used in case no other ctx is available
int starpu_worker_get_nids_ctx_free_by_type | ( | enum starpu_worker_archtype | type, |
int * | workerids, | ||
int | maxsize | ||
) |
Returns workers not belonging to any context. Be careful: no mutex is used, so the list might not be up to date.
|
inlinestatic |
Get the total number of sched_ctxs created till now
|
inlinestatic |
Inlined version when building the core.
|
inlinestatic |
Similar behaviour to starpu_worker_get_id() but fails when called from outside a worker. This returns an unsigned object on purpose, so that the caller is sure to get a non-negative value
|
inlinestatic |
Send a request to the worker to block, before a parallel task is about to begin.
Must be called with worker's sched_mutex held.
|
inlinestatic |
Send a request to the worker to unblock, after a parallel task is complete.
Must be called with worker's sched_mutex held.
|
inlinestatic |
Called by the worker to process incoming requests to block or unblock on parallel task boundaries.
Must be called with worker's sched_mutex held.
|
inlinestatic |
Mark the beginning of a scheduling operation by the worker. No worker blocking operations on parallel tasks and no scheduling context change operations must be performed on contexts containing the worker, on contexts about to add the worker and on contexts about to remove the worker, while the scheduling operation is in process. The sched mutex of the worker may only be acquired permanently by another thread when no scheduling operation is in process, or when a scheduling operation is in process _and_ worker->state_relax_refcnt!=0. If a scheduling operation is in process _and_ worker->state_relax_refcnt==0, a thread other than the worker must wait on condition worker->sched_cond for worker->state_relax_refcnt!=0 to become true, before acquiring the worker sched mutex permanently.
Must be called with worker's sched_mutex held.
void _starpu_worker_apply_deferred_ctx_changes | ( | void | ) |
Mark the end of a scheduling operation by the worker.
Must be called with worker's sched_mutex held.
|
inlinestatic |
Must be called before altering a context related to the worker whether about adding the worker to a context, removing it from a context or modifying the set of workers of a context of which the worker is a member, to mark the beginning of a context change operation. The sched mutex of the worker must be held before calling this function.
Must be called with worker's sched_mutex held.
|
inlinestatic |
Mark the end of a context change operation.
Must be called with worker's sched_mutex held.
|
inlinestatic |
Temporarily allow other workers to access the current worker state, when still scheduling, but the scheduling has not yet been made or is already done
|
inlinestatic |
Same, but with current worker mutex already held
|
inlinestatic |
lock a worker for observing contents
notes:
void _starpu_worker_refuse_task | ( | struct _starpu_worker * | worker, |
struct starpu_task * | task | ||
) |
Allow a worker pulling a task it cannot execute to properly refuse it and send it back to the scheduler.