KMR
|
KMR Interface. More...
#include <stdio.h>
#include <stddef.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <assert.h>
Go to the source code of this file.
Classes | |
struct | kmr_code_line |
Information of Source Code Line. More... | |
struct | kmr_ctx |
KMR Context. More... | |
struct | kmr_file_option |
Options to Mapping on Files. More... | |
struct | kmr_keyed_record |
Keyed-Record for Sorting. More... | |
struct | kmr_kv_box |
Handy Copy of a Key-Value Field. More... | |
union | kmr_kvs |
Key-Value Stream (abstract). More... | |
struct | kmr_kvs_block |
struct | kmr_kvs_dummy |
Key-Value Stream (DUMMY); Mandatory Entries. More... | |
struct | kmr_kvs_entry |
struct | kmr_kvs_list |
struct | kmr_kvs_list_head |
struct | kmr_kvs_oncore |
Key-Value Stream. More... | |
struct | kmr_kvs_pushoff |
Key-Value Stream with Shuffling at Addition of Key-Values. More... | |
struct | kmr_map_ms_state |
State during kmr_map_ms(). More... | |
struct | kmr_ntuple |
N-Tuple. More... | |
struct | kmr_ntuple_entry |
N-Tuple Argument. More... | |
struct | kmr_option |
Options to Mapping, Shuffling, and Reduction. More... | |
struct | kmr_pushoff_buffers |
Record of Push-Off Key-Value Stream for a Rank. More... | |
struct | kmr_spawn_info |
Spawning Info. More... | |
struct | kmr_spawn_option |
Options to Mapping by Spawns. More... | |
union | kmr_unit_sized |
Unit-Sized Storage. More... | |
Macros | |
#define | KMR_API_ID KMR_API_ID0(KMR_H) |
#define | KMR_API_ID0(X) KMR_API_ID1(X) |
#define | KMR_API_ID1(X) kmr_api_ ## X |
#define | KMR_BR0 { |
#define | KMR_BR1 } |
#define | kmr_create_kvs(MR, KF, VF) kmr_create_kvs7((MR), (KF), (VF), kmr_noopt, __FILE__, __LINE__, __func__) |
Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes. More... | |
#define | kmr_create_kvs_(MR, IGNORE) |
Makes a new key-value stream (of type KMR_KVS). More... | |
#define | kmr_create_kvsx(MR, KF, VF, OPT) kmr_create_kvs7((MR), (KF), (VF), (OPT), __FILE__, __LINE__, __func__) |
Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes. More... | |
#define | KMR_H 20160425 |
#define | kmr_init() kmr_init_2(KMR_API_ID) |
Sets up the environment. More... | |
#define | KMR_JOB_NAME_LEN 256 |
#define | kmr_kv_cake kmr_kv_box |
#define | KMR_KVS_MAGIC_OK(X) |
#define | kmr_map(KVI, KVO, ARG, OPT, M) |
Maps simply. More... | |
#define | kmr_reduce(KVI, KVO, ARG, OPT, R) |
Reduces key-value pairs. More... | |
#define | kmr_sort_a_batch(X0, X1, X2, X3) kmr_sort_locally(X0,X1,X2,X3) |
#define | KMR_TAG_SPAWN_REPLY 500 |
#define | KMR_TAG_SPAWN_REPLY1 501 |
Typedefs | |
typedef struct kmr_ctx | KMR |
typedef union kmr_kvs | KMR_KVS |
typedef int(* | kmr_mapfn_t) (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index) |
Map-function Type. More... | |
typedef int(* | kmr_redfn_t) (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg) |
Reduce-function Type. More... | |
Enumerations | |
enum | kmr_kv_field { KMR_KV_BAD, KMR_KV_OPAQUE, KMR_KV_CSTRING, KMR_KV_INTEGER, KMR_KV_FLOAT8, KMR_KV_POINTER_OWNED, KMR_KV_POINTER_UNMANAGED } |
Datatypes of Keys or Values. More... | |
enum | kmr_kvs_magic { KMR_KVS_BAD, KMR_KVS_ONCORE, KMR_KVS_PUSHOFF, KMR_KVS_ONCORE_PACKED } |
Functions | |
int | kmr_add_identity_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long i) |
Adds a given key-value pair unmodified. More... | |
int | kmr_add_kv (KMR_KVS *kvs, const struct kmr_kv_box kv) |
Adds a key-value pair. More... | |
int | kmr_add_kv1 (KMR_KVS *kvs, void *k, int klen, void *v, int vlen) |
Adds a key-value pair as given directly by a pointer. More... | |
int | kmr_add_kv_done (KMR_KVS *kvs) |
Marks finished adding key-value pairs. More... | |
int | kmr_add_kv_quick_ (KMR_KVS *kvs, const struct kmr_kv_box kv) |
int | kmr_add_kv_space (KMR_KVS *kvs, const struct kmr_kv_box kv, void **keyp, void **valuep) |
Adds a key-value pair, but only allocates a space and returns the pointers to the key and the value parts. More... | |
int | kmr_add_ntuple (KMR_KVS *kvo, void *k, int klen, struct kmr_ntuple *u) |
Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO. More... | |
int | kmr_add_string (KMR_KVS *kvs, const char *k, const char *v) |
Adds a key-value pair of strings. More... | |
int | kmr_assign_file (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Assigns files to ranks based on data locality. More... | |
void | kmr_check_pushoff_fast_notice_ (KMR *mr) |
Check if fast-notice works. More... | |
int | kmr_choose_first_part (KMR_KVS *kvi, KMR_KVS *kvo, long n, struct kmr_option opt) |
Chooses the first N entries from a key-value stream KVI. More... | |
int | kmr_concatenate_kvs (KMR_KVS *kvs[], int nkvs, KMR_KVS *kvo, struct kmr_option opt) |
Concatenates a number of KVSes to one. More... | |
int | kmr_copy_info_to_kvs (MPI_Info src, KMR_KVS *kvo) |
Copies mpi-info entries into kvs. More... | |
int | kmr_copy_kvs_to_info (KMR_KVS *kvi, MPI_Info dst) |
Copies kvs entries into mpi-info. More... | |
int | kmr_copy_to_array_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long i) |
Copies the entry in the array. More... | |
KMR * | kmr_create_context (const MPI_Comm comm, const MPI_Info conf, const char *name) |
Makes a new KMR context (a context has type KMR). More... | |
KMR * | kmr_create_context_world (void) |
KMR * | kmr_create_dummy_context (void) |
KMR_KVS * | kmr_create_kvs7 (KMR *mr, enum kmr_kv_field k, enum kmr_kv_field v, struct kmr_option opt, const char *, const int, const char *) |
Makes a new key-value stream with the specified field data-types. More... | |
KMR_KVS * | kmr_create_pushoff_kvs (KMR *mr, enum kmr_kv_field kf, enum kmr_kv_field vf, struct kmr_option opt, const char *, const int, const char *) |
Makes a new key-value stream with the specified field data-types. More... | |
int | kmr_distribute (KMR_KVS *kvi, KMR_KVS *kvo, _Bool cyclic, struct kmr_option opt) |
Distributes key-values so that each rank has approximately the same number of pairs. More... | |
static void | kmr_dummy_dummy_dummy_ (void) |
int | kmr_dump_keyed_records (const struct kmr_keyed_record *ev, KMR_KVS *kvi) |
int | kmr_dump_kv (struct kmr_kv_box kv, const KMR_KVS *kvs, char *buf, int buflen) |
Dumps contents of a key-value. More... | |
int | kmr_dump_kvs (KMR_KVS *kvs, int flag) |
Dumps contents of a key-value stream to stdout. More... | |
int | kmr_dump_kvs_stats (KMR_KVS *, int level) |
Dumps contents of a key-value stream, with values are pairs. More... | |
void | kmr_dump_opaque (const char *p, int siz, char *buf, int buflen) |
Puts the string of the key or value field into a buffer BUF as printable string. More... | |
void | kmr_dump_slot (union kmr_unit_sized e, int len, enum kmr_kv_field data, char *buf, int buflen) |
int | kmr_file_enumerate (KMR *mr, char **names, int n, KMR_KVS *kvo, struct kmr_file_option fopt) |
int | kmr_fin (void) |
Clears the environment. More... | |
void | kmr_fin_pushoff_fast_notice_ (void) |
int | kmr_find_key (KMR_KVS *kvi, struct kmr_kv_box ki, struct kmr_kv_box *vo) |
Finds a key-value pair for a key. More... | |
int | kmr_find_string (KMR_KVS *kvi, const char *k, const char **vq) |
Finds the key K in the key-value stream KVS. More... | |
int | kmr_free_context (KMR *mr) |
Releases a context created with kmr_create_context(). More... | |
int | kmr_free_kvs (KMR_KVS *kvs) |
Releases a key-value stream (type KMR_KVS). More... | |
KMR * | kmr_get_context_of_kvs (KMR_KVS const *kvs) |
int | kmr_get_element_count (KMR_KVS *kvs, long *v) |
Gets the total number of key-value pairs. More... | |
MPI_Comm * | kmr_get_spawner_communicator (KMR *mr, long index) |
Obtains (a reference to) a parent inter-communicator of a spawned process. More... | |
int | kmr_histogram_count_by_ranks (KMR_KVS *kvs, long *frq, double *var, _Bool rankzeroonly) |
Fills an integer array FRQ[i] with the count of the elements of each rank. More... | |
int | kmr_init_2 (int ignore) |
void | kmr_init_pushoff_fast_notice_ (MPI_Comm, _Bool verbose) |
Initializes RDMA for fast-notice. More... | |
int | kmr_legal_minimum_field_size (KMR *mr, enum kmr_kv_field f) |
Returns a minimum byte size of the field: 8 for INTEGER and FLOAT8, 0 for others. More... | |
int | kmr_local_element_count (KMR_KVS *kvs, long *v) |
Gets the number of key-value pairs locally on each rank. More... | |
int | kmr_map9 (_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m, const char *, const int, const char *) |
Maps simply. More... | |
int | kmr_map_file_names (KMR *mr, char **names, int n, struct kmr_file_option fopt, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Maps on file names. More... | |
int | kmr_map_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Maps until some key-values are added. More... | |
int | kmr_map_getline (KMR *mr, FILE *f, long limit, _Bool largebuffering, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Calls a map-function M for each line by getline() on an input F. More... | |
int | kmr_map_getline_in_memory_ (KMR *mr, void *b, size_t sz, long limit, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
int | kmr_map_ms (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Maps in master-slave mode. More... | |
int | kmr_map_ms_commands (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, struct kmr_spawn_option sopt, kmr_mapfn_t m) |
Maps in master-slave mode, specialized to run serial commands. More... | |
int | kmr_map_on_rank_zero (KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Maps on rank0 only. More... | |
int | kmr_map_once (KMR_KVS *kvo, void *arg, struct kmr_option opt, _Bool rank_zero_only, kmr_mapfn_t m) |
Maps once. More... | |
int | kmr_map_parallel_processes (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn) |
Maps on processes started by MPI_Comm_spawn() to run independent MPI processes, which will not communicate to the parent. More... | |
int | kmr_map_processes (_Bool nonmpi, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn) |
Maps on processes started by MPI_Comm_spawn() to run independent processes. More... | |
int | kmr_map_rank_by_rank (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Maps sequentially with rank by rank for debugging. More... | |
int | kmr_map_serial_processes (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn) |
Maps on processes started by MPI_Comm_spawn() to run serial processes. More... | |
int | kmr_map_skipping (long from, long stride, long limit, _Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Maps by skipping the number of entries. More... | |
int | kmr_map_via_spawn (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn) |
Maps on processes started by MPI_Comm_spawn(). More... | |
int | kmr_match (KMR_KVS *kvi0, KMR_KVS *kvi1, KMR_KVS *kvo, struct kmr_option opt) |
Makes key-value pairs as products of the two values in two key-value streams. More... | |
int | kmr_move_kvs (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Moves the contents of the input KVI to the output KVO. More... | |
struct kmr_ntuple_entry | kmr_nth_ntuple (struct kmr_ntuple *u, int nth) |
Returns an NTH entry of an n-tuple. More... | |
int | kmr_pairing (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Replaces a value part with a key-value pairing. More... | |
void | kmr_print_statistics_on_pushoff (KMR *mr, char *titlestring) |
int | kmr_product_ntuples (KMR_KVS *kvo, struct kmr_ntuple **vv[2], long cnt[2], int newmarker, int slots[][2], int nslots, int keys[][2], int nkeys) |
Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1]. More... | |
int | kmr_put_ntuple (KMR *mr, struct kmr_ntuple *u, const int sz, const void *v, const int vlen) |
Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE. More... | |
int | kmr_put_ntuple_entry (KMR *mr, struct kmr_ntuple *u, const int sz, struct kmr_ntuple_entry e) |
Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE. More... | |
int | kmr_put_ntuple_long (KMR *mr, struct kmr_ntuple *u, const int sz, long v) |
Adds an integer value in an n-tuple U whose size is limited to SIZE. More... | |
int | kmr_ranking (KMR_KVS *kvi, KMR_KVS *kvo, long *count, struct kmr_option opt) |
Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT. More... | |
int | kmr_read_file_by_segments (KMR *mr, char *file, int color, void **buffer, off_t *readsize) |
Reads one file by segments and reassembles by all-gather. More... | |
int | kmr_read_files_reassemble (KMR *mr, char *file, int color, off_t offset, off_t bytes, void **buffer, off_t *readsize) |
Reassembles files reading by ranks. More... | |
int | kmr_receive_kvs_from_spawned_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index) |
Collects key-value pairs generated by spawned processes. More... | |
int | kmr_reduce9 (_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r, const char *, const int, const char *) |
Reduces key-value pairs. More... | |
int | kmr_reduce_as_one (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r) |
Calls a reduce-function once as if all key-value pairs had the same key. More... | |
int | kmr_reduce_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r) |
Reduces until some key-values are added. More... | |
int | kmr_replicate (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Replicates key-value pairs to be visible on all ranks, that is, it has the effect of bcast or all-gather. More... | |
int | kmr_reply_to_spawner (KMR *mr) |
Sends a reply message in the spawned process, which tells it is ready to finish and may have some data to send to the spawner in kmr_map_via_spawn(). More... | |
void | kmr_reset_ntuple (struct kmr_ntuple *u, int n, int marker) |
Resets an n-tuple U with N entries and a MARKER. More... | |
int | kmr_restore_kvs (KMR_KVS *kvo, void *data, size_t sz, struct kmr_option opt) |
Unpacks locally the contents of a key-value stream from a byte array. More... | |
int | kmr_retrieve_keyed_records (KMR_KVS *kvs, struct kmr_keyed_record *ev, long n, _Bool shuffling, _Bool ranking) |
Fills keyed records in an array for sorting. More... | |
int | kmr_retrieve_kvs_entries (KMR_KVS *kvs, struct kmr_kvs_entry **ev, long n) |
Fills local key-value entries in an array for inspection. More... | |
int | kmr_reverse (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Makes a new pair by swapping the key and the value in each pair. More... | |
int | kmr_save_kvs (KMR_KVS *kvi, void **dataq, size_t *szq, struct kmr_option opt) |
Packs locally the contents of a key-value stream to a byte array. More... | |
int | kmr_scan_locally (KMR_KVS *kvi, KMR_KVS *carryin, KMR_KVS *kvo, KMR_KVS *carryout, kmr_redfn_t r) |
Scans every key-value with a reduce-function locally (independently on each rank). More... | |
int | kmr_scan_on_values (KMR_KVS *kvi, KMR_KVS *kvo, KMR_KVS *total, kmr_redfn_t r) |
Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL). More... | |
int | kmr_send_kvs_to_spawner (KMR *mr, KMR_KVS *kvs) |
Sends the KVS from a spawned process to the map-function of the spawner. More... | |
int | kmr_separate_ntuples (KMR *mr, const struct kmr_kv_box kv[], const long n, struct kmr_ntuple **vv[2], long cnt[2], int markers[2], _Bool disallow_other_entries) |
Separates the n-tuples stored in the value part of KV into the two sets by their marker values. More... | |
int | kmr_shuffle (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Shuffles key-value pairs to the appropriate destination ranks. More... | |
int | kmr_shuffle_leveling_pair_count (KMR_KVS *kvi, KMR_KVS *kvo) |
Shuffles key-values so that each rank has approximately the same number of pairs. More... | |
int | kmr_size_ntuple (struct kmr_ntuple *u) |
Returns the storage size of an n-tuple. More... | |
int | kmr_size_ntuple_by_lengths (int n, int len[]) |
Returns the storage size of an n-tuple for N entries with LEN[i] size each. More... | |
int | kmr_sort (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sorts a key-value stream globally. More... | |
int | kmr_sort_by_one (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sorts by rank0, a degenerate case for a small number of keys. More... | |
int | kmr_sort_large (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sorts a key-value stream by the regular or the random sampling-sort. More... | |
int | kmr_sort_locally (KMR_KVS *kvi, KMR_KVS *kvo, _Bool shuffling, struct kmr_option opt) |
Reorders key-value pairs in a single rank. More... | |
int | kmr_sort_small (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sorts a key-value stream, by partitioning to equal ranges. More... | |
int | kmr_take_one (KMR_KVS *kvi, struct kmr_kv_box *kv) |
Extracts a single key-value pair locally in the key-value stream KVI. More... | |
int | kmr_unpairing (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Extracts a key-value pair from a pairing in the value part, discarding the original key. More... | |
Variables | |
int | KMR_API_ID |
static const struct kmr_file_option | kmr_fnoopt = {0, 0, 0, 0} |
union { | |
unsigned long bits | |
struct kmr_file_option o | |
} | kmr_foptmask = {{1, 1, 1, 1}} |
static const size_t | kmr_kvs_block_header = offsetof(struct kmr_kvs_block, data) |
static const size_t | kmr_kvs_entry_header = offsetof(struct kmr_kvs_entry, c) |
Size of an Entry Header. More... | |
static const struct kmr_option | kmr_noopt = {0, 0, 0, 0, 0, 0, 0} |
union { | |
unsigned long bits | |
struct kmr_option o | |
} | kmr_optmask = {{1, 1, 1, 1, 1, 1, 1}} |
static const struct kmr_spawn_option | kmr_snoopt = {0, 0, 0, 0, 0} |
union { | |
unsigned long bits | |
struct kmr_spawn_option o | |
} | kmr_soptmask = {{1, 1, 1, 1, 1}} |
const int | kmr_version |
KMR Interface.
GENERAL NOTES. (1) The sizes of key-value fields are rounded up to 8-byte boundary.
Definition in file kmr.h.
#define kmr_create_kvsx | ( | MR, | |
KF, | |||
VF, | |||
OPT | |||
) | kmr_create_kvs7((MR), (KF), (VF), (OPT), __FILE__, __LINE__, __func__) |
#define kmr_create_kvs | ( | MR, | |
KF, | |||
VF | |||
) | kmr_create_kvs7((MR), (KF), (VF), kmr_noopt, __FILE__, __LINE__, __func__) |
#define kmr_create_kvs_ | ( | MR, | |
IGNORE | |||
) |
Makes a new key-value stream (of type KMR_KVS).
#define kmr_map | ( | KVI, | |
KVO, | |||
ARG, | |||
OPT, | |||
M | |||
) |
Maps simply.
See kmr_map9().
#define kmr_reduce | ( | KVI, | |
KVO, | |||
ARG, | |||
OPT, | |||
R | |||
) |
Reduces key-value pairs.
See kmr_reduce9().
#define KMR_KVS_MAGIC_OK | ( | X | ) |
#define kmr_init | ( | ) | kmr_init_2(KMR_API_ID) |
typedef int(* kmr_mapfn_t) (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index) |
Map-function Type.
A map-function gets a key-value pair as struct kmr_kv_box KV. KVI is the input key-value stream, but it can usually be ignored (its potential usage is to check the content type of the key and value fields). KVO is the output key-value stream. The pointer ARG is one just passed to kmr_map(), which has no specific purpose and is used to pass any argument to a map-function. INDEX is the count of map-function calls, and it is usually equal to the index of a key-value pair in the input. It is assured to be distinct, and can be used for race-free accesses to the pointer ARG.
typedef int(* kmr_redfn_t) (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg) |
Reduce-function Type.
A reduce-function gets key-value pairs as an array KV of struct kmr_kv_box. N is the number of key-value pairs. KVI is the input key-value stream, but it can usually be ignored. KVO is the output key-value stream. The pointer ARG is one just passed to kmr_reduce(), which has no specific purpose and is used to pass any argument to a reduce-function.
enum kmr_kv_field |
Datatypes of Keys or Values.
It indicates the field data of keys or values. KMR_KV_OPAQUE is a variable-sized byte vector, and KMR_KV_CSTRING is a non-wide C string, and they are dealt with in exactly the same way. KMR_KV_INTEGER is a long integer, and KMR_KV_FLOAT8 is a double. The datatypes are mostly uninterpreted in mapping/reducing, except for in sorting. There are two other types for pointers. Pointers can be stored as they are (unlike opaque data, which are embedded in the field), but converted to opaque ones before communication. KMR_KV_POINTER_OWNED is an allocated pointer, and the data will be freed on consuming a key-value stream. KMR_KV_POINTER_UNMANAGED is a pointer to a possibly shared data.
KMR* kmr_create_context | ( | const MPI_Comm | comm, |
const MPI_Info | conf, | ||
const char * | identifying_name | ||
) |
Makes a new KMR context (a context has type KMR).
A KMR context is a record of common information to all key-value streams. COMM is a communicator for use inside. It dups the given communicator inside, to avoid conflicts with other calls to MPI functions. MPI should be initialized with a thread support level of either MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE. CONF specifies configuration options. It should be freed after a call. The options can differ on each rank (in this version). The configuration options are first taken from a file with a name specified by the environment variable "KMROPTION" on rank0, and they are merged with the explicitly given ones. The KMROPTION file has the file format of Java properties (but only in Latin characters). Refer to JDK documents on "java.util.Properties" (on "load" method) for the file format. The explicitly given ones have precedence. IDENTIFYING_NAME is just recorded in the context, and has no specific use. It may be null.
int kmr_free_context | ( | KMR * | mr | ) |
Releases a context created with kmr_create_context().
KMR_KVS* kmr_create_kvs7 | ( | KMR * | mr, |
enum kmr_kv_field | kf, | ||
enum kmr_kv_field | vf, | ||
struct kmr_option | opt, | ||
const char * | file, | ||
const int | line, | ||
const char * | func | ||
) |
int kmr_free_kvs | ( | KMR_KVS * | kvs | ) |
Releases a key-value stream (type KMR_KVS).
Normally, mapper/shuffler/reducer consumes and frees the input key-value stream, and explicit calls are unnecessary. Here, mapper/shuffler/reducer includes kmr_map(), kmr_map_on_rank_zero(), kmr_map_ms(), kmr_shuffle(), kmr_replicate(), kmr_reduce(), and kmr_reduce_as_one().
int kmr_move_kvs | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Moves the contents of the input KVI to the output KVO.
It consumes the input KVI. Calling kmr_map() with a null map-function has the same effect. Effective-options: TAKE_CKPT. See struct kmr_option.
int kmr_concatenate_kvs | ( | KMR_KVS * | kvs[], |
int | nkvs, | ||
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Concatenates a number of KVSes to one.
Inputs are consumed. (It is fast because the key-value data is stored internally as a list of data blocks, and this routine just links them). Note that concatenating KVS can in effect be performed by consecutive calls to kmr_map() with the KEEP_OPEN option using the same output KVS. Effective-options: none.
int kmr_add_kv | ( | KMR_KVS * | kvs, |
const struct kmr_kv_box | kv | ||
) |
int kmr_add_kv1 | ( | KMR_KVS * | kvs, |
void * | k, | ||
int | klen, | ||
void * | v, | ||
int | vlen | ||
) |
int kmr_add_kv_space | ( | KMR_KVS * | kvs, |
const struct kmr_kv_box | kv, | ||
void ** | keyp, | ||
void ** | valuep | ||
) |
Adds a key-value pair, but only allocates a space and returns the pointers to the key and the value parts.
It makes it possible to create large key/value data directly in the space. It does not return a proper value if a key/value field is not a pointer. (It cannot be used with a "push-off" key-value stream, because its buffer will be sent out and filling in the buffer late causes a race).
int kmr_add_kv_done | ( | KMR_KVS * | kvs | ) |
Marks finished adding key-value pairs.
Further addition will be prohibited. Normally, mapper/shuffler/reducer finishes the output key-value stream by itself, and explicit calls are unnecessary. Here, mapper/shuffler/reducer includes kmr_map(), kmr_map_on_rank_zero(), kmr_map_ms(), kmr_shuffle(), kmr_replicate(), and kmr_reduce().
int kmr_add_string | ( | KMR_KVS * | kvs, |
const char * | k, | ||
const char * | v | ||
) |
int kmr_map9 | ( | _Bool | stop_when_some_added, |
KMR_KVS * | kvi, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m, | ||
const char * | file, | ||
const int | line, | ||
const char * | func | ||
) |
Maps simply.
It consumes the input key-value stream KVI unless the INSPECT option is marked. The output key-value stream KVO can be null, but in that case, a map-function cannot add key-value pairs. The pointer ARG is just passed to a map-function as a general argument, where accesses to it should be race-free, since a map-function is called by threads by default. M is the map-function. See the description on the type kmr_mapfn_t. It copies the contents of the input KVI to the output KVO, when a map-function is null. During processing, it first makes an array pointing to the key-value entries in each data block, and works on it to ease threading/parallelization. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, COLLAPSE, TAKE_CKPT. See struct kmr_option.
int kmr_map_skipping | ( | long | from, |
long | stride, | ||
long | limit, | ||
_Bool | stop_when_some_added, | ||
KMR_KVS * | kvi, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
int kmr_map_once | ( | KMR_KVS * | kvo, |
void * | arg, | ||
struct kmr_option | opt, | ||
_Bool | rank_zero_only, | ||
kmr_mapfn_t | m | ||
) |
Maps once.
It calls a map-function once with a dummy key-value stream and a dummy key-value pair. See kmr_map(). Effective-options: KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
int kmr_map_on_rank_zero | ( | KMR_KVS * | kvo, |
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
Maps on rank0 only.
It calls a map-function once with a dummy key-value stream and a dummy key-value pair. It is used to avoid low-level conditionals like (myrank==0). See kmr_map(). Effective-options: KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
int kmr_map_rank_by_rank | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
int kmr_map_ms | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
Maps in master-slave mode.
The input key-value stream should be empty except on rank0 where the master is running (the contents on the slave ranks are ignored). It consumes the input key-value stream. The master does delivery only. The master returns frequently to give a chance to check-pointing, etc. The master returns prematurely each time one pair is delivered, and those returns are marked by MPI_ERR_ROOT indicating more tasks remain. In contrast, slaves return only after all tasks are done. The state that needs to be kept during kmr_map_ms() for check-pointing is in the key-value streams KVI and KVO on the master. Note that this totally diverges from bulk-synchronous execution. It does not accept key-value field types KMR_KV_POINTER_OWNED or KMR_KV_POINTER_UNMANAGED. Effective-options: NOTHREADING, KEEP_OPEN. See struct kmr_option.
Definition at line 310 of file kmrmapms.c.
int kmr_map_ms_commands | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
struct kmr_spawn_option | sopt, | ||
kmr_mapfn_t | m | ||
) |
Maps in master-slave mode, specialized to run serial commands.
It fork-execs commands specified by key-values, then calls a map-function when each command finishes. It takes the commands in the same way as kmr_map_via_spawn(). The commands must never be MPI programs. It is implemented with kmr_map_ms(); see the comments on kmr_map_ms().
Definition at line 2198 of file kmrmapms.c.
int kmr_map_for_some | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
Maps until some key-values are added.
It stops processing, when the output is non-empty. It does not guarantee singleness. Existence/emptiness can be checked by kmr_get_element_count().
Definition at line 1170 of file kmrmoreops.c.
int kmr_map_via_spawn | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
MPI_Info | info, | ||
struct kmr_spawn_option | opt, | ||
kmr_mapfn_t | mapfn | ||
) |
Maps on processes started by MPI_Comm_spawn().
It is intended to run custom MPI programs which will return a reply as MPI messages. Consider other variations to run independent processes, when the spawned processes will not interact with the parent: kmr_map_processes() or kmr_map_ms_commands().
The spawner (parent) spawns processes specified by key-value pairs. The key part is ignored, and the value part is a list of null-separated strings which constitutes a command and arguments. The option SEPARATOR_SPACE changes the separator character to whitespaces. If the first string is "maxprocs=n", then the number of processes is taken from this string. Or, an MPI_Info entry "maxprocs" in INFO is used, and "maxprocs" is common to all spawns. It is an error if neither is specified. Multiple spawners (more than one rank can have entries to spawn) divide the universe of processes evenly among them, and try to control the number of the simultaneously running processes in the range.
The option REPLY_EACH or REPLY_ROOT lets the spawner wait for the reply messages from the spawned processes, and then the spawner calls the map-function. A reply message is of the tag KMR_TAG_SPAWN_REPLY=500 and length zero, and kmr_reply_to_spawner() can be used to send this reply. When none of REPLY_EACH or REPLY_ROOT are specified, the spawner immediately calls the map-function one-by-one in the FIFO order (before the spawned processes finish). In that case, no load-balance is taken. The map-function should wait for the spawned processes to finish, otherwise, the spawner starts next spawns continuously and runs out the processes, which causes the MPI runtime to signal an error.
Communication between the spawned processes and the map-function of the spawner is through the inter-communicator. The parent inter-communicator of the spawned processes can be taken by MPI_Comm_get_parent() as usual. The inter-communicator at the spawner side can be obtained by calling kmr_get_spawner_communicator() inside a map-function.
The INFO argument is passed to MPI_Comm_spawn() unchanged.
NOTE: There is no way to check the availability of processes for spawning in the MPI specification and MPI implementations. And, the MPI runtime signals errors when it runs out the processes. Thus, it puts a sleep (1 sec) in between MPI_Comm_spawn() calls to allow clean-ups in the MPI runtime and to avoid timing issues.
INTERFACE CHANGE: Set mr->spawn_pass_intercomm_in_argument=1 to enable the old interface, where the map-function MAPFN is called with the kmr_spawn_state structure as the general argument. The argument ARG passed to the mapper is stored in the MAPARG slot in the kmr_spawn_state structure. When the TAKE_CKPT option is specified, a checkpoint data file of the output key-value stream is saved if both CKPT_ENABLE and CKPT_SELECTIVE global options are set.
Definition at line 1870 of file kmrmapms.c.
int kmr_reply_to_spawner | ( | KMR * | mr | ) |
Sends a reply message in the spawned process, which tells it is ready to finish and may have some data to send to the spawner in kmr_map_via_spawn().
Definition at line 1776 of file kmrmapms.c.
MPI_Comm* kmr_get_spawner_communicator | ( | KMR * | mr, |
long | index | ||
) |
Obtains (a reference to) a parent inter-communicator of a spawned process.
It is used inside a map-function of kmr_map_via_spawn(); pass as INDEX the same index argument that the map-function received. It returns a reference to allow the side-effect of freeing a communicator in a map-function.
Definition at line 1799 of file kmrmapms.c.
int kmr_map_processes | ( | _Bool | nonmpi, |
KMR_KVS * | kvi, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
MPI_Info | info, | ||
struct kmr_spawn_option | opt, | ||
kmr_mapfn_t | mapfn | ||
) |
Maps on processes started by MPI_Comm_spawn() to run independent processes.
It either calls kmr_map_parallel_processes() or kmr_map_serial_processes() with regard to the NONMPI argument. See the comments of kmr_map_parallel_processes() and kmr_map_serial_processes().
Definition at line 1965 of file kmrmapms.c.
int kmr_map_parallel_processes | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
MPI_Info | info, | ||
struct kmr_spawn_option | opt, | ||
kmr_mapfn_t | mapfn | ||
) |
Maps on processes started by MPI_Comm_spawn() to run independent MPI processes, which will not communicate to the parent.
The programs need to be MPI. It is a variation of kmr_map_via_spawn(), and refer to the comment on it for the basic usage. Since the spawned program does not know the parent, there is no way to communicate from the spawner. The map-function is called after the processes have exited, so that the map-function can check the result files created by the spawned processes.
This function detects the end of spawned processes using a watch-program "kmrwatch0", by checking a closure of a socket to which "kmrwatch0" connected.
NOTE THAT THIS OPERATION WILL BLOCK INDEFINITELY AND FAIL, DEPENDING ON THE BEHAVIOR OF AN MPI IMPLEMENTATION. It is checked to work with Open MPI (1.6) and MPICH2 (1.5), but not with Intel MPI (4.1) and YAMPI2 (GridMPI 2.1). It depends on the behavior that MPI_Comm_free() on the parent and MPI_Finalize() on the child do not synchronize. The quote of the standard (MPI 2.x) says: "Though collective, MPI_Comm_free is anticipated that this operation will normally be implemented to be local, ..." The blocking situation can be checked by enabling tracing around calls to MPI_Comm_free() by (mr->trace_map_spawn=1).
NOTE (on MPI spawn implementations): Open MPI (1.6) allows spawning non-MPI processes by passing a special MPI_Info. MPICH2 (1.5) does not allow spawning non-MPI processes, because MPI_Comm_spawn() of the parent and MPI_Init() of the child synchronize. In Intel MPI (4.1) and YAMPI2 (GridMPI), the calls of MPI_Comm_free() on the parent and MPI_Finalize() or MPI_Comm_free() on the child synchronize, and thus, they require calling MPI_Comm_free() at an appropriate time on the parent.
Options REPLY_ROOT and REPLY_EACH have no effect. When TAKE_CKPT option is specified, a checkpoint data file of the output key-value stream is saved if both CKPT_ENABLE and CKPT_SELECTIVE global options are set.
Definition at line 1915 of file kmrmapms.c.
int kmr_map_serial_processes | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
MPI_Info | info, | ||
struct kmr_spawn_option | opt, | ||
kmr_mapfn_t | mapfn | ||
) |
Maps on processes started by MPI_Comm_spawn() to run serial processes.
This should NOT be used; Use kmr_map_ms_commands(), instead. Fork-execing in kmr_map_ms_commands() is simpler than spawning. See also the comment on kmr_map_via_spawn() and kmr_map_parallel_processes(). The map-function is called after the processes have exited, thus, there is no way to communicate from the map-function. Instead, the map-function can check the result files created by the spawned processes.
This function detects the end of spawned processes using a watch-program "kmrwatch0" which sends a reply to the parent in place of the serial program. Options REPLY_ROOT and REPLY_EACH have no effect. When TAKE_CKPT option is specified, a checkpoint data file of the output key-value stream is saved if both CKPT_ENABLE and CKPT_SELECTIVE global options are set.
Definition at line 1945 of file kmrmapms.c.
Sends the KVS from a spawned process to the map-function of the spawner.
It is paired with kmr_receive_kvs_from_spawned_fn().
Definition at line 2005 of file kmrmapms.c.
int kmr_receive_kvs_from_spawned_fn | ( | const struct kmr_kv_box | kv, |
const KMR_KVS * | kvi, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
const long | index | ||
) |
Collects key-value pairs generated by spawned processes.
It is a map-function to be used with kmr_map_via_spawn() with the REPLY_EACH option. The spawned processes call kmr_send_kvs_to_spawner() to send generated key-value pairs, and this function receives and puts them into KVO. PROTOCOL: The reply consists of one or two messages with the tag KMR_TAG_SPAWN_REPLY1=501. One is the data size, which is followed by a marshaled key-value stream when the data size is non-zero.
Definition at line 2039 of file kmrmapms.c.
int kmr_sort_locally | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
_Bool | shuffling, | ||
struct kmr_option | opt | ||
) |
Reorders key-value pairs in a single rank.
It sorts pairs when SHUFFLING is false, or gathers pairs with the same hashed keys adjacent when SHUFFLING is true. When shuffling, it respects only the equality of the sort-keys, not their ordering. The sort-keys for shuffling are destination ranks for shuffling (taking a modulo of the hashed key with nprocs). As a sorting, it is NOT-STABLE due to quick-sort used inside. It converts pointer keys and values to opaque ones for sending.
Sorting on a key-value stream is by memcmp(), unless the keys are integer or floating-point numbers (ordering on integers and memcmp() are different). Sorting on non-numbers is performed in two steps: the first step sorts by the integer rankings, and the second by the specified comparator. And thus, the comparator is required to have a corresponding generator of integer rankings. It consumes the input key-value stream. Effective-options: NOTHREADING, INSPECT, KEY_AS_RANK.
int kmr_shuffle | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Shuffles key-value pairs to the appropriate destination ranks.
It first sorts pairs by the destination ranks of the keys, and then exchanges pairs with all-to-all communication. It converts pointer keys and values to opaque ones for sending during the sorting stage. Note that the key-value pairs are sorted by the hash-values prior to exchange. Effective-options: INSPECT, KEY_AS_RANK, TAKE_CKPT. See struct kmr_option.
int kmr_replicate | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Replicates key-value pairs to be visible on all ranks, that is, it has the effect of bcast or all-gather.
It gathers pairs on rank0 only by the option RANK_ZERO. It moves stably, keeping the ordering of ranks and the ordering of local key-value pairs. Effective-options: INSPECT, RANK_ZERO, TAKE_CKPT. See struct kmr_option.
int kmr_reduce9 | ( | _Bool | stop_when_some_added, |
KMR_KVS * | kvi, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_redfn_t | r, | ||
const char * | file, | ||
const int | line, | ||
const char * | func | ||
) |
Reduces key-value pairs.
It does not include shuffling, and thus, it requires being preceded by shuffling. Or, it works on local data (as a local combiner), if it is not preceded by shuffling. It always consumes the input key-value stream KVI. An output key-value stream KVO can be null. It passes an array of key-value pairs to a reduce-function whose keys are all equal (equality is by bits). The pointer ARG is just passed to a reduce-function as a general argument, where accesses to it should be race-free, since a reduce-function is called by threads by default. R is a reduce-function. See the description on the type kmr_redfn_t. A reduce-function may see a different input key-value stream (internally created one) instead of the one given. During reduction, it first scans adjacent equal keys, then calls a given reduce-function. Effective-options: NOTHREADING, INSPECT, TAKE_CKPT. See struct kmr_option.
int kmr_reduce_as_one | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_redfn_t | r | ||
) |
Calls a reduce-function once as if all key-value pairs had the same key.
See kmr_reduce(). Effective-options: INSPECT, TAKE_CKPT. See struct kmr_option.
int kmr_reduce_for_some | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_redfn_t | r | ||
) |
Reduces until some key-value are added.
It stops processing when the output is non-empty. It does not guarantee singleness. Existence/emptiness can be checked by kmr_get_element_count().
Definition at line 1183 of file kmrmoreops.c.
int kmr_map_file_names | ( | KMR * | mr, |
char ** | names, | ||
int | n, | ||
struct kmr_file_option | fopt, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
Maps on file names.
NAMES specifies N file names. The map-function gets a file name in the key field (the value field is integer zero). File-option EACH_RANK specifies each rank independently to enumerate file names, otherwise to work on rank0 only. File-option SUBDIRECTORIES specifies to descend to subdirectories. It ignores files/directories whose names start with dots. File-option LIST_FILE specifies to read contents of each file for file names. A list file consists of one file name per line, and a line beginning with a "#" is ignored. Whitespace is trimmed at the beginning and the end. LIST_FILE implies SUBDIRECTORIES. It enumerates names of regular files only. File-option SHUFFLE_FILES runs shuffling file names among ranks.
Definition at line 1372 of file kmrfiles.c.
int kmr_map_getline | ( | KMR * | mr, |
FILE * | f, | ||
long | limit, | ||
_Bool | largebuffering, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
Calls a map-function M for each line by getline() on an input F.
A map-function gets a line number in key and a string in value (the index argument is the same as the key). Calls to getline() are limited to LIMIT lines (0 for unlimited). It is multi-threaded and the call order is arbitrary. ARG and OPT are passed verbatim to a map-function. Effective-options: NOTHREADING, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
Definition at line 1561 of file kmrfiles.c.
int kmr_take_one | ( | KMR_KVS * | kvi, |
struct kmr_kv_box * | kv | ||
) |
Extracts a single key-value pair locally in the key-value stream KVI.
It is an error when zero or more than one entry is in the KVI. It does not consume the input KVS (INSPECT IMPLIED). The returned key-value entry must be used before freeing the input KVS, when it points to opaque data.
int kmr_find_key | ( | KMR_KVS * | kvi, |
struct kmr_kv_box | ki, | ||
struct kmr_kv_box * | ko | ||
) |
Finds a key-value pair for a key.
It is an error when not exactly one entry is found. It does not consume the input KVS KVI. The returned key-value entry must be used before freeing the input KVS, when it points to an opaque data. It maps internally, so it is slow. It is tricky that the internally created KVS KVS0 points to the key-value area in the input KVS KVI.
Definition at line 43 of file kmrmoreops.c.
int kmr_find_string | ( | KMR_KVS * | kvi, |
const char * | k, | ||
const char ** | vq | ||
) |
Finds the key K in the key-value stream KVS.
It returns a pointer pointing inside the key-value stream. It is an error when not exactly one entry is found. It does not consume the input KVS. It maps internally, so slow.
Definition at line 73 of file kmrmoreops.c.
int kmr_copy_info_to_kvs | ( | MPI_Info | src, |
KMR_KVS * | kvo | ||
) |
int kmr_copy_kvs_to_info | ( | KMR_KVS * | kvi, |
MPI_Info | dst | ||
) |
int kmr_get_element_count | ( | KMR_KVS * | kvs, |
long * | v | ||
) |
Gets the total number of key-value pairs.
It uses replication and reduction.
Definition at line 114 of file kmrmoreops.c.
int kmr_local_element_count | ( | KMR_KVS * | kvs, |
long * | v | ||
) |
int kmr_add_identity_fn | ( | const struct kmr_kv_box | kv, |
const KMR_KVS * | kvi, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
const long | i | ||
) |
int kmr_copy_to_array_fn | ( | const struct kmr_kv_box | kv, |
const KMR_KVS * | kvi, | ||
KMR_KVS * | kvo, | ||
void * | arg, | ||
const long | i | ||
) |
int kmr_save_kvs | ( | KMR_KVS * | kvs, |
void ** | dataq, | ||
size_t * | szq, | ||
struct kmr_option | opt | ||
) |
Packs locally the contents of a key-value stream to a byte array.
It is used to save or to send a key-value stream. It returns the allocated memory with its size, and it should be freed by the user. It may fail on allocating a buffer, and then it returns MPI_ERR_BUFFER. Its reverse is performed by kmr_restore_kvs().
int kmr_restore_kvs | ( | KMR_KVS * | kvo, |
void * | data, | ||
size_t | sz_, | ||
struct kmr_option | opt | ||
) |
Unpacks locally the contents of a key-value stream from a byte array.
It is a reverse of kmr_save_kvs().
int kmr_reverse | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Makes a new pair by swapping the key and the value in each pair.
That is, it makes new pairs (v0,k0) from (k0,v0). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
Definition at line 159 of file kmrmoreops.c.
int kmr_pairing | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Replaces a value part with a key-value pairing.
That is, it makes new pairs (k0,(k0,v0)) from (k0,v0). See kmr_unpairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
Definition at line 212 of file kmrmoreops.c.
int kmr_unpairing | ( | KMR_KVS * | kvs, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Extracts a key-value pair from a pairing in the value part, discarding the original key.
It is the inverse of kmr_pairing. That is, it makes new pairs (k1,v1) from (k0,(k1,v1)). See kmr_pairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
Definition at line 234 of file kmrmoreops.c.
int kmr_sort | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sorts a key-value stream globally.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It selects a sorting routine on the total number of keys. See kmr_sort_large(), kmr_sort_small(), or kmr_sort_by_one(). The results are stored as ascending ranks, thus the rank0 holds the minimum. Effective-options: INSPECT. See struct kmr_option.
Definition at line 575 of file kmrmoreops.c.
int kmr_sort_small | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sorts a key-value stream, by partitioning to equal ranges.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It assumes uniform distribution, and partitioning is simply determined by the range of keys (MIN-MAX range is divided by nprocs). Effective-options: NOTHREADING, INSPECT. See struct kmr_option.
Definition at line 388 of file kmrmoreops.c.
int kmr_sort_large | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sorts a key-value stream by the regular or the random sampling-sort.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It can be used for "GraySort". Effective-options: NOTHREADING, INSPECT. See struct kmr_option.
Definition at line 469 of file kmrmoreops.c.
int kmr_sort_by_one | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sorts by rank0, a degenerate case for a small number of keys.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. Effective-options: INSPECT. See struct kmr_option.
Definition at line 544 of file kmrmoreops.c.
int kmr_match | ( | KMR_KVS * | kvi0, |
KMR_KVS * | kvi1, | ||
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Makes key-value pairs as products of the two values in two key-value streams.
It creates a set of key-value pairs (ai,bj) of the pairs (key,ai) from KVS0 and (key,bj) from KVS1 for the matching key. It makes a direct-product of the values when multiple values exist for a matching key. That is, for example, given a set {(k,a0), (k,a1), (k,a2)} in KVS0 and {(k,b3), (k,b4)} in KVS1 for some distinct key, it creates {(a0,b3), (a0,b4), (a1,b3), (a1,b4), (a2,b3), (a2,b4)}. Effective-options: NOTHREADING. See struct kmr_option.
Definition at line 696 of file kmrmoreops.c.
int kmr_ranking | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
long * | count, | ||
struct kmr_option | opt | ||
) |
Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT.
Ranking is a position in the key-value stream. That is, for example, given a sequence {(k0,v0), (k1,v1), (k2,v2)}, it creates {(0,(k0,v0)), (1,(k1,v1)), (2,(k2,v2))}. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.
Definition at line 764 of file kmrmoreops.c.
int kmr_distribute | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
_Bool | cyclic, | ||
struct kmr_option | opt | ||
) |
Distributes key-values so that each rank has approximately the same number of pairs.
It is used to level the load of mapping among ranks by calling it before mapping. kmr_shuffle() can be sufficient to distribute pairs in most cases, but sometimes it results in uneven distribution because shuffling is based on hashing on the keys. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.
Definition at line 835 of file kmrmoreops.c.
Shuffles key-values so that each rank has approximately the same number of pairs.
It collects the same keys on a rank (cf. kmr_distribute()).
Definition at line 1074 of file kmrmoreops.c.
int kmr_scan_locally | ( | KMR_KVS * | kvi, |
KMR_KVS * | carryin, | ||
KMR_KVS * | kvo, | ||
KMR_KVS * | carryout, | ||
kmr_redfn_t | r | ||
) |
Scans every key-value with a reduce-function locally (independently on each rank).
It works in the order in the KVS. It ignores differences of the keys. It gets the start value from CARRYIN and puts the final value to CARRYOUT. The output has the same number of entries as the input. The carry-in and carry-out have one entry. The carry-out can be null. The reduce-function is called on each key-value pair as the right operand with the previous value as the left operand, and it should output a single value. The key part of the output is ignored and a pair is stored under the original key.
int kmr_scan_on_values | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
KMR_KVS * | total, | ||
kmr_redfn_t | r | ||
) |
Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL).
The key-values are scanned in the order in the KVS as they are concatenated in the rank-order. The reduce-function should be associative and free of side-effects (because it is called multiple times on the same data). The reduce-function should output a single key-value when given any number of key-value pairs. Furthermore, it should output an identity element when it is given zero key-value pairs.
Definition at line 943 of file kmrmoreops.c.
int kmr_choose_first_part | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
long | n, | ||
struct kmr_option | opt | ||
) |
Chooses the first N entries from a key-value stream KVI.
The option nothreading is implied to keep the ordering. Effective-options: INSPECT, KEEP_OPEN. See struct kmr_option.
Definition at line 1145 of file kmrmoreops.c.
int kmr_legal_minimum_field_size | ( | KMR * | mr, |
enum kmr_kv_field | f | ||
) |
int kmr_histogram_count_by_ranks | ( | KMR_KVS * | kvs, |
long * | frq, | ||
double * | var, | ||
_Bool | rankzeroonly | ||
) |
Fills an integer array FRQ[i] with the count of the elements of each rank.
The array FRQ must be as large as nprocs. It also fills VAR[0]=average, VAR[1]=variance, VAR[2]=min, and VAR[3]=max. FRQ or VAR can be null.
Definition at line 1569 of file kmrmoreops.c.
int kmr_read_files_reassemble | ( | KMR * | mr, |
char * | file, | ||
int | color, | ||
off_t | offset, | ||
off_t | bytes, | ||
void ** | buffer, | ||
off_t * | size | ||
) |
Reassembles files reading by ranks.
It is intended to reassemble a file from files split into segments. FILE is a file name. A file name can be null, when the rank does not participate in reading (COLOR=-1). COLOR groups ranks (COLOR must be >= -1). The files on the ranks with the same COLOR are concatenated, where concatenation is ordered by the rank-order. Read is performed for OFFSET and BYTES on each file. BYTES can be -1 to read an entire file. BUFFER and SIZE are set to the malloced buffer and the size on return. Ranks with non-null FILE retrieve a file (ingest), while ranks with non-zero BUFFER receive contents (digest). Ranks with COLOR=-1 do not participate in file reading. REMARK ON K: It reads a specified file by each rank, assuming the files reside in specific I/O-groups to the ranks.
Definition at line 653 of file kmrfiles.c.
int kmr_read_file_by_segments | ( | KMR * | mr, |
char * | file, | ||
int | color, | ||
void ** | buffer, | ||
off_t * | size | ||
) |
Reads one file by segments and reassembles by all-gather.
FILE is a file name. COLOR groups ranks (COLOR must be >= -1). The ranks with the same COLOR collaborate to read a file, and thus, they must specify the same file (with an identical inode number). BUFFER and SIZE are set to the malloced buffer and the size on return. Ranks with non-zero FILE retrieve a file (ingest). Ranks with non-zero BUFFER receive contents (digest). Ranks with COLOR=-1 do not participate in file reading, and then arguments should be FILE=0 and BUFFER=0.
Definition at line 1021 of file kmrfiles.c.
int kmr_retrieve_kvs_entries | ( | KMR_KVS * | kvs, |
struct kmr_kvs_entry ** | ev, | ||
long | n | ||
) |
int kmr_retrieve_keyed_records | ( | KMR_KVS * | kvs, |
struct kmr_keyed_record * | ev, | ||
long | n, | ||
_Bool | shuffling, | ||
_Bool | ranking | ||
) |
int kmr_dump_kv | ( | const struct kmr_kv_box | kv, |
const KMR_KVS * | kvs, | ||
char * | buf, | ||
int | buflen | ||
) |
int kmr_dump_kvs | ( | KMR_KVS * | kvs, |
int | flag | ||
) |
int kmr_dump_kvs_stats | ( | KMR_KVS * | kvs, |
int | level | ||
) |
void kmr_dump_opaque | ( | const char * | p, |
int | sz, | ||
char * | buf, | ||
int | buflen | ||
) |
void kmr_reset_ntuple | ( | struct kmr_ntuple * | u, |
int | n, | ||
int | marker | ||
) |
Resets an n-tuple U with N entries and a MARKER.
Definition at line 1234 of file kmrmoreops.c.
int kmr_put_ntuple | ( | KMR * | mr, |
struct kmr_ntuple * | u, | ||
const int | size, | ||
const void * | v, | ||
const int | len | ||
) |
Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE.
An n-tuple should be initialized by kmr_reset_ntuple() first. Note it fills with zeros the gap of the alignment padding, allowing the n-tuples to be used as opaque keys.
Definition at line 1252 of file kmrmoreops.c.
int kmr_put_ntuple_long | ( | KMR * | mr, |
struct kmr_ntuple * | u, | ||
const int | sz, | ||
long | v | ||
) |
Adds an integer value in an n-tuple U whose size is limited to SIZE.
See kmr_put_ntuple().
Definition at line 1274 of file kmrmoreops.c.
int kmr_put_ntuple_entry | ( | KMR * | mr, |
struct kmr_ntuple * | u, | ||
const int | sz, | ||
struct kmr_ntuple_entry | e | ||
) |
Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE.
See kmr_put_ntuple().
Definition at line 1284 of file kmrmoreops.c.
struct kmr_ntuple_entry kmr_nth_ntuple | ( | struct kmr_ntuple * | u, |
int | nth | ||
) |
Returns an NTH entry of an n-tuple.
It returns a pair of a length and a pointer.
Definition at line 1197 of file kmrmoreops.c.
int kmr_size_ntuple | ( | struct kmr_ntuple * | u | ) |
Returns the storage size of an n-tuple.
Definition at line 1211 of file kmrmoreops.c.
int kmr_size_ntuple_by_lengths | ( | int | n, |
int | len[] | ||
) |
Returns the storage size of an n-tuple for N entries with LEN[i] size each.
Definition at line 1221 of file kmrmoreops.c.
int kmr_add_ntuple | ( | KMR_KVS * | kvo, |
void * | k, | ||
int | klen, | ||
struct kmr_ntuple * | u | ||
) |
Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO.
Definition at line 1295 of file kmrmoreops.c.
int kmr_separate_ntuples | ( | KMR * | mr, |
const struct kmr_kv_box | kv[], | ||
const long | n, | ||
struct kmr_ntuple ** | vv[2], | ||
long | cnt[2], | ||
int | markers[2], | ||
_Bool | disallow_other_entries | ||
) |
Separates the n-tuples stored in the value part of KV into the two sets by their marker values.
It is intended to be used in reduce functions. It separates the n-tuples to the first set by marker=MARKERS[0] and to the second set by marker=MARKERS[1]. It returns two malloced arrays in VV with their sizes in CNT. The arrays VV[0] and VV[1] should be freed by the caller.
Definition at line 1318 of file kmrmoreops.c.
int kmr_product_ntuples | ( | KMR_KVS * | kvo, |
struct kmr_ntuple ** | vv[2], | ||
long | cnt[2], | ||
int | marker, | ||
int | slots[][2], | ||
int | nslots, | ||
int | keys[][2], | ||
int | nkeys | ||
) |
Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1].
It is intended to be used in reduce functions. The resulting n-tuples are created by SLOTS, which chooses i-th entry of the n-tuples by the SLOTS[i][0]-th entry from the SLOTS[i][1] set, 0 from the first set and 1 from the second set. The product n-tuples have MARKER and are inserted into KVO under the new key. The new key is selected like values using KEYS[j][0] and KEYS[j][1]. The key is not an n-tuple when NKEYS=1, or an n-tuple of KEYS[j] entries. The n-tuple key has zero as a marker. Note that it does not remove duplicate entries.
Definition at line 1528 of file kmrmoreops.c.
KMR_KVS* kmr_create_pushoff_kvs | ( | KMR * | mr, |
enum kmr_kv_field | kf, | ||
enum kmr_kv_field | vf, | ||
struct kmr_option | opt, | ||
const char * | file, | ||
const int | line, | ||
const char * | func | ||
) |
Makes a new key-value stream with the specified field data-types.
It cannot be used with checkpointing. It allocates by the size of the union, which is larger than the necessary for replacement later by an on-core KVS at kmr_add_kv_done(). See kmr_add_kv_done_pushoff().
Definition at line 85 of file kmraltkvs.c.
void kmr_init_pushoff_fast_notice_ | ( | MPI_Comm | comm, |
_Bool | verbose | ||
) |
Initializes RDMA for fast-notice.
Fast-notice is RDMA-based event notification to tell readiness of MPI messages. It is only usable with communicators having the same processes.
Definition at line 726 of file kmraltkvs.c.
void kmr_check_pushoff_fast_notice_ | ( | KMR * | mr | ) |
Check if fast-notice works.
The check should be performed immediately after initialization.
Definition at line 808 of file kmraltkvs.c.
int kmr_assign_file | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Assigns files to ranks based on data locality.
It assumes that values of key-value pairs in the input KVS are file paths and it shuffles the key-value pairs and writes results to the output KVS so that the files are assigned to near ranks. If the value of a key-value pair is file paths separated by '\0', it will find a rank near all the files specified in the value. Currently, it only works on the K computer. On the other systems, it just performs kmr_shuffle(). Effective-options: INSPECT, TAKE_CKPT. See struct kmr_option.
|
static |