KMR
Classes | Macros | Typedefs | Enumerations | Functions | Variables
kmr.h File Reference

KMR Interface. More...

#include <stdio.h>
#include <stddef.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <assert.h>

Go to the source code of this file.

Classes

struct  kmr_code_line
 Information of Source Code Line. More...
 
struct  kmr_ctx
 KMR Context. More...
 
struct  kmr_file_option
 Options to Mapping on Files. More...
 
struct  kmr_keyed_record
 Keyed-Record for Sorting. More...
 
struct  kmr_kv_box
 Handy Copy of a Key-Value Field. More...
 
union  kmr_kvs
 Key-Value Stream (abstract). More...
 
struct  kmr_kvs_block
 
struct  kmr_kvs_dummy
 Key-Value Stream (DUMMY); Mandatory Entries. More...
 
struct  kmr_kvs_entry
 
struct  kmr_kvs_list
 
struct  kmr_kvs_list_head
 
struct  kmr_kvs_oncore
 Key-Value Stream. More...
 
struct  kmr_kvs_pushoff
 Key-Value Stream with Shuffling at Addition of Key-Values. More...
 
struct  kmr_map_ms_state
 State during kmr_map_ms(). More...
 
struct  kmr_ntuple
 N-Tuple. More...
 
struct  kmr_ntuple_entry
 N-Tuple Argument. More...
 
struct  kmr_option
 Options to Mapping, Shuffling, and Reduction. More...
 
struct  kmr_pushoff_buffers
 Record of Push-Off Key-Value Stream for a Rank. More...
 
struct  kmr_spawn_info
 Spawning Info. More...
 
struct  kmr_spawn_option
 Options to Mapping by Spawns. More...
 
union  kmr_unit_sized
 Unit-Sized Storage. More...
 

Macros

#define KMR_API_ID   KMR_API_ID0(KMR_H)
 
#define KMR_API_ID0(X)   KMR_API_ID1(X)
 
#define KMR_API_ID1(X)   kmr_api_ ## X
 
#define KMR_BR0   {
 
#define KMR_BR1   }
 
#define kmr_create_kvs(MR, KF, VF)   kmr_create_kvs7((MR), (KF), (VF), kmr_noopt, __FILE__, __LINE__, __func__)
 Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes. More...
 
#define kmr_create_kvs_(MR, IGNORE)
 Makes a new key-value stream (of type KMR_KVS). More...
 
#define kmr_create_kvsx(MR, KF, VF, OPT)   kmr_create_kvs7((MR), (KF), (VF), (OPT), __FILE__, __LINE__, __func__)
 Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes. More...
 
#define KMR_H   20160425
 
#define kmr_init()   kmr_init_2(KMR_API_ID)
 Sets up the environment. More...
 
#define KMR_JOB_NAME_LEN   256
 
#define kmr_kv_cake   kmr_kv_box
 
#define KMR_KVS_MAGIC_OK(X)
 
#define kmr_map(KVI, KVO, ARG, OPT, M)
 Maps simply. More...
 
#define kmr_reduce(KVI, KVO, ARG, OPT, R)
 Reduces key-value pairs. More...
 
#define kmr_sort_a_batch(X0, X1, X2, X3)   kmr_sort_locally(X0,X1,X2,X3)
 
#define KMR_TAG_SPAWN_REPLY   500
 
#define KMR_TAG_SPAWN_REPLY1   501
 

Typedefs

typedef struct kmr_ctx KMR
 
typedef union kmr_kvs KMR_KVS
 
typedef int(* kmr_mapfn_t) (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index)
 Map-function Type. More...
 
typedef int(* kmr_redfn_t) (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg)
 Reduce-function Type. More...
 

Enumerations

enum  kmr_kv_field {
  KMR_KV_BAD, KMR_KV_OPAQUE, KMR_KV_CSTRING, KMR_KV_INTEGER,
  KMR_KV_FLOAT8, KMR_KV_POINTER_OWNED, KMR_KV_POINTER_UNMANAGED
}
 Datatypes of Keys or Values. More...
 
enum  kmr_kvs_magic { KMR_KVS_BAD, KMR_KVS_ONCORE, KMR_KVS_PUSHOFF, KMR_KVS_ONCORE_PACKED }
 

Functions

int kmr_add_identity_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long i)
 Adds a given key-value pair unmodified. More...
 
int kmr_add_kv (KMR_KVS *kvs, const struct kmr_kv_box kv)
 Adds a key-value pair. More...
 
int kmr_add_kv1 (KMR_KVS *kvs, void *k, int klen, void *v, int vlen)
 Adds a key-value pair as given directly by a pointer. More...
 
int kmr_add_kv_done (KMR_KVS *kvs)
 Marks finished adding key-value pairs. More...
 
int kmr_add_kv_quick_ (KMR_KVS *kvs, const struct kmr_kv_box kv)
 
int kmr_add_kv_space (KMR_KVS *kvs, const struct kmr_kv_box kv, void **keyp, void **valuep)
 Adds a key-value pair, but only allocates a space and returns the pointers to the key and the value parts. More...
 
int kmr_add_ntuple (KMR_KVS *kvo, void *k, int klen, struct kmr_ntuple *u)
 Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO. More...
 
int kmr_add_string (KMR_KVS *kvs, const char *k, const char *v)
 Adds a key-value pair of strings. More...
 
int kmr_assign_file (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Assigns files to ranks based on data locality. More...
 
void kmr_check_pushoff_fast_notice_ (KMR *mr)
 Check if fast-notice works. More...
 
int kmr_choose_first_part (KMR_KVS *kvi, KMR_KVS *kvo, long n, struct kmr_option opt)
 Chooses the first N entries from a key-value stream KVI. More...
 
int kmr_concatenate_kvs (KMR_KVS *kvs[], int nkvs, KMR_KVS *kvo, struct kmr_option opt)
 Concatenates a number of KVSes to one. More...
 
int kmr_copy_info_to_kvs (MPI_Info src, KMR_KVS *kvo)
 Copies mpi-info entries into kvs. More...
 
int kmr_copy_kvs_to_info (KMR_KVS *kvi, MPI_Info dst)
 Copies kvs entries into mpi-info. More...
 
int kmr_copy_to_array_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long i)
 Copies the entry in the array. More...
 
KMR * kmr_create_context (const MPI_Comm comm, const MPI_Info conf, const char *name)
 Makes a new KMR context (a context has type KMR). More...
 
KMR * kmr_create_context_world (void)
 
KMR * kmr_create_dummy_context (void)
 
KMR_KVS * kmr_create_kvs7 (KMR *mr, enum kmr_kv_field k, enum kmr_kv_field v, struct kmr_option opt, const char *, const int, const char *)
 Makes a new key-value stream with the specified field data-types. More...
 
KMR_KVS * kmr_create_pushoff_kvs (KMR *mr, enum kmr_kv_field kf, enum kmr_kv_field vf, struct kmr_option opt, const char *, const int, const char *)
 Makes a new key-value stream with the specified field data-types. More...
 
int kmr_distribute (KMR_KVS *kvi, KMR_KVS *kvo, _Bool cyclic, struct kmr_option opt)
 Distributes key-values so that each rank has approximately the same number of pairs. More...
 
static void kmr_dummy_dummy_dummy_ (void)
 
int kmr_dump_keyed_records (const struct kmr_keyed_record *ev, KMR_KVS *kvi)
 
int kmr_dump_kv (struct kmr_kv_box kv, const KMR_KVS *kvs, char *buf, int buflen)
 Dumps contents of a key-value. More...
 
int kmr_dump_kvs (KMR_KVS *kvs, int flag)
 Dumps contents of a key-value stream to stdout. More...
 
int kmr_dump_kvs_stats (KMR_KVS *, int level)
 Dumps contents of a key-value stream whose values are pairs. More...
 
void kmr_dump_opaque (const char *p, int siz, char *buf, int buflen)
 Puts the string of the key or value field into a buffer BUF as printable string. More...
 
void kmr_dump_slot (union kmr_unit_sized e, int len, enum kmr_kv_field data, char *buf, int buflen)
 
int kmr_file_enumerate (KMR *mr, char **names, int n, KMR_KVS *kvo, struct kmr_file_option fopt)
 
int kmr_fin (void)
 Clears the environment. More...
 
void kmr_fin_pushoff_fast_notice_ (void)
 
int kmr_find_key (KMR_KVS *kvi, struct kmr_kv_box ki, struct kmr_kv_box *vo)
 Finds a key-value pair for a key. More...
 
int kmr_find_string (KMR_KVS *kvi, const char *k, const char **vq)
 Finds the key K in the key-value stream KVS. More...
 
int kmr_free_context (KMR *mr)
 Releases a context created with kmr_create_context(). More...
 
int kmr_free_kvs (KMR_KVS *kvs)
 Releases a key-value stream (type KMR_KVS). More...
 
KMR * kmr_get_context_of_kvs (KMR_KVS const *kvs)
 
int kmr_get_element_count (KMR_KVS *kvs, long *v)
 Gets the total number of key-value pairs. More...
 
MPI_Comm * kmr_get_spawner_communicator (KMR *mr, long index)
 Obtains (a reference to) a parent inter-communicator of a spawned process. More...
 
int kmr_histogram_count_by_ranks (KMR_KVS *kvs, long *frq, double *var, _Bool rankzeroonly)
 Fills an integer array FRQ[i] with the count of the elements of each rank. More...
 
int kmr_init_2 (int ignore)
 
void kmr_init_pushoff_fast_notice_ (MPI_Comm, _Bool verbose)
 Initializes RDMA for fast-notice. More...
 
int kmr_legal_minimum_field_size (KMR *mr, enum kmr_kv_field f)
 Returns a minimum byte size of the field: 8 for INTEGER and FLOAT8, 0 for others. More...
 
int kmr_local_element_count (KMR_KVS *kvs, long *v)
 Gets the number of key-value pairs locally on each rank. More...
 
int kmr_map9 (_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m, const char *, const int, const char *)
 Maps simply. More...
 
int kmr_map_file_names (KMR *mr, char **names, int n, struct kmr_file_option fopt, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps on file names. More...
 
int kmr_map_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps until some key-values are added. More...
 
int kmr_map_getline (KMR *mr, FILE *f, long limit, _Bool largebuffering, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Calls a map-function M for each line by getline() on an input F. More...
 
int kmr_map_getline_in_memory_ (KMR *mr, void *b, size_t sz, long limit, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 
int kmr_map_ms (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps in master-slave mode. More...
 
int kmr_map_ms_commands (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, struct kmr_spawn_option sopt, kmr_mapfn_t m)
 Maps in master-slave mode, specialized to run serial commands. More...
 
int kmr_map_on_rank_zero (KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps on rank0 only. More...
 
int kmr_map_once (KMR_KVS *kvo, void *arg, struct kmr_option opt, _Bool rank_zero_only, kmr_mapfn_t m)
 Maps once. More...
 
int kmr_map_parallel_processes (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
 Maps on processes started by MPI_Comm_spawn() to run independent MPI processes, which will not communicate to the parent. More...
 
int kmr_map_processes (_Bool nonmpi, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
 Maps on processes started by MPI_Comm_spawn() to run independent processes. More...
 
int kmr_map_rank_by_rank (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps sequentially with rank by rank for debugging. More...
 
int kmr_map_serial_processes (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
 Maps on processes started by MPI_Comm_spawn() to run serial processes. More...
 
int kmr_map_skipping (long from, long stride, long limit, _Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps by skipping the number of entries. More...
 
int kmr_map_via_spawn (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
 Maps on processes started by MPI_Comm_spawn(). More...
 
int kmr_match (KMR_KVS *kvi0, KMR_KVS *kvi1, KMR_KVS *kvo, struct kmr_option opt)
 Makes key-value pairs as products of the two values in two key-value streams. More...
 
int kmr_move_kvs (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Moves the contents of the input KVI to the output KVO. More...
 
struct kmr_ntuple_entry kmr_nth_ntuple (struct kmr_ntuple *u, int nth)
 Returns an NTH entry of an n-tuple. More...
 
int kmr_pairing (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Replaces a value part with a key-value pairing. More...
 
void kmr_print_statistics_on_pushoff (KMR *mr, char *titlestring)
 
int kmr_product_ntuples (KMR_KVS *kvo, struct kmr_ntuple **vv[2], long cnt[2], int newmarker, int slots[][2], int nslots, int keys[][2], int nkeys)
 Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1]. More...
 
int kmr_put_ntuple (KMR *mr, struct kmr_ntuple *u, const int sz, const void *v, const int vlen)
 Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE. More...
 
int kmr_put_ntuple_entry (KMR *mr, struct kmr_ntuple *u, const int sz, struct kmr_ntuple_entry e)
 Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE. More...
 
int kmr_put_ntuple_long (KMR *mr, struct kmr_ntuple *u, const int sz, long v)
 Adds an integer value in an n-tuple U whose size is limited to SIZE. More...
 
int kmr_ranking (KMR_KVS *kvi, KMR_KVS *kvo, long *count, struct kmr_option opt)
 Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT. More...
 
int kmr_read_file_by_segments (KMR *mr, char *file, int color, void **buffer, off_t *readsize)
 Reads one file by segments and reassembles by all-gather. More...
 
int kmr_read_files_reassemble (KMR *mr, char *file, int color, off_t offset, off_t bytes, void **buffer, off_t *readsize)
 Reassembles files reading by ranks. More...
 
int kmr_receive_kvs_from_spawned_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index)
 Collects key-value pairs generated by spawned processes. More...
 
int kmr_reduce9 (_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r, const char *, const int, const char *)
 Reduces key-value pairs. More...
 
int kmr_reduce_as_one (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r)
 Calls a reduce-function once as if all key-value pairs had the same key. More...
 
int kmr_reduce_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r)
 Reduces until some key-values are added. More...
 
int kmr_replicate (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Replicates key-value pairs to be visible on all ranks, that is, it has the effect of bcast or all-gather. More...
 
int kmr_reply_to_spawner (KMR *mr)
 Sends a reply message in the spawned process, which tells it is ready to finish and may have some data to send to the spawner in kmr_map_via_spawn(). More...
 
void kmr_reset_ntuple (struct kmr_ntuple *u, int n, int marker)
 Resets an n-tuple U with N entries and a MARKER. More...
 
int kmr_restore_kvs (KMR_KVS *kvo, void *data, size_t sz, struct kmr_option opt)
 Unpacks locally the contents of a key-value stream from a byte array. More...
 
int kmr_retrieve_keyed_records (KMR_KVS *kvs, struct kmr_keyed_record *ev, long n, _Bool shuffling, _Bool ranking)
 Fills keyed records in an array for sorting. More...
 
int kmr_retrieve_kvs_entries (KMR_KVS *kvs, struct kmr_kvs_entry **ev, long n)
 Fills local key-value entries in an array for inspection. More...
 
int kmr_reverse (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Makes a new pair by swapping the key and the value in each pair. More...
 
int kmr_save_kvs (KMR_KVS *kvi, void **dataq, size_t *szq, struct kmr_option opt)
 Packs locally the contents of a key-value stream to a byte array. More...
 
int kmr_scan_locally (KMR_KVS *kvi, KMR_KVS *carryin, KMR_KVS *kvo, KMR_KVS *carryout, kmr_redfn_t r)
 Scans every key-value with a reduce-function locally (independently on each rank). More...
 
int kmr_scan_on_values (KMR_KVS *kvi, KMR_KVS *kvo, KMR_KVS *total, kmr_redfn_t r)
 Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL). More...
 
int kmr_send_kvs_to_spawner (KMR *mr, KMR_KVS *kvs)
 Sends the KVS from a spawned process to the map-function of the spawner. More...
 
int kmr_separate_ntuples (KMR *mr, const struct kmr_kv_box kv[], const long n, struct kmr_ntuple **vv[2], long cnt[2], int markers[2], _Bool disallow_other_entries)
 Separates the n-tuples stored in the value part of KV into the two sets by their marker values. More...
 
int kmr_shuffle (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Shuffles key-value pairs to the appropriate destination ranks. More...
 
int kmr_shuffle_leveling_pair_count (KMR_KVS *kvi, KMR_KVS *kvo)
 Shuffles key-values so that each rank has approximately the same number of pairs. More...
 
int kmr_size_ntuple (struct kmr_ntuple *u)
 Returns the storage size of an n-tuple. More...
 
int kmr_size_ntuple_by_lengths (int n, int len[])
 Returns the storage size of an n-tuple for N entries with LEN[i] size each. More...
 
int kmr_sort (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sorts a key-value stream globally. More...
 
int kmr_sort_by_one (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sorts on rank0, a degenerate case for a small number of keys. More...
 
int kmr_sort_large (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sorts a key-value stream by the regular or the random sampling-sort. More...
 
int kmr_sort_locally (KMR_KVS *kvi, KMR_KVS *kvo, _Bool shuffling, struct kmr_option opt)
 Reorders key-value pairs in a single rank. More...
 
int kmr_sort_small (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sorts a key-value stream, by partitioning to equal ranges. More...
 
int kmr_take_one (KMR_KVS *kvi, struct kmr_kv_box *kv)
 Extracts a single key-value pair locally in the key-value stream KVI. More...
 
int kmr_unpairing (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Extracts a key-value pair from a pairing in the value part, discarding the original key. More...
 

Variables

int KMR_API_ID
 
static const struct kmr_file_option kmr_fnoopt = {0, 0, 0, 0}
 
union {
   unsigned long   bits
 
   struct kmr_file_option   o
 
} kmr_foptmask = {{1, 1, 1, 1}}
 
static const size_t kmr_kvs_block_header = offsetof(struct kmr_kvs_block, data)
 
static const size_t kmr_kvs_entry_header = offsetof(struct kmr_kvs_entry, c)
 Size of an Entry Header. More...
 
static const struct kmr_option kmr_noopt = {0, 0, 0, 0, 0, 0, 0}
 
union {
   unsigned long   bits
 
   struct kmr_option   o
 
} kmr_optmask = {{1, 1, 1, 1, 1, 1, 1}}
 
static const struct kmr_spawn_option kmr_snoopt = {0, 0, 0, 0, 0}
 
union {
   unsigned long   bits
 
   struct kmr_spawn_option   o
 
} kmr_soptmask = {{1, 1, 1, 1, 1}}
 
const int kmr_version
 

Detailed Description

KMR Interface.

GENERAL NOTES. (1) The sizes of key-value fields are rounded up to 8-byte boundary.

Definition in file kmr.h.

Macro Definition Documentation

#define kmr_create_kvsx (   MR,
  KF,
  VF,
  OPT 
)    kmr_create_kvs7((MR), (KF), (VF), (OPT), __FILE__, __LINE__, __func__)

Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes.

Definition at line 65 of file kmr.h.

#define kmr_create_kvs (   MR,
  KF,
  VF 
)    kmr_create_kvs7((MR), (KF), (VF), kmr_noopt, __FILE__, __LINE__, __func__)

Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes.

Definition at line 71 of file kmr.h.

#define kmr_create_kvs_ (   MR,
  IGNORE 
)
Value:
kmr_create_kvs7((MR), KMR_KV_BAD, KMR_KV_BAD, kmr_noopt, \
__FILE__, __LINE__, __func__)
KMR_KVS * kmr_create_kvs7(KMR *mr, enum kmr_kv_field k, enum kmr_kv_field v, struct kmr_option opt, const char *, const int, const char *)
Makes a new key-value stream with the specified field data-types.
Definition: kmrbase.c:510

Makes a new key-value stream (of type KMR_KVS).

Definition at line 76 of file kmr.h.

#define kmr_map (   KVI,
  KVO,
  ARG,
  OPT,
  M 
)
Value:
kmr_map9(0, (KVI), (KVO), (ARG), (OPT), (M), \
__FILE__, __LINE__, __func__)
int kmr_map9(_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m, const char *, const int, const char *)
Maps simply.
Definition: kmrbase.c:1289

Maps simply.

See kmr_map9().

Definition at line 82 of file kmr.h.

#define kmr_reduce (   KVI,
  KVO,
  ARG,
  OPT,
  R 
)
Value:
kmr_reduce9(0, (KVI), (KVO), (ARG), (OPT), (R), \
__FILE__, __LINE__, __func__)
int kmr_reduce9(_Bool stop_when_some_added, KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r, const char *, const int, const char *)
Reduces key-value pairs.
Definition: kmrbase.c:2549

Reduces key-value pairs.

See kmr_reduce9().

Definition at line 88 of file kmr.h.

#define KMR_KVS_MAGIC_OK (   X)
Value:
((X) == KMR_KVS_ONCORE || (X) == KMR_KVS_ONCORE_PACKED \
|| (X) == KMR_KVS_PUSHOFF)

Definition at line 399 of file kmr.h.

#define kmr_init ( )    kmr_init_2(KMR_API_ID)

Sets up the environment.

Currently it does nothing.

Definition at line 747 of file kmr.h.

Typedef Documentation

typedef int(* kmr_mapfn_t) (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg, const long index)

Map-function Type.

A map-function gets a key-value pair as struct kmr_kv_box KV. KVI is the input key-value stream, but it can usually be ignored (its potential usage is to check the content type of the key and value fields). KVO is the output key-value stream. The pointer ARG is the one passed to kmr_map(); it has no specific purpose and is used to pass any argument to a map-function. INDEX is the count of map-function calls, and it is usually equal to the index of a key-value pair in the input. It is guaranteed to be distinct, and can be used for race-free accesses to the pointer ARG.

Definition at line 689 of file kmr.h.

typedef int(* kmr_redfn_t) (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *arg)

Reduce-function Type.

A reduce-function gets key-value pairs as an array KV of struct kmr_kv_box. N is the number of key-value pairs. KVI is the input key-value stream, but it can usually be ignored. KVO is the output key-value stream. The pointer ARG is the one passed to kmr_reduce(); it has no specific purpose and is used to pass any argument to a reduce-function.

Definition at line 700 of file kmr.h.

Enumeration Type Documentation

enum kmr_kv_field

Datatypes of Keys or Values.

It indicates the field data of keys or values. KMR_KV_OPAQUE is a variable-sized byte vector, and KMR_KV_CSTRING is a non-wide C string, and they are dealt with in exactly the same way. KMR_KV_INTEGER is a long integer, and KMR_KV_FLOAT8 is a double. The datatypes are mostly uninterpreted in mapping/reducing, except in sorting. There are two other types for pointers. Pointers can be stored as they are (unlike opaque data, which are embedded in the field), but are converted to opaque ones before communication. KMR_KV_POINTER_OWNED is an allocated pointer, and the data will be freed on consuming a key-value stream. KMR_KV_POINTER_UNMANAGED is a pointer to possibly shared data.

Definition at line 325 of file kmr.h.

Function Documentation

int kmr_fin ( void  )

Clears the environment.

Definition at line 124 of file kmrbase.c.

KMR* kmr_create_context ( const MPI_Comm  comm,
const MPI_Info  conf,
const char *  identifying_name 
)

Makes a new KMR context (a context has type KMR).

A KMR context is a record of common information to all key-value streams. COMM is a communicator for use inside. It dups the given communicator inside, to avoid conflicts with other calls to MPI functions. MPI should be initialized with a thread support level of either MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE. CONF specifies configuration options. It should be freed after a call. The options can differ on each rank, (in this version). The configuration options are first taken from a file with a name specified by the environment variable "KMROPTION" on rank0, and they are merged with the explicitly given ones. The KMROPTION file has the file format of Java properties (but only in Latin characters). Refer to JDK documents on "java.util.Properties" (on "load" method) for the file format. The explicitly given ones have precedence. IDENTIFYING_NAME is just recorded in the context, and has no specific use. It may be null.

Definition at line 147 of file kmrbase.c.

int kmr_free_context ( KMR *  mr)

Releases a context created with kmr_create_context().

Definition at line 326 of file kmrbase.c.

KMR_KVS* kmr_create_kvs7 ( KMR *  mr,
enum kmr_kv_field  kf,
enum kmr_kv_field  vf,
struct kmr_option  opt,
const char *  file,
const int  line,
const char *  func 
)

Makes a new key-value stream with the specified field data-types.

Definition at line 510 of file kmrbase.c.

int kmr_free_kvs ( KMR_KVS *  kvs)

Releases a key-value stream (type KMR_KVS).

Normally, mapper/shuffler/reducer consumes and frees the input key-value stream, and explicit calls are unnecessary. Here, mapper/shuffler/reducer includes kmr_map(), kmr_map_on_rank_zero(), kmr_map_ms(), kmr_shuffle(), kmr_replicate(), kmr_reduce(), and kmr_reduce_as_one().

Definition at line 621 of file kmrbase.c.

int kmr_move_kvs ( KMR_KVS *  kvi,
KMR_KVS *  kvo,
struct kmr_option  opt 
)

Moves the contents of the input KVI to the output KVO.

It consumes the input KVI. Calling kmr_map() with a null map-function has the same effect. Effective-options: TAKE_CKPT. See struct kmr_option.

Definition at line 534 of file kmrbase.c.

int kmr_concatenate_kvs ( KMR_KVS *  kvs[],
int  nkvs,
KMR_KVS *  kvo,
struct kmr_option  opt 
)

Concatenates a number of KVSes to one.

Inputs are consumed. (It is fast because the key-value data is stored internally as a list of data blocks, and this routine just links them). Note that concatenating KVS can in effect be performed by consecutive calls to kmr_map() with the KEEP_OPEN option using the same output KVS. Effective-options: none.

Definition at line 2696 of file kmrbase.c.

int kmr_add_kv ( KMR_KVS *  kvs,
const struct kmr_kv_box  kv 
)

Adds a key-value pair.

(It is with serialization when a map-function is threaded).

Definition at line 751 of file kmrbase.c.

int kmr_add_kv1 ( KMR_KVS *  kvs,
void *  k,
int  klen,
void *  v,
int  vlen 
)

Adds a key-value pair as given directly by a pointer.

An integer or a double should be passed by a pointer (thus like &v).

Definition at line 779 of file kmrbase.c.

int kmr_add_kv_space ( KMR_KVS *  kvs,
const struct kmr_kv_box  kv,
void **  keyp,
void **  valuep 
)

Adds a key-value pair, but only allocates a space and returns the pointers to the key and the value parts.

It enables creating large key/value data directly in the space. It does not return a proper value if a key/value field is not a pointer. (It cannot be used with a "push-off" key-value stream, because its buffer will be sent out and filling in the buffer late causes a race).

Definition at line 843 of file kmrbase.c.

int kmr_add_kv_done ( KMR_KVS *  kvs)

Marks finished adding key-value pairs.

Further addition will be prohibited. Normally, mapper/shuffler/reducer finishes the output key-value stream by itself, and explicit calls are unnecessary. Here, mapper/shuffler/reducer includes kmr_map(), kmr_map_on_rank_zero(), kmr_map_ms(), kmr_shuffle(), kmr_replicate(), and kmr_reduce().

Definition at line 881 of file kmrbase.c.

int kmr_add_string ( KMR_KVS *  kvs,
const char *  k,
const char *  v 
)

Adds a key-value pair of strings.

The key and value fields should be of opaque data.

Definition at line 913 of file kmrbase.c.

int kmr_map9 ( _Bool  stop_when_some_added,
KMR_KVS *  kvi,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m,
const char *  file,
const int  line,
const char *  func 
)

Maps simply.

It consumes the input key-value stream KVI unless INSPECT option is marked. The output key-value stream KVO can be null, but in that case, a map-function cannot add key-value pairs. The pointer ARG is just passed to a map-function as a general argument, where accesses to it should be race-free, since a map-function is called by threads by default. M is the map-function. See the description on the type kmr_mapfn_t. It copies the contents of the input KVI to the output KVO, when a map-function is null. During processing, it first makes an array pointing to the key-value entries in each data block, and works on it to ease threading/parallelization. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, COLLAPSE, TAKE_CKPT. See struct kmr_option.

Definition at line 1289 of file kmrbase.c.

int kmr_map_skipping ( long  from,
long  stride,
long  limit,
_Bool  stop_when_some_added,
KMR_KVS *  kvi,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps by skipping the number of entries.

It calls a map-function on entries from FROM, skipping by STRIDE, up to LIMIT non-inclusive. See kmr_map().

Definition at line 1134 of file kmrbase.c.

int kmr_map_once ( KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
_Bool  rank_zero_only,
kmr_mapfn_t  m 
)

Maps once.

It calls a map-function once with a dummy key-value stream and a dummy key-value pair. See kmr_map(). Effective-options: KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 1402 of file kmrbase.c.

int kmr_map_on_rank_zero ( KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps on rank0 only.

It calls a map-function once with a dummy key-value stream and a dummy key-value pair. It is used to avoid low-level conditionals like (myrank==0). See kmr_map(). Effective-options: KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 1456 of file kmrbase.c.

int kmr_map_rank_by_rank ( KMR_KVS *  kvi,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps sequentially with rank by rank for debugging.

See kmr_map.

Definition at line 1339 of file kmrbase.c.

int kmr_map_ms ( KMR_KVS *  kvi,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps in master-slave mode.

The input key-value stream should be empty except on rank0 where the master is running (the contents on the slave ranks are ignored). It consumes the input key-value stream. The master does delivery only. The master returns frequently to give a chance for check-pointing, etc. The master returns prematurely each time one pair is delivered, and those returns are marked by MPI_ERR_ROOT indicating more tasks remain. In contrast, slaves return only after all tasks are done. The state that needs to be kept during kmr_map_ms() for check-pointing is in the key-value streams KVI and KVO on the master. Note that this totally diverges from bulk-synchronous execution. It does not accept key-value field types KMR_KV_POINTER_OWNED or KMR_KV_POINTER_UNMANAGED. Effective-options: NOTHREADING, KEEP_OPEN. See struct kmr_option.

Definition at line 310 of file kmrmapms.c.

int kmr_map_ms_commands ( KMR_KVS *  kvi,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
struct kmr_spawn_option  sopt,
kmr_mapfn_t  m 
)

Maps in master-slave mode, specialized to run serial commands.

It fork-execs commands specified by key-values, then calls a map-function when the commands finish. It takes the commands in the same way as kmr_map_via_spawn(). The commands must never be MPI programs. It is implemented with kmr_map_ms(); see the comments on kmr_map_ms().

Definition at line 2198 of file kmrmapms.c.

int kmr_map_for_some ( KMR_KVS *  kvi,
KMR_KVS *  kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps until some key-values are added.

It stops processing when the output is non-empty. It does not guarantee singleness. Existence/emptiness can be checked by kmr_get_element_count().

Definition at line 1170 of file kmrmoreops.c.

int kmr_map_via_spawn ( KMR_KVS *  kvi,
KMR_KVS *  kvo,
void *  arg,
MPI_Info  info,
struct kmr_spawn_option  opt,
kmr_mapfn_t  mapfn 
)

Maps on processes started by MPI_Comm_spawn().

It is intended to run custom MPI programs which will return a reply as MPI messages. Consider other variations to run independent processes, when the spawned processes will not interact with the parent: kmr_map_processes() or kmr_map_ms_commands().
The spawner (parent) spawns processes specified by key-value pairs. The key part is ignored, and the value part is a list of null-separated strings which constitutes a command and arguments. The option SEPARATOR_SPACE changes the separator character to whitespaces. If the first string is "maxprocs=n", then the number of processes is taken from this string. Or, an MPI_Info entry "maxprocs" in INFO is used, and "maxprocs" is common to all spawns. It is an error if neither is specified. Multiple spawners (more than one rank can have entries to spawn) divide the universe of processes evenly among them, and try to control the number of the simultaneously running processes in the range.
The option REPLY_EACH or REPLY_ROOT lets the spawner wait for the reply messages from the spawned processes, and then the spawner calls the map-function. A reply message is of the tag KMR_TAG_SPAWN_REPLY=500 and length zero, and kmr_reply_to_spawner() can be used to send this reply. When none of REPLY_EACH or REPLY_ROOT are specified, the spawner immediately calls the map-function one-by-one in the FIFO order (before the spawned processes finish). In that case, no load-balance is taken. The map-function should wait for the spawned processes to finish, otherwise, the spawner starts next spawns continuously and runs out the processes, which causes the MPI runtime to signal an error.
Communication between the spawned processes and the map-function of the spawner is through the inter-communicator. The parent inter-communicator of the spawned processes can be taken by MPI_Comm_get_parent() as usual. The inter-communicator at the spawner side can be obtained by calling kmr_get_spawner_communicator() inside a map-function.
The INFO argument is passed to MPI_Comm_spawn() unchanged.
NOTE: There is no way to check the availability of processes for spawning in the MPI specification and MPI implementations. And, the MPI runtime signals errors when it runs out the processes. Thus, it puts a sleep (1 sec) in between MPI_Comm_spawn() calls to allow clean-ups in the MPI runtime and to avoid timing issues.
INTERFACE CHANGE: Set mr->spawn_pass_intercomm_in_argument=1 to enable the old interface, where the map-function MAPFN is called with the kmr_spawn_state structure as the general argument. The argument ARG passed to the mapper is stored in the MAPARG slot in the kmr_spawn_state structure. When TAKE_CKPT option is specified, a checkpoint data file of the output key-value stream is saved if both CKPT_ENABLE and CKPT_SELECTIVE global options are set.

Definition at line 1870 of file kmrmapms.c.

int kmr_reply_to_spawner ( KMR mr)

Sends a reply message in the spawned process, which tells it is ready to finish and may have some data to send to the spawner in kmr_map_via_spawn().

Definition at line 1776 of file kmrmapms.c.

MPI_Comm* kmr_get_spawner_communicator ( KMR mr,
long  index 
)

Obtains (a reference to) a parent inter-communicator of a spawned process.

It is used inside a map-function of kmr_map_via_spawn(); Pass INDEX the same argument to a map-function. It returns a reference for the side-effect of freeing a communicator in a map-function.

Definition at line 1799 of file kmrmapms.c.

int kmr_map_processes ( _Bool  nonmpi,
KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
MPI_Info  info,
struct kmr_spawn_option  opt,
kmr_mapfn_t  mapfn 
)

Maps on processes started by MPI_Comm_spawn() to run independent processes.

It either calls kmr_map_parallel_processes() or kmr_map_serial_processes() with regard to the NONMPI argument. See the comments of kmr_map_parallel_processes() and kmr_map_serial_processes().

Definition at line 1965 of file kmrmapms.c.

int kmr_map_parallel_processes ( KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
MPI_Info  info,
struct kmr_spawn_option  opt,
kmr_mapfn_t  mapfn 
)

Maps on processes started by MPI_Comm_spawn() to run independent MPI processes, which will not communicate to the parent.

The programs need to be MPI. It is a variation of kmr_map_via_spawn(), and refer to the comment on it for the basic usage. Since the spawned program does not know the parent, there is no way to communicate from the spawner. The map-function is called after the processes have exited, so that the map-function can check the result files created by the spawned processes.
This function detects the end of spawned processes using a watch-program "kmrwatch0", by checking a closure of a socket to which "kmrwatch0" connected.
NOTE THAT THIS OPERATION WILL BLOCK INDEFINITELY AND FAIL, DEPENDING ON THE BEHAVIOR OF AN MPI IMPLEMENTATION. It is checked to work with Open MPI (1.6) and MPICH2 (1.5), but not with Intel MPI (4.1) and YAMPI2 (GridMPI 2.1). It depends on the behavior that MPI_Comm_free() on the parent and MPI_Finalize() on the child do not synchronize. The quote of the standard (MPI 2.x) says: "Though collective, MPI_Comm_free is anticipated that this operation will normally be implemented to be local, ..." The blocking situation can be checked by enabling tracing around calls to MPI_Comm_free() by (mr->trace_map_spawn=1).
NOTE (on MPI spawn implementations): Open MPI (1.6) allows spawning non-MPI processes by passing a special MPI_Info. MPICH2 (1.5) does not allow spawning non-MPI processes, because MPI_Comm_spawn() of the parent and MPI_Init() of the child synchronize. In Intel MPI (4.1) and YAMPI2 (GridMPI), the calls of MPI_Comm_free() on the parent and MPI_Finalize() or MPI_Comm_free() on the child synchronize, and thus, they require calling MPI_Comm_free() at an appropriate time on the parent.
Options REPLY_ROOT and REPLY_EACH have no effect. When TAKE_CKPT option is specified, a checkpoint data file of the output key-value stream is saved if both CKPT_ENABLE and CKPT_SELECTIVE global options are set.

Definition at line 1915 of file kmrmapms.c.

int kmr_map_serial_processes ( KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
MPI_Info  info,
struct kmr_spawn_option  opt,
kmr_mapfn_t  mapfn 
)

Maps on processes started by MPI_Comm_spawn() to run serial processes.

This should NOT be used; Use kmr_map_ms_commands(), instead. Fork-execing in kmr_map_ms_commands() is simpler than spawning. See also the comment on kmr_map_via_spawn() and kmr_map_parallel_processes(). The map-function is called after the processes have exited, thus, there is no way to communicate from the map-function. Instead, the map-function can check the result files created by the spawned processes.
This function detects the end of spawned processes using a watch-program "kmrwatch0" which sends a reply to the parent in place of the serial program. Options REPLY_ROOT and REPLY_EACH have no effect. When TAKE_CKPT option is specified, a checkpoint data file of the output key-value stream is saved if both CKPT_ENABLE and CKPT_SELECTIVE global options are set.

Definition at line 1945 of file kmrmapms.c.

int kmr_send_kvs_to_spawner ( KMR mr,
KMR_KVS kvs 
)

Sends the KVS from a spawned process to the map-function of the spawner.

It is paired with kmr_receive_kvs_from_spawned_fn().

Definition at line 2005 of file kmrmapms.c.

int kmr_receive_kvs_from_spawned_fn ( const struct kmr_kv_box  kv,
const KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
const long  index 
)

Collects key-value pairs generated by spawned processes.

It is a map-function to be used with kmr_map_via_spawn() with the REPLY_EACH option. The spawned processes call kmr_send_kvs_to_spawner() to send generated key-value pairs, and this function receives and puts them into KVO. PROTOCOL: The reply consists of one or two messages with the tag KMR_TAG_SPAWN_REPLY1=501. One is the data size, which is followed by a marshaled key-value stream when the data size is non-zero.

Definition at line 2039 of file kmrmapms.c.

int kmr_sort_locally ( KMR_KVS kvi,
KMR_KVS kvo,
_Bool  shuffling,
struct kmr_option  opt 
)

Reorders key-value pairs in a single rank.

It sorts pairs when SHUFFLING is false, or gathers pairs with the same hashed keys adjacent when SHUFFLING is true. When shuffling, it respects only equality of keys, not their ordering. The sort-keys for shuffling are destination ranks for shuffling (taking a modulo of the hashed key with nprocs). As a sorting, it is NOT-STABLE due to quick-sort used inside. It converts pointer keys and values to opaque ones for sending.
Sorting on a key-value stream is by memcmp(), unless the keys are integer or floating-point numbers (ordering on integers and memcmp() are different). Sorting on non-numbers is performed in two steps: the first step sorts by the integer rankings, and the second by the specified comparator. And thus, the comparator is required to have a corresponding generator of integer rankings. It consumes the input key-value stream. Effective-options: NOTHREADING, INSPECT, KEY_AS_RANK.

Definition at line 1993 of file kmrbase.c.

int kmr_shuffle ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Shuffles key-value pairs to the appropriate destination ranks.

It first sorts pairs by the destination ranks of the keys, and then exchanges pairs with all-to-all communication. It converts pointer keys and values to opaque ones for sending during the sorting stage. Note that the key-value pairs are sorted by the hash-values prior to exchange. Effective-options: INSPECT, KEY_AS_RANK, TAKE_CKPT. See struct kmr_option.

Definition at line 2036 of file kmrbase.c.

int kmr_replicate ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Replicates key-value pairs to be visible on all ranks, that is, it has the effect of bcast or all-gather.

It gathers pairs on rank0 only by the option RANK_ZERO. It moves stably, keeping the ordering of ranks and the ordering of local key-value pairs. Effective-options: INSPECT, RANK_ZERO, TAKE_CKPT. See struct kmr_option.

Definition at line 2182 of file kmrbase.c.

int kmr_reduce9 ( _Bool  stop_when_some_added,
KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
struct kmr_option  opt,
kmr_redfn_t  r,
const char *  file,
const int  line,
const char *  func 
)

Reduces key-value pairs.

It does not include shuffling, and thus, it requires being preceded by shuffling. Or, it works on local data (as a local combiner), if it is not preceded by shuffling. It always consumes the input key-value stream KVI. An output key-value stream KVO can be null. It passes an array of key-value pairs to a reduce-function whose keys are all equal (equality is by bits). The pointer ARG is just passed to a reduce-function as a general argument, where accesses to it should be race-free, since a reduce-function is called by threads by default. R is a reduce-function. See the description on the type kmr_redfn_t. A reduce-function may see a different input key-value stream (internally created one) instead of the one given. During reduction, it first scans adjacent equal keys, then calls a given reduce-function. Effective-options: NOTHREADING, INSPECT, TAKE_CKPT. See struct kmr_option.

Definition at line 2549 of file kmrbase.c.

int kmr_reduce_as_one ( KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
struct kmr_option  opt,
kmr_redfn_t  r 
)

Calls a reduce-function once as if all key-value pairs had the same key.

See kmr_reduce(). Effective-options: INSPECT, TAKE_CKPT. See struct kmr_option.

Definition at line 2625 of file kmrbase.c.

int kmr_reduce_for_some ( KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
struct kmr_option  opt,
kmr_redfn_t  r 
)

Reduces until some key-value are added.

It stops processing when the output is non-empty. It does not guarantee singleness. Existence/emptiness can be checked by kmr_get_element_count().

Definition at line 1183 of file kmrmoreops.c.

int kmr_map_file_names ( KMR mr,
char **  names,
int  n,
struct kmr_file_option  fopt,
KMR_KVS kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps on file names.

NAMES specifies N file names. The map-function gets a file name in the key field (the value field is integer zero). File-option EACH_RANK specifies each rank independently to enumerate file names, otherwise to work on rank0 only. File-option SUBDIRECTORIES specifies to descend to subdirectories. It ignores files/directories whose names start with dots. File-option LIST_FILE specifies to read contents of each file for file names. A list file consists of one file name per line, and lines beginning with a "#" are ignored. Whitespace is trimmed at the beginning and the end. LIST_FILE implies SUBDIRECTORIES. It enumerates names of regular files only. File-option SHUFFLE_FILES runs shuffling file names among ranks.

Definition at line 1372 of file kmrfiles.c.

int kmr_map_getline ( KMR mr,
FILE *  f,
long  limit,
_Bool  largebuffering,
KMR_KVS kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Calls a map-function M for each line by getline() on an input F.

A map-function gets a line number in key and a string in value (the index argument is the same as the key). Calls to getline() are limited to LIMIT lines (0 for unlimited). It is multi-threaded and the call order is arbitrary. ARG and OPT are passed verbatim to a map-function. Effective-options: NOTHREADING, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 1561 of file kmrfiles.c.

int kmr_take_one ( KMR_KVS kvi,
struct kmr_kv_box kv 
)

Extracts a single key-value pair locally in the key-value stream KVI.

It is an error when zero or more than one entries are in the KVI. It does not consume the input KVS (INSPECT IMPLIED). The returned key-value entry must be used before freeing the input KVS, when it points to an opaque data.

Definition at line 1369 of file kmrbase.c.

int kmr_find_key ( KMR_KVS kvi,
struct kmr_kv_box  ki,
struct kmr_kv_box ko 
)

Finds a key-value pair for a key.

It is an error when not exactly one entry is found. It does not consume the input KVS KVI. The returned key-value entry must be used before freeing the input KVS, when it points to an opaque data. It maps internally, so it is slow. It is tricky that the internally created KVS KVS0 points to the key-value area in the input KVS KVI.

Definition at line 43 of file kmrmoreops.c.

int kmr_find_string ( KMR_KVS kvi,
const char *  k,
const char **  vq 
)

Finds the key K in the key-value stream KVS.

It returns a pointer pointing inside the key-value stream. It is an error when not exactly one entry is found. It does not consume the input KVS. It maps internally, so slow.

Definition at line 73 of file kmrmoreops.c.

int kmr_copy_info_to_kvs ( MPI_Info  src,
KMR_KVS kvo 
)

Copies mpi-info entries into kvs.

Definition at line 982 of file kmrutil.c.

int kmr_copy_kvs_to_info ( KMR_KVS kvi,
MPI_Info  dst 
)

Copies kvs entries into mpi-info.

It assumes keys/values are strings (no checks). It consumes KVI.

Definition at line 1034 of file kmrutil.c.

int kmr_get_element_count ( KMR_KVS kvs,
long *  v 
)

Gets the total number of key-value pairs.

It uses replication and reduction.

Definition at line 114 of file kmrmoreops.c.

int kmr_local_element_count ( KMR_KVS kvs,
long *  v 
)

Gets the number of key-value pairs locally on each rank.

Definition at line 349 of file kmrutil.c.

int kmr_add_identity_fn ( const struct kmr_kv_box  kv,
const KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
const long  i 
)

Adds a given key-value pair unmodified.

It is a map-function.

Definition at line 937 of file kmrbase.c.

int kmr_copy_to_array_fn ( const struct kmr_kv_box  kv,
const KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
const long  i 
)

Copies the entry in the array.

It should be used with the INSPECT option for map, because the array entries may point into the input key-value stream. It is a map-function.

Definition at line 934 of file kmrutil.c.

int kmr_save_kvs ( KMR_KVS kvs,
void **  dataq,
size_t *  szq,
struct kmr_option  opt 
)

Packs locally the contents of a key-value stream to a byte array.

It is used to save or to send a key-value stream. It returns the allocated memory with its size, and it should be freed by the user. It may fail on allocating a buffer, and then it returns MPI_ERR_BUFFER. Its reverse is performed by kmr_restore_kvs().

Definition at line 968 of file kmrbase.c.

int kmr_restore_kvs ( KMR_KVS kvo,
void *  data,
size_t  sz_,
struct kmr_option  opt 
)

Unpacks locally the contents of a key-value stream from a byte array.

It is a reverse of kmr_save_kvs().

Definition at line 1034 of file kmrbase.c.

int kmr_reverse ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Makes a new pair by swapping the key and the value in each pair.

That is, it makes new pairs (v0,k0) from (k0,v0). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 159 of file kmrmoreops.c.

int kmr_pairing ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Replaces a value part with a key-value pairing.

That is, it makes new pairs (k0,(k0,v0)) from (k0,v0). See kmr_unpairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 212 of file kmrmoreops.c.

int kmr_unpairing ( KMR_KVS kvs,
KMR_KVS kvo,
struct kmr_option  opt 
)

Extracts a key-value pair from a pairing in the value part, discarding the original key.

It is the inverse of kmr_pairing. That is, it makes new pairs (k1,v1) from (k0,(k1,v1)). See kmr_pairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 234 of file kmrmoreops.c.

int kmr_sort ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sorts a key-value stream globally.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It selects a sorting routine on the total number of keys. See kmr_sort_large(), kmr_sort_small(), or kmr_sort_by_one(). The results are stored as ascending ranks, thus the rank0 holds the minimum. Effective-options: INSPECT. See struct kmr_option.

Definition at line 575 of file kmrmoreops.c.

int kmr_sort_small ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sorts a key-value stream, by partitioning to equal ranges.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It assumes uniform distribution, and partitioning is simply determined by the range of keys (MIN-MAX range is divided by nprocs). Effective-options: NOTHREADING, INSPECT. See struct kmr_option.

Definition at line 388 of file kmrmoreops.c.

int kmr_sort_large ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sorts a key-value stream by the regular or the random sampling-sort.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It can be used for "GraySort". Effective-options: NOTHREADING, INSPECT. See struct kmr_option.

Definition at line 469 of file kmrmoreops.c.

int kmr_sort_by_one ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sort by rank0, a degenerated case for small number of keys.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. Effective-options: INSPECT. See struct kmr_option.

Definition at line 544 of file kmrmoreops.c.

int kmr_match ( KMR_KVS kvi0,
KMR_KVS kvi1,
KMR_KVS kvo,
struct kmr_option  opt 
)

Makes key-value pairs as products of the two values in two key-value streams.

It creates a set of key-value pairs (ai,bj) of the pairs (key,ai) from KVS0 and (key,bj) from KVS1 for the matching key. It makes a direct-product of the values when multiple values exist for a matching key. That is, for example, given a set {(k,a0), (k,a1), (k,a2)} in KVS0 and {(k,b3), (k,b4)} in KVS1 for some distinct key, it creates {(a0,b3), (a0,b4), (a1,b3), (a1,b4), (a2,b3), (a2,b4)}. Effective-options: NOTHREADING. See struct kmr_option.

Definition at line 696 of file kmrmoreops.c.

int kmr_ranking ( KMR_KVS kvi,
KMR_KVS kvo,
long *  count,
struct kmr_option  opt 
)

Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT.

Ranking is a position in the key-value stream. That is, for example, given a sequence {(k0,v0), (k1,v1), (k2,v2)}, it creates {(0,(k0,v0)), (1,(k1,v1)), (2,(k2,v2))}. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.

Definition at line 764 of file kmrmoreops.c.

int kmr_distribute ( KMR_KVS kvi,
KMR_KVS kvo,
_Bool  cyclic,
struct kmr_option  opt 
)

Distributes key-values so that each rank has approximately the same number of pairs.

It is used to level the load of mapping among ranks by calling it before mapping. kmr_shuffle() can be sufficient to distribute pairs in most cases, but sometimes it results in uneven distribution because shuffling is based on hashing on the keys. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.

Definition at line 835 of file kmrmoreops.c.

int kmr_shuffle_leveling_pair_count ( KMR_KVS kvi,
KMR_KVS kvo 
)

Shuffles key-values so that each rank has approximately the same number of pairs.

It collects the same keys on a rank (cf. kmr_distribute()).

Definition at line 1074 of file kmrmoreops.c.

int kmr_scan_locally ( KMR_KVS kvi,
KMR_KVS carryin,
KMR_KVS kvo,
KMR_KVS carryout,
kmr_redfn_t  r 
)

Scans every key-value with a reduce-function locally (independently on each rank).

It works in the order in the KVS. It ignores differences of the keys. It gets the start value from CARRYIN and puts the final value to CARRYOUT. The output has the same number of entries as the input. The carry-in and carry-out have one entry. The carry-out can be null. The reduce-function is called on each key-value pair as the right operand with the previous value as the left operand, and it should output a single value. The key part of the output is ignored and a pair is stored under the original key.

Definition at line 2880 of file kmrbase.c.

int kmr_scan_on_values ( KMR_KVS kvi,
KMR_KVS kvo,
KMR_KVS total,
kmr_redfn_t  r 
)

Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL).

The key-values are scanned in the order in the KVS as they are concatenated in the rank-order. The reduce-function should be associative and free of side-effects (because it is called multiple times on the same data). The reduce-function should output a single key-value when given any number of key-value pairs. Furthermore, it should output an identity element when it is given zero key-value pairs.

Definition at line 943 of file kmrmoreops.c.

int kmr_choose_first_part ( KMR_KVS kvi,
KMR_KVS kvo,
long  n,
struct kmr_option  opt 
)

Chooses the first N entries from a key-value stream KVI.

The option nothreading is implied to keep the ordering. Effective-options: INSPECT, KEEP_OPEN. See struct kmr_option.

Definition at line 1145 of file kmrmoreops.c.

int kmr_legal_minimum_field_size ( KMR mr,
enum kmr_kv_field  f 
)

Returns a minimum byte size of the field: 8 for INTEGER and FLOAT8, 0 for others.

Definition at line 2847 of file kmrbase.c.

int kmr_histogram_count_by_ranks ( KMR_KVS kvs,
long *  frq,
double *  var,
_Bool  rankzeroonly 
)

Fills an integer array FRQ[i] with the count of the elements of each rank.

The array FRQ should be as large as nprocs. It also fills VAR[0]=average, VAR[1]=variance, VAR[2]=min, and VAR[3]=max. FRQ or VAR can be null.

Definition at line 1569 of file kmrmoreops.c.

int kmr_read_files_reassemble ( KMR mr,
char *  file,
int  color,
off_t  offset,
off_t  bytes,
void **  buffer,
off_t *  size 
)

Reassembles files reading by ranks.

It is intended to reassemble a file from files split into segments. FILE is a file name. A file name can be null, when the rank does not participate in reading (COLOR=-1). COLOR groups ranks (COLOR must be >=-1). The files on the ranks with the same COLOR are concatenated, where concatenation is ordered by the rank-order. Read is performed for OFFSET and BYTES on each file. BYTES can be -1 to read an entire file. BUFFER and SIZE are set to the malloced buffer and the size on return. Ranks with non-null FILE retrieve a file (ingest), while ranks with non-zero BUFFER receive contents (digest). Ranks with COLOR=-1 do not participate in file reading. REMARK ON K: It reads a specified file by each rank, assuming the files reside in specific I/O-groups to the ranks.

Definition at line 653 of file kmrfiles.c.

int kmr_read_file_by_segments ( KMR mr,
char *  file,
int  color,
void **  buffer,
off_t *  size 
)

Reads one file by segments and reassembles by all-gather.

FILE is a file name. COLOR groups ranks (be COLOR>=-1). The ranks with the same COLOR collaborate to read a file, and thus, they must specify the same file (with an identical inode number). BUFFER and SIZE are set to the malloced buffer and the size on return. Ranks with non-zero FILE retrieve a file (ingest). Ranks with non-zero BUFFER receive contents (digest). Ranks with COLOR=-1 do not participate in file reading, and then arguments should be FILE=0 and BUFFER=0.

Definition at line 1021 of file kmrfiles.c.

int kmr_retrieve_kvs_entries ( KMR_KVS kvs,
struct kmr_kvs_entry **  ev,
long  n 
)

Fills local key-value entries in an array for inspection.

The returned pointers point to the inside of the KVS. The array EV should be as large as N. It implies inspect.

Definition at line 2801 of file kmrbase.c.

int kmr_retrieve_keyed_records ( KMR_KVS kvs,
struct kmr_keyed_record ev,
long  n,
_Bool  shuffling,
_Bool  ranking 
)

Fills keyed records in an array for sorting.

The array EV should be as large as N. It implies inspect.

Definition at line 2820 of file kmrbase.c.

int kmr_dump_kv ( const struct kmr_kv_box  kv,
const KMR_KVS kvs,
char *  buf,
int  buflen 
)

Dumps contents of a key-value.

Definition at line 1585 of file kmrutil.c.

int kmr_dump_kvs ( KMR_KVS kvs,
int  flag 
)

Dumps contents of a key-value stream to stdout.

Argument FLAG is nothing, ignored.

Definition at line 1609 of file kmrutil.c.

int kmr_dump_kvs_stats ( KMR_KVS kvs,
int  level 
)

Dumps contents of a key-value stream, with values are pairs.

Prints the total number of key-value pairs. It prints on the rank0 only.

Definition at line 1659 of file kmrutil.c.

void kmr_dump_opaque ( const char *  p,
int  sz,
char *  buf,
int  buflen 
)

Puts the string of the key or value field into a buffer BUF as printable string.

Ellipses appear if string does not fit in the buffer.

Definition at line 1511 of file kmrutil.c.

void kmr_reset_ntuple ( struct kmr_ntuple u,
int  n,
int  marker 
)

Resets an n-tuple U with N entries and a MARKER.

Definition at line 1234 of file kmrmoreops.c.

int kmr_put_ntuple ( KMR mr,
struct kmr_ntuple u,
const int  size,
const void *  v,
const int  len 
)

Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE.

An n-tuple should be initialized by kmr_reset_ntuple() first. Note it fills with zeros the gap of the alignment padding, allowing the n-tuples be used as opaque keys.

Definition at line 1252 of file kmrmoreops.c.

int kmr_put_ntuple_long ( KMR mr,
struct kmr_ntuple u,
const int  sz,
long  v 
)

Adds an integer value in an n-tuple U whose size is limited to SIZE.

See kmr_put_ntuple().

Definition at line 1274 of file kmrmoreops.c.

int kmr_put_ntuple_entry ( KMR mr,
struct kmr_ntuple u,
const int  sz,
struct kmr_ntuple_entry  e 
)

Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE.

See kmr_put_ntuple().

Definition at line 1284 of file kmrmoreops.c.

struct kmr_ntuple_entry kmr_nth_ntuple ( struct kmr_ntuple u,
int  nth 
)

Returns an NTH entry of an n-tuple.

It returns a pair of a length and a pointer.

Definition at line 1197 of file kmrmoreops.c.

int kmr_size_ntuple ( struct kmr_ntuple u)

Returns the storage size of an n-tuple.

Definition at line 1211 of file kmrmoreops.c.

int kmr_size_ntuple_by_lengths ( int  n,
int  len[] 
)

Returns the storage size of an n-tuple for N entries with LEN[i] size each.

Definition at line 1221 of file kmrmoreops.c.

int kmr_add_ntuple ( KMR_KVS kvo,
void *  k,
int  klen,
struct kmr_ntuple u 
)

Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO.

Definition at line 1295 of file kmrmoreops.c.

int kmr_separate_ntuples ( KMR mr,
const struct kmr_kv_box  kv[],
const long  n,
struct kmr_ntuple **  vv[2],
long  cnt[2],
int  markers[2],
_Bool  disallow_other_entries 
)

Separates the n-tuples stored in the value part of KV into the two sets by their marker values.

It is intended to be used in reduce functions. It separates the n-tuples to the first set by marker=MARKERS[0] and to the second set by marker=MARKERS[1]. It returns two malloced arrays in VV with their sizes in CNT. The arrays VV[0] and VV[1] should be freed by the caller.

Definition at line 1318 of file kmrmoreops.c.

int kmr_product_ntuples ( KMR_KVS kvo,
struct kmr_ntuple **  vv[2],
long  cnt[2],
int  marker,
int  slots[][2],
int  nslots,
int  keys[][2],
int  nkeys 
)

Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1].

It is intended to be used in reduce functions. The resulting n-tuples are created by SLOTS, which chooses the i-th entry of the n-tuples by the SLOTS[i][0]-th entry from the SLOTS[i][1] set, 0 from the first set and 1 from the second set. The product n-tuples have MARKER and are inserted into KVO under the new key. The new key is selected like values using KEYS[j][0] and KEYS[j][1]. The key is not an n-tuple when NKEYS=1, or an n-tuple of KEYS[j] entries. The n-tuple key has zero as a marker. Note that it does not remove duplicate entries.

Definition at line 1528 of file kmrmoreops.c.

KMR_KVS* kmr_create_pushoff_kvs ( KMR mr,
enum kmr_kv_field  kf,
enum kmr_kv_field  vf,
struct kmr_option  opt,
const char *  file,
const int  line,
const char *  func 
)

Makes a new key-value stream with the specified field data-types.

It cannot be used with checkpointing. It allocates by the size of the union, which is larger than the necessary for replacement later by an on-core KVS at kmr_add_kv_done(). See kmr_add_kv_done_pushoff().

Definition at line 85 of file kmraltkvs.c.

void kmr_init_pushoff_fast_notice_ ( MPI_Comm  comm,
_Bool  verbose 
)

Initializes RDMA for fast-notice.

Fast-notice is RDMA-based event notification to tell readiness of MPI messages. It is only usable with communicators having the same processes.

Definition at line 726 of file kmraltkvs.c.

void kmr_check_pushoff_fast_notice_ ( KMR mr)

Check if fast-notice works.

The check should be performed immediately after initialization.

Definition at line 808 of file kmraltkvs.c.

int kmr_assign_file ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Assigns files to ranks based on data locality.

It assumes that values of key-value pairs in the input KVS are file paths and it shuffles the key-value pairs and writes results to the output KVS so that the files are assigned to near ranks. If the value of a key-value pair is file paths separated by '\0', it will find a rank near from all the files specified in the value. Currently, it only works on the K computer. On the other systems, it just performs kmr_shuffle(). Effective-options: INSPECT, TAKE_CKPT. See struct kmr_option.

Definition at line 257 of file kmriolb.c.

Variable Documentation

const size_t kmr_kvs_entry_header = offsetof(struct kmr_kvs_entry, c)
static

Size of an Entry Header.

It is the size of the length fields of a key-value. It is also the size of the slack space for an end-of-block marker, where the end of entries in a block is marked by klen=-1 and vlen=-1.

Definition at line 382 of file kmr.h.