KMR
|
More Operatons on Key-Value Stream. More...
#include <mpi.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <errno.h>
#include <assert.h>
#include "kmr.h"
#include "kmrimpl.h"
Go to the source code of this file.
Classes | |
struct | kmr_ranking_to_rank |
Macros | |
#define | MAX(a, b) (((a)>(b))?(a):(b)) |
#define | MIN(a, b) (((a)<(b))?(a):(b)) |
#define | NEVERHERE 0 |
Functions | |
int | kmr_add_ntuple (KMR_KVS *kvo, void *k, int klen, struct kmr_ntuple *u) |
Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO. More... | |
static int | kmr_add_pairing_under_key (KMR_KVS *kvo, int klen, union kmr_unit_sized k, const struct kmr_kv_box kv, enum kmr_kv_field keyf, enum kmr_kv_field valf) |
int | kmr_choose_first_part (KMR_KVS *kvi, KMR_KVS *kvo, long n, struct kmr_option opt) |
Chooses the first N entries from a key-value stream KVI. More... | |
static int | kmr_count_key_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p) |
static int | kmr_count_keys (KMR_KVS *kvi, KMR_KVS *kvo) |
int | kmr_distribute (KMR_KVS *kvi, KMR_KVS *kvo, _Bool cyclic, struct kmr_option opt) |
Distributes key-values so that each rank has approximately the same number of pairs. More... | |
int | kmr_find_key (KMR_KVS *kvi, struct kmr_kv_box ki, struct kmr_kv_box *ko) |
Finds a key-value pair for a key. More... | |
static int | kmr_find_key_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i) |
int | kmr_find_string (KMR_KVS *kvi, const char *k, const char **vq) |
Finds the key K in the key-value stream KVS. More... | |
static int | kmr_first_n_elements_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
int | kmr_get_element_count (KMR_KVS *kvs, long *v) |
Gets the total number of key-value pairs. More... | |
static int | kmr_get_element_count_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p) |
static int | kmr_get_integer_values_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p) |
int | kmr_histogram_count_by_ranks (KMR_KVS *kvs, long *frq, double *var, _Bool rankzeroonly) |
Fills an integer array FRQ[i] with the count of the elements of each rank. More... | |
static int | kmr_make_product_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *p) |
int | kmr_map_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m) |
Maps until some key-value are added. More... | |
int | kmr_match (KMR_KVS *kvi0, KMR_KVS *kvi1, KMR_KVS *kvo, struct kmr_option opt) |
Makes key-value pairs as products of the two values in two key-value stream. More... | |
static int | kmr_minmax2_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p) |
struct kmr_ntuple_entry | kmr_nth_ntuple (struct kmr_ntuple *u, int nth) |
Returns an NTH entry of an n-tuple. More... | |
int | kmr_pairing (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Replaces a value part with a key-value pairing. More... | |
int | kmr_pairing_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
static int | kmr_partition_by_count_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i) |
static int | kmr_partition_by_ranking_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
int | kmr_product_ntuples (KMR_KVS *kvo, struct kmr_ntuple **vv[2], long cnt[2], int marker, int slots[][2], int nslots, int keys[][2], int nkeys) |
Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1]. More... | |
static int | kmr_product_ntuples_by_space (KMR_KVS *kvo, struct kmr_ntuple **vv[2], long cnt[2], int marker, int slots[][2], int nslots, int keys[][2], int nkeys, _Bool byspace) |
static int | kmr_put_integer_to_array_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
int | kmr_put_ntuple (KMR *mr, struct kmr_ntuple *u, const int size, const void *v, const int len) |
Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE. More... | |
int | kmr_put_ntuple_entry (KMR *mr, struct kmr_ntuple *u, const int sz, struct kmr_ntuple_entry e) |
Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE. More... | |
int | kmr_put_ntuple_long (KMR *mr, struct kmr_ntuple *u, const int sz, long v) |
Adds an integer value in an n-tuple U whose size is limited to SIZE. More... | |
static int | kmr_rank_for_sort (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
int | kmr_ranking (KMR_KVS *kvi, KMR_KVS *kvo, long *count, struct kmr_option opt) |
Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT. More... | |
static int | kmr_ranking_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
static int | kmr_ranking_to_rank_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
int | kmr_reduce_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r) |
Reduces until some key-value are added. More... | |
void | kmr_reset_ntuple (struct kmr_ntuple *u, int n, int marker) |
Resets an n-tuple U with N entries and a MARKER. More... | |
int | kmr_reverse (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Makes a new pair by swapping the key and the value in each pair. More... | |
int | kmr_reverse_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i) |
static int | kmr_sample_kv (long nsamples, KMR_KVS *kvi, KMR_KVS *kvo) |
static int | kmr_sample_to_array (long nsamples, KMR_KVS *kvi, struct kmr_kv_box *bv) |
static int | kmr_scan_across_ranks_sequentially (KMR_KVS *kvi, KMR_KVS *kvo, KMR_KVS *total, kmr_redfn_t r) |
int | kmr_scan_on_values (KMR_KVS *kvi, KMR_KVS *kvo, KMR_KVS *total, kmr_redfn_t r) |
Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL). More... | |
static int | kmr_scan_sum_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p) |
int | kmr_separate_ntuples (KMR *mr, const struct kmr_kv_box kv[], const long n, struct kmr_ntuple **vv[2], long cnt[2], int markers[2], _Bool disallow_other_entries) |
Separates the n-tuples stored in the value part of KV into the two sets by their marker values. More... | |
int | kmr_shuffle_leveling_pair_count (KMR_KVS *kvi, KMR_KVS *kvo) |
Shuffles key-values so that each rank has approximately the same number of pairs. More... | |
int | kmr_size_ntuple (struct kmr_ntuple *u) |
Returns the storage size of an n-tuple. More... | |
int | kmr_size_ntuple_by_lengths (int n, int len[]) |
Returns the storage size of an n-tuple for N entries with LEN[i] size each. More... | |
int | kmr_sort (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sorts a key-value stream globally. More... | |
int | kmr_sort_by_one (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sort by rank0, a degenerated case for small number of keys. More... | |
int | kmr_sort_large (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sorts a key-value stream by the regular or the random sampling-sort. More... | |
int | kmr_sort_small (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt) |
Sorts a key-value stream, by partitioning to equal ranges. More... | |
static int | kmr_sum_key_counts_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p) |
static int | kmr_tag_value_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i) |
int | kmr_unpairing (KMR_KVS *kvs, KMR_KVS *kvo, struct kmr_option opt) |
Extracts a key-value pair from a pairing in the value part, discarding the original key. More... | |
int | kmr_unpairing_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i) |
More Operatons on Key-Value Stream.
Definition in file kmrmoreops.c.
int kmr_find_key | ( | KMR_KVS * | kvi, |
struct kmr_kv_box | ki, | ||
struct kmr_kv_box * | ko | ||
) |
Finds a key-value pair for a key.
It is an error when not exactly one entry is found. It does not consume the input KVS KVI. The returned key-value entry must be used before freeing the input KVS, when it points to an opaque data. It maps internally, so it is slow. It is tricky that the internally created KVS KVS0 points to the key-value area in the input KVS KVI.
Definition at line 43 of file kmrmoreops.c.
int kmr_find_string | ( | KMR_KVS * | kvi, |
const char * | k, | ||
const char ** | vq | ||
) |
Finds the key K in the key-value stream KVS.
It returns a pointer pointing inside the key-value stream. It is an error when not exactly one entry is found. It does not consume the input KVS. It maps internally, so slow.
Definition at line 73 of file kmrmoreops.c.
int kmr_get_element_count | ( | KMR_KVS * | kvs, |
long * | v | ||
) |
Gets the total number of key-value pairs.
It uses replication and reduction.
Definition at line 114 of file kmrmoreops.c.
int kmr_reverse | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Makes a new pair by swapping the key and the value in each pair.
That is, it makes new pairs (v0,k0) from (k0,v0). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
Definition at line 159 of file kmrmoreops.c.
int kmr_pairing | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Replaces a value part with a key-value pairing.
That is, it makes new pairs (k0,(k0,v0)) from (k0,v0). See kmr_unpairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
Definition at line 212 of file kmrmoreops.c.
int kmr_unpairing | ( | KMR_KVS * | kvs, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Extracts a key-value pair from a pairing in the value part, discarding the original key.
It is the inverse of kmr_pairing. That is, it makes new pairs (k1,v1) from (k0,(k1,v1)). See kmr_pairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.
Definition at line 234 of file kmrmoreops.c.
int kmr_sort_small | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sorts a key-value stream, by partitioning to equal ranges.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It assumes uniform distribution, and partioning is simply determined by the range of keys (MIN-MAX range is divided by nprocs). Effective-options: NOTHREADING, INSPECT. See struct kmr_option.
Definition at line 388 of file kmrmoreops.c.
int kmr_sort_large | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sorts a key-value stream by the regular or the random sampling-sort.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It can be used for "GraySort". Effective-options: NOTHREADING, INSPECT. See struct kmr_option.
Definition at line 469 of file kmrmoreops.c.
int kmr_sort_by_one | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sort by rank0, a degenerated case for small number of keys.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. Effective-options: INSPECT. See struct kmr_option.
Definition at line 544 of file kmrmoreops.c.
int kmr_sort | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Sorts a key-value stream globally.
It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It selects a sorting routine on the total number of keys. See kmr_sort_large(), kmr_sort_small(), or kmr_sort_by_one(). The results are stored as ascending ranks, thus the rank0 holds the minimum. Effective-options: INSPECT. See struct kmr_option.
Definition at line 575 of file kmrmoreops.c.
int kmr_match | ( | KMR_KVS * | kvi0, |
KMR_KVS * | kvi1, | ||
KMR_KVS * | kvo, | ||
struct kmr_option | opt | ||
) |
Makes key-value pairs as products of the two values in two key-value stream.
It creates a set of key-value pairs (ai,bj) of the pairs (key,ai) from KVS0 and (key,bj) from KVS1 for the matching key. It makes a direct-product of the values when multiple values exist for a matching key. That is, for example, given a set {(k,a0), (k,a1), (k,a2)} in KVS0 and {(k,b3), (k,b4)} in KVS1 for some distinct key, it creates {(a0,b3), (a0,b4), (a1,b3), (a1,b4), (a2,b3), (a2,b4)}. Effective-options: NOTHREADNG. See struct kmr_option.
Definition at line 696 of file kmrmoreops.c.
int kmr_ranking | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
long * | count, | ||
struct kmr_option | opt | ||
) |
Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT.
Ranking is a position in the key-value stream. That is, for example, given a sequence {(k0,v0), (k1,v1), (k2,v2)}, it creates {(0,(k0,v0)), (1,(k1,v1)), (2,(k2,v2))}. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.
Definition at line 764 of file kmrmoreops.c.
int kmr_distribute | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
_Bool | cyclic, | ||
struct kmr_option | opt | ||
) |
Distributes key-values so that each rank has approximately the same number of pairs.
It is used to level the load of mapping among ranks by calling it before mapping. kmr_shuffle() can be sufficient to distribute pairs in most cases, but sometimes it results in uneven distribution because shuffling is based on hashing on the keys. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.
Definition at line 835 of file kmrmoreops.c.
int kmr_scan_on_values | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
KMR_KVS * | total, | ||
kmr_redfn_t | r | ||
) |
Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL).
The key-values are scanned in the order in the KVS as they are concatenated in the rank-order. The reduce-function should be associative and free of side-effects (because it is called multiple times on the same data). The reduce-function should output a single key-value when given any number of key-value pairs. Furthermore, it should output an identity element when it is given zero key-value pairs.
Definition at line 943 of file kmrmoreops.c.
Shuffles key-values so that each rank has approximately the same number of pairs.
It collects the same keys on a rank (cf. kmr_distribute()).
Definition at line 1074 of file kmrmoreops.c.
int kmr_choose_first_part | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
long | n, | ||
struct kmr_option | opt | ||
) |
Chooses the first N entries from a key-value stream KVI.
The option nothreading is implied to keep the ordering. Effective-options: INSPECT, KEEP_OPEN. See struct kmr_option.
Definition at line 1145 of file kmrmoreops.c.
int kmr_map_for_some | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_mapfn_t | m | ||
) |
Maps until some key-value are added.
It stops processing, when the output is non-empty. It does not guarantee singleness. Existence/emptiness be checked by kmr_get_element_count().
Definition at line 1170 of file kmrmoreops.c.
int kmr_reduce_for_some | ( | KMR_KVS * | kvi, |
KMR_KVS * | kvo, | ||
void * | arg, | ||
struct kmr_option | opt, | ||
kmr_redfn_t | r | ||
) |
Reduces until some key-value are added.
It stops processing, when the output is non-empty. It does not guarantee singleness. Existence/emptiness be checked by kmr_get_element_count().
Definition at line 1183 of file kmrmoreops.c.
struct kmr_ntuple_entry kmr_nth_ntuple | ( | struct kmr_ntuple * | u, |
int | nth | ||
) |
Returns an NTH entry of an n-tuple.
It returns a pair of a length and a pointer.
Definition at line 1197 of file kmrmoreops.c.
int kmr_size_ntuple | ( | struct kmr_ntuple * | u | ) |
Returns the storage size of an n-tuple.
Definition at line 1211 of file kmrmoreops.c.
int kmr_size_ntuple_by_lengths | ( | int | n, |
int | len[] | ||
) |
Returns the storage size of an n-tuple for N entries with LEN[i] size each.
Definition at line 1221 of file kmrmoreops.c.
void kmr_reset_ntuple | ( | struct kmr_ntuple * | u, |
int | n, | ||
int | marker | ||
) |
Resets an n-tuple U with N entries and a MARKER.
Definition at line 1234 of file kmrmoreops.c.
int kmr_put_ntuple | ( | KMR * | mr, |
struct kmr_ntuple * | u, | ||
const int | size, | ||
const void * | v, | ||
const int | len | ||
) |
Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE.
An n-tuple should be initialized by kmr_reset_ntuple() first. Note it fills with zeros the gap of the alignment padding, allowing the n-tuples be used as opaque keys.
Definition at line 1252 of file kmrmoreops.c.
int kmr_put_ntuple_long | ( | KMR * | mr, |
struct kmr_ntuple * | u, | ||
const int | sz, | ||
long | v | ||
) |
Adds an integer value in an n-tuple U whose size is limited to SIZE.
See kmr_put_ntuple().
Definition at line 1274 of file kmrmoreops.c.
int kmr_put_ntuple_entry | ( | KMR * | mr, |
struct kmr_ntuple * | u, | ||
const int | sz, | ||
struct kmr_ntuple_entry | e | ||
) |
Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE.
See kmr_put_ntuple().
Definition at line 1284 of file kmrmoreops.c.
int kmr_add_ntuple | ( | KMR_KVS * | kvo, |
void * | k, | ||
int | klen, | ||
struct kmr_ntuple * | u | ||
) |
Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO.
Definition at line 1295 of file kmrmoreops.c.
int kmr_separate_ntuples | ( | KMR * | mr, |
const struct kmr_kv_box | kv[], | ||
const long | n, | ||
struct kmr_ntuple ** | vv[2], | ||
long | cnt[2], | ||
int | markers[2], | ||
_Bool | disallow_other_entries | ||
) |
Separates the n-tuples stored in the value part of KV into the two sets by their marker values.
It is intended to be used in reduce functions. It separates the n-tuples to the first set by marker=MARKERS[0] and to the second set by marker=MARKERS[1]. It returns two malloced arrays in VV with their sizes in CNT. The arrays VV[0] and VV[1] should be freed by the caller.
Definition at line 1318 of file kmrmoreops.c.
int kmr_product_ntuples | ( | KMR_KVS * | kvo, |
struct kmr_ntuple ** | vv[2], | ||
long | cnt[2], | ||
int | marker, | ||
int | slots[][2], | ||
int | nslots, | ||
int | keys[][2], | ||
int | nkeys | ||
) |
Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1].
It is intended to be used in reduce functions. The resulting n-tuples are created by SLOTS, which chooses i-th entry of the n-tuples by the SLOTS[i][0]-th entry from the the SLOTS[i][1] set, 0 from the first set and 1 from the second set. The product n-tuples have MARKER and are inserted into KVO under the new key. The new key is selected like values using KEYS[j][0] and KEYS[j][1]. The key is not an n-tuple when NKEYS=1, or an n-tuple of KEYS[j] entries. The n-tuple key has zero as a marker. Note that it does not remove duplicate entries.
Definition at line 1528 of file kmrmoreops.c.
int kmr_histogram_count_by_ranks | ( | KMR_KVS * | kvs, |
long * | frq, | ||
double * | var, | ||
_Bool | rankzeroonly | ||
) |
Fills an integer array FRQ[i] with the count of the elements of each rank.
The array FRQ be as large as nprocs. It also fills VAR[0]=average, VAR[1]=variance, VAR[2]=min, and VAR[3]=max. FRQ or VAR can be null.
Definition at line 1569 of file kmrmoreops.c.