KMR
Classes | Macros | Functions
kmrmoreops.c File Reference

More Operatons on Key-Value Stream. More...

#include <mpi.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <errno.h>
#include <assert.h>
#include "kmr.h"
#include "kmrimpl.h"

Go to the source code of this file.

Classes

struct  kmr_ranking_to_rank
 

Macros

#define MAX(a, b)   (((a)>(b))?(a):(b))
 
#define MIN(a, b)   (((a)<(b))?(a):(b))
 
#define NEVERHERE   0
 

Functions

int kmr_add_ntuple (KMR_KVS *kvo, void *k, int klen, struct kmr_ntuple *u)
 Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO. More...
 
static int kmr_add_pairing_under_key (KMR_KVS *kvo, int klen, union kmr_unit_sized k, const struct kmr_kv_box kv, enum kmr_kv_field keyf, enum kmr_kv_field valf)
 
int kmr_choose_first_part (KMR_KVS *kvi, KMR_KVS *kvo, long n, struct kmr_option opt)
 Chooses the first N entries from a key-value stream KVI. More...
 
static int kmr_count_key_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p)
 
static int kmr_count_keys (KMR_KVS *kvi, KMR_KVS *kvo)
 
int kmr_distribute (KMR_KVS *kvi, KMR_KVS *kvo, _Bool cyclic, struct kmr_option opt)
 Distributes key-values so that each rank has approximately the same number of pairs. More...
 
int kmr_find_key (KMR_KVS *kvi, struct kmr_kv_box ki, struct kmr_kv_box *ko)
 Finds a key-value pair for a key. More...
 
static int kmr_find_key_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i)
 
int kmr_find_string (KMR_KVS *kvi, const char *k, const char **vq)
 Finds the key K in the key-value stream KVS. More...
 
static int kmr_first_n_elements_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
int kmr_get_element_count (KMR_KVS *kvs, long *v)
 Gets the total number of key-value pairs. More...
 
static int kmr_get_element_count_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p)
 
static int kmr_get_integer_values_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p)
 
int kmr_histogram_count_by_ranks (KMR_KVS *kvs, long *frq, double *var, _Bool rankzeroonly)
 Fills an integer array FRQ[i] with the count of the elements of each rank. More...
 
static int kmr_make_product_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvi, KMR_KVS *kvo, void *p)
 
int kmr_map_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_mapfn_t m)
 Maps until some key-value are added. More...
 
int kmr_match (KMR_KVS *kvi0, KMR_KVS *kvi1, KMR_KVS *kvo, struct kmr_option opt)
 Makes key-value pairs as products of the two values in two key-value stream. More...
 
static int kmr_minmax2_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p)
 
struct kmr_ntuple_entry kmr_nth_ntuple (struct kmr_ntuple *u, int nth)
 Returns an NTH entry of an n-tuple. More...
 
int kmr_pairing (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Replaces a value part with a key-value pairing. More...
 
int kmr_pairing_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
static int kmr_partition_by_count_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i)
 
static int kmr_partition_by_ranking_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
int kmr_product_ntuples (KMR_KVS *kvo, struct kmr_ntuple **vv[2], long cnt[2], int marker, int slots[][2], int nslots, int keys[][2], int nkeys)
 Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1]. More...
 
static int kmr_product_ntuples_by_space (KMR_KVS *kvo, struct kmr_ntuple **vv[2], long cnt[2], int marker, int slots[][2], int nslots, int keys[][2], int nkeys, _Bool byspace)
 
static int kmr_put_integer_to_array_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
int kmr_put_ntuple (KMR *mr, struct kmr_ntuple *u, const int size, const void *v, const int len)
 Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE. More...
 
int kmr_put_ntuple_entry (KMR *mr, struct kmr_ntuple *u, const int sz, struct kmr_ntuple_entry e)
 Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE. More...
 
int kmr_put_ntuple_long (KMR *mr, struct kmr_ntuple *u, const int sz, long v)
 Adds an integer value in an n-tuple U whose size is limited to SIZE. More...
 
static int kmr_rank_for_sort (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
int kmr_ranking (KMR_KVS *kvi, KMR_KVS *kvo, long *count, struct kmr_option opt)
 Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT. More...
 
static int kmr_ranking_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
static int kmr_ranking_to_rank_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
int kmr_reduce_for_some (KMR_KVS *kvi, KMR_KVS *kvo, void *arg, struct kmr_option opt, kmr_redfn_t r)
 Reduces until some key-value are added. More...
 
void kmr_reset_ntuple (struct kmr_ntuple *u, int n, int marker)
 Resets an n-tuple U with N entries and a MARKER. More...
 
int kmr_reverse (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Makes a new pair by swapping the key and the value in each pair. More...
 
int kmr_reverse_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i)
 
static int kmr_sample_kv (long nsamples, KMR_KVS *kvi, KMR_KVS *kvo)
 
static int kmr_sample_to_array (long nsamples, KMR_KVS *kvi, struct kmr_kv_box *bv)
 
static int kmr_scan_across_ranks_sequentially (KMR_KVS *kvi, KMR_KVS *kvo, KMR_KVS *total, kmr_redfn_t r)
 
int kmr_scan_on_values (KMR_KVS *kvi, KMR_KVS *kvo, KMR_KVS *total, kmr_redfn_t r)
 Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL). More...
 
static int kmr_scan_sum_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p)
 
int kmr_separate_ntuples (KMR *mr, const struct kmr_kv_box kv[], const long n, struct kmr_ntuple **vv[2], long cnt[2], int markers[2], _Bool disallow_other_entries)
 Separates the n-tuples stored in the value part of KV into the two sets by their marker values. More...
 
int kmr_shuffle_leveling_pair_count (KMR_KVS *kvi, KMR_KVS *kvo)
 Shuffles key-values so that each rank has approximately the same number of pairs. More...
 
int kmr_size_ntuple (struct kmr_ntuple *u)
 Returns the storage size of an n-tuple. More...
 
int kmr_size_ntuple_by_lengths (int n, int len[])
 Returns the storage size of an n-tuple for N entries with LEN[i] size each. More...
 
int kmr_sort (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sorts a key-value stream globally. More...
 
int kmr_sort_by_one (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sort by rank0, a degenerated case for small number of keys. More...
 
int kmr_sort_large (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sorts a key-value stream by the regular or the random sampling-sort. More...
 
int kmr_sort_small (KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
 Sorts a key-value stream, by partitioning to equal ranges. More...
 
static int kmr_sum_key_counts_fn (const struct kmr_kv_box kv[], const long n, const KMR_KVS *kvs, KMR_KVS *kvo, void *p)
 
static int kmr_tag_value_fn (const struct kmr_kv_box kv, const KMR_KVS *kvi, KMR_KVS *kvo, void *p, const long i)
 
int kmr_unpairing (KMR_KVS *kvs, KMR_KVS *kvo, struct kmr_option opt)
 Extracts a key-value pair from a pairing in the value part, discarding the original key. More...
 
int kmr_unpairing_fn (const struct kmr_kv_box kv, const KMR_KVS *kvs, KMR_KVS *kvo, void *p, const long i)
 

Detailed Description

More Operatons on Key-Value Stream.

Definition in file kmrmoreops.c.

Function Documentation

int kmr_find_key ( KMR_KVS kvi,
struct kmr_kv_box  ki,
struct kmr_kv_box ko 
)

Finds a key-value pair for a key.

It is an error when not exactly one entry is found. It does not consume the input KVS KVI. The returned key-value entry must be used before freeing the input KVS, when it points to an opaque data. It maps internally, so it is slow. It is tricky that the internally created KVS KVS0 points to the key-value area in the input KVS KVI.

Definition at line 43 of file kmrmoreops.c.

int kmr_find_string ( KMR_KVS kvi,
const char *  k,
const char **  vq 
)

Finds the key K in the key-value stream KVS.

It returns a pointer pointing inside the key-value stream. It is an error when not exactly one entry is found. It does not consume the input KVS. It maps internally, so slow.

Definition at line 73 of file kmrmoreops.c.

int kmr_get_element_count ( KMR_KVS kvs,
long *  v 
)

Gets the total number of key-value pairs.

It uses replication and reduction.

Definition at line 114 of file kmrmoreops.c.

int kmr_reverse ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Makes a new pair by swapping the key and the value in each pair.

That is, it makes new pairs (v0,k0) from (k0,v0). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 159 of file kmrmoreops.c.

int kmr_pairing ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Replaces a value part with a key-value pairing.

That is, it makes new pairs (k0,(k0,v0)) from (k0,v0). See kmr_unpairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 212 of file kmrmoreops.c.

int kmr_unpairing ( KMR_KVS kvs,
KMR_KVS kvo,
struct kmr_option  opt 
)

Extracts a key-value pair from a pairing in the value part, discarding the original key.

It is the inverse of kmr_pairing. That is, it makes new pairs (k1,v1) from (k0,(k1,v1)). See kmr_pairing(). This is a simple mapper. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN, TAKE_CKPT. See struct kmr_option.

Definition at line 234 of file kmrmoreops.c.

int kmr_sort_small ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sorts a key-value stream, by partitioning to equal ranges.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It assumes uniform distribution, and partioning is simply determined by the range of keys (MIN-MAX range is divided by nprocs). Effective-options: NOTHREADING, INSPECT. See struct kmr_option.

Definition at line 388 of file kmrmoreops.c.

int kmr_sort_large ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sorts a key-value stream by the regular or the random sampling-sort.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It can be used for "GraySort". Effective-options: NOTHREADING, INSPECT. See struct kmr_option.

Definition at line 469 of file kmrmoreops.c.

int kmr_sort_by_one ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sort by rank0, a degenerated case for small number of keys.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. Effective-options: INSPECT. See struct kmr_option.

Definition at line 544 of file kmrmoreops.c.

int kmr_sort ( KMR_KVS kvi,
KMR_KVS kvo,
struct kmr_option  opt 
)

Sorts a key-value stream globally.

It is NOT-STABLE due to quick-sort used inside. It consumes an input key-value stream unless INSPECT is specified. It selects a sorting routine on the total number of keys. See kmr_sort_large(), kmr_sort_small(), or kmr_sort_by_one(). The results are stored as ascending ranks, thus the rank0 holds the minimum. Effective-options: INSPECT. See struct kmr_option.

Definition at line 575 of file kmrmoreops.c.

int kmr_match ( KMR_KVS kvi0,
KMR_KVS kvi1,
KMR_KVS kvo,
struct kmr_option  opt 
)

Makes key-value pairs as products of the two values in two key-value stream.

It creates a set of key-value pairs (ai,bj) of the pairs (key,ai) from KVS0 and (key,bj) from KVS1 for the matching key. It makes a direct-product of the values when multiple values exist for a matching key. That is, for example, given a set {(k,a0), (k,a1), (k,a2)} in KVS0 and {(k,b3), (k,b4)} in KVS1 for some distinct key, it creates {(a0,b3), (a0,b4), (a1,b3), (a1,b4), (a2,b3), (a2,b4)}. Effective-options: NOTHREADNG. See struct kmr_option.

Definition at line 696 of file kmrmoreops.c.

int kmr_ranking ( KMR_KVS kvi,
KMR_KVS kvo,
long *  count,
struct kmr_option  opt 
)

Assigns a ranking to key-value pairs, and returns the number of the total elements in COUNT.

Ranking is a position in the key-value stream. That is, for example, given a sequence {(k0,v0), (k1,v1), (k2,v2)}, it creates {(0,(k0,v0)), (1,(k1,v1)), (2,(k2,v2))}. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.

Definition at line 764 of file kmrmoreops.c.

int kmr_distribute ( KMR_KVS kvi,
KMR_KVS kvo,
_Bool  cyclic,
struct kmr_option  opt 
)

Distributes key-values so that each rank has approximately the same number of pairs.

It is used to level the load of mapping among ranks by calling it before mapping. kmr_shuffle() can be sufficient to distribute pairs in most cases, but sometimes it results in uneven distribution because shuffling is based on hashing on the keys. Effective-options: NOTHREADING, INSPECT, KEEP_OPEN. See struct kmr_option.

Definition at line 835 of file kmrmoreops.c.

int kmr_scan_on_values ( KMR_KVS kvi,
KMR_KVS kvo,
KMR_KVS total,
kmr_redfn_t  r 
)

Prefix-scans every key-value with a reduce-function (non-self-inclusively) and generates the final value in TOTAL (it generates the same value on all the ranks in the TOTAL).

The key-values are scanned in the order in the KVS as they are concatenated in the rank-order. The reduce-function should be associative and free of side-effects (because it is called multiple times on the same data). The reduce-function should output a single key-value when given any number of key-value pairs. Furthermore, it should output an identity element when it is given zero key-value pairs.

Definition at line 943 of file kmrmoreops.c.

int kmr_shuffle_leveling_pair_count ( KMR_KVS kvi,
KMR_KVS kvo 
)

Shuffles key-values so that each rank has approximately the same number of pairs.

It collects the same keys on a rank (cf. kmr_distribute()).

Definition at line 1074 of file kmrmoreops.c.

int kmr_choose_first_part ( KMR_KVS kvi,
KMR_KVS kvo,
long  n,
struct kmr_option  opt 
)

Chooses the first N entries from a key-value stream KVI.

The option nothreading is implied to keep the ordering. Effective-options: INSPECT, KEEP_OPEN. See struct kmr_option.

Definition at line 1145 of file kmrmoreops.c.

int kmr_map_for_some ( KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
struct kmr_option  opt,
kmr_mapfn_t  m 
)

Maps until some key-value are added.

It stops processing, when the output is non-empty. It does not guarantee singleness. Existence/emptiness be checked by kmr_get_element_count().

Definition at line 1170 of file kmrmoreops.c.

int kmr_reduce_for_some ( KMR_KVS kvi,
KMR_KVS kvo,
void *  arg,
struct kmr_option  opt,
kmr_redfn_t  r 
)

Reduces until some key-value are added.

It stops processing, when the output is non-empty. It does not guarantee singleness. Existence/emptiness be checked by kmr_get_element_count().

Definition at line 1183 of file kmrmoreops.c.

struct kmr_ntuple_entry kmr_nth_ntuple ( struct kmr_ntuple u,
int  nth 
)

Returns an NTH entry of an n-tuple.

It returns a pair of a length and a pointer.

Definition at line 1197 of file kmrmoreops.c.

int kmr_size_ntuple ( struct kmr_ntuple u)

Returns the storage size of an n-tuple.

Definition at line 1211 of file kmrmoreops.c.

int kmr_size_ntuple_by_lengths ( int  n,
int  len[] 
)

Returns the storage size of an n-tuple for N entries with LEN[i] size each.

Definition at line 1221 of file kmrmoreops.c.

void kmr_reset_ntuple ( struct kmr_ntuple u,
int  n,
int  marker 
)

Resets an n-tuple U with N entries and a MARKER.

Definition at line 1234 of file kmrmoreops.c.

int kmr_put_ntuple ( KMR mr,
struct kmr_ntuple u,
const int  size,
const void *  v,
const int  len 
)

Adds an entry V with LEN in an n-tuple U whose size is limited to SIZE.

An n-tuple should be initialized by kmr_reset_ntuple() first. Note it fills with zeros the gap of the alignment padding, allowing the n-tuples be used as opaque keys.

Definition at line 1252 of file kmrmoreops.c.

int kmr_put_ntuple_long ( KMR mr,
struct kmr_ntuple u,
const int  sz,
long  v 
)

Adds an integer value in an n-tuple U whose size is limited to SIZE.

See kmr_put_ntuple().

Definition at line 1274 of file kmrmoreops.c.

int kmr_put_ntuple_entry ( KMR mr,
struct kmr_ntuple u,
const int  sz,
struct kmr_ntuple_entry  e 
)

Adds an n-tuple entry E in an n-tuple U whose size is limited to SIZE.

See kmr_put_ntuple().

Definition at line 1284 of file kmrmoreops.c.

int kmr_add_ntuple ( KMR_KVS kvo,
void *  k,
int  klen,
struct kmr_ntuple u 
)

Adds an n-tuple U with a given key K and KLEN in a key-value stream KVO.

Definition at line 1295 of file kmrmoreops.c.

int kmr_separate_ntuples ( KMR mr,
const struct kmr_kv_box  kv[],
const long  n,
struct kmr_ntuple **  vv[2],
long  cnt[2],
int  markers[2],
_Bool  disallow_other_entries 
)

Separates the n-tuples stored in the value part of KV into the two sets by their marker values.

It is intended to be used in reduce functions. It separates the n-tuples to the first set by marker=MARKERS[0] and to the second set by marker=MARKERS[1]. It returns two malloced arrays in VV with their sizes in CNT. The arrays VV[0] and VV[1] should be freed by the caller.

Definition at line 1318 of file kmrmoreops.c.

int kmr_product_ntuples ( KMR_KVS kvo,
struct kmr_ntuple **  vv[2],
long  cnt[2],
int  marker,
int  slots[][2],
int  nslots,
int  keys[][2],
int  nkeys 
)

Makes a direct product of the two sets of n-tuples VV[0] and VV[1] with their counts in CNT[0] and CNT[1].

It is intended to be used in reduce functions. The resulting n-tuples are created by SLOTS, which chooses i-th entry of the n-tuples by the SLOTS[i][0]-th entry from the the SLOTS[i][1] set, 0 from the first set and 1 from the second set. The product n-tuples have MARKER and are inserted into KVO under the new key. The new key is selected like values using KEYS[j][0] and KEYS[j][1]. The key is not an n-tuple when NKEYS=1, or an n-tuple of KEYS[j] entries. The n-tuple key has zero as a marker. Note that it does not remove duplicate entries.

Definition at line 1528 of file kmrmoreops.c.

int kmr_histogram_count_by_ranks ( KMR_KVS kvs,
long *  frq,
double *  var,
_Bool  rankzeroonly 
)

Fills an integer array FRQ[i] with the count of the elements of each rank.

The array FRQ be as large as nprocs. It also fills VAR[0]=average, VAR[1]=variance, VAR[2]=min, and VAR[3]=max. FRQ or VAR can be null.

Definition at line 1569 of file kmrmoreops.c.