KMR
testckpt4.c
1 /* testckpt4 (2015-03-24) */
2 
3 /* spawn test program for testing checkpoint restart in 'selective' mode.
4 
5  Run this program like this.
6 
7  $ KMROPTION=kmrrc mpiexec -machinefile hosts_file -n 3 ./a.out
8 
9  The contents of 'hosts_file' are like this:
10  umekoji0.aics27.riken.jp
11  umekoji0.aics27.riken.jp
12  umekoji0.aics27.riken.jp
13  umekoji0.aics27.riken.jp
14  umekoji0.aics27.riken.jp
15  umekoji0.aics27.riken.jp
16  umekoji0.aics27.riken.jp
17  umekoji0.aics27.riken.jp
18  umekoji0.aics27.riken.jp
19  umekoji0.aics27.riken.jp
20  umekoji0.aics27.riken.jp
21  umekoji0.aics27.riken.jp
22  umekoji0.aics27.riken.jp
23 
24  Then press Ctrl-C to interrupt execution of the program.
25  There will be checkpoint directories.
26 
27  Rerun the program in the same way as the previous run. The program will
28  be resumed from the previous state using the checkpoint files.
29 
30  If you can see the following 10 key-value pairs on screen,
31  the answer is correct.
32 
33  k[8]=0;v[8]=0
34  k[8]=9;v[8]=9
35  k[8]=4;v[8]=4
36  k[8]=1;v[8]=1
37  k[8]=8;v[8]=8
38  k[8]=5;v[8]=5
39  k[8]=6;v[8]=6
40  k[8]=2;v[8]=2
41  k[8]=3;v[8]=3
42  k[8]=7;v[8]=7
43 
44  These key-value pairs may be split across different runs depending on
45  when you interrupt the program, so watch the screen carefully on each run.
46 */
47 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <mpi.h>
#include "kmr.h"
54 
55 #define NUM_COMMANDS 10
56 
57 static int
58 gen_cmdkvs(const struct kmr_kv_box kv,
59  const KMR_KVS *kvi, KMR_KVS *kvo, void *p, long i_)
60 {
61  char *cmd1 = "maxprocs=1 /bin/sleep 1";
62  char *cmd2 = "maxprocs=1 /bin/sleep 5";
63  char *cmd3 = "maxprocs=1 /bin/sleep 10";
64  int vlen = (int)strlen(cmd2) + 1;
65  for (int i = 0; i < NUM_COMMANDS; i++) {
66  char *cmd = NULL;
67  if (i % 3 == 0) {
68  cmd = cmd1;
69  } else if (i % 3 == 1) {
70  cmd = cmd2;
71  } else {
72  cmd = cmd3;
73  }
74  struct kmr_kv_box nkv = { .klen = sizeof(long),
75  .vlen = vlen * (int)sizeof(char),
76  .k.i = i,
77  .v.p = (void *)cmd };
78  kmr_add_kv(kvo, nkv);
79  }
80  return MPI_SUCCESS;
81 }
82 
83 static int
84 output_result(const struct kmr_kv_box kv,
85  const KMR_KVS *kvi, KMR_KVS *kvo, void *p, long i_)
86 {
87  struct kmr_kv_box nkv = { .klen = sizeof(long),
88  .vlen = sizeof(long),
89  .k.i = kv.k.i,
90  .v.i = kv.k.i };
91  kmr_add_kv(kvo, nkv);
92  return MPI_SUCCESS;
93 }
94 
95 int
96 main(int argc, char **argv)
97 {
98  MPI_Init(&argc, &argv);
99  kmr_init();
100  KMR *mr = kmr_create_context(MPI_COMM_WORLD, MPI_INFO_NULL, 0);
101  int rank;
102  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
103 
104  MPI_Barrier(MPI_COMM_WORLD);
105  if (rank == 0) {
106  fprintf(stderr, "Start\n");
107  }
108  MPI_Barrier(MPI_COMM_WORLD);
109 
110  KMR_KVS *kvs_commands = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_OPAQUE);
111  int ret = kmr_map_once(kvs_commands, 0, kmr_noopt, 1, gen_cmdkvs);
112  if (ret != MPI_SUCCESS) {
113  MPI_Abort(MPI_COMM_WORLD, 1);
114  }
115  kmr_dump_kvs(kvs_commands, 1);
116 
117  MPI_Barrier(MPI_COMM_WORLD);
118  if (rank == 0) {
119  fprintf(stderr, "MAP_ONCE DONE\n");
120  }
121  MPI_Barrier(MPI_COMM_WORLD);
122 
123  KMR_KVS *kvs_runcmds = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_OPAQUE);
124  ret = kmr_shuffle(kvs_commands, kvs_runcmds, kmr_noopt);
125  if (ret != MPI_SUCCESS) {
126  MPI_Abort(MPI_COMM_WORLD, 1);
127  }
128  kmr_dump_kvs(kvs_runcmds, 1);
129 
130  MPI_Barrier(MPI_COMM_WORLD);
131  if (rank == 0) {
132  fprintf(stderr, "SHUFFLE DONE\n");
133  }
134  MPI_Barrier(MPI_COMM_WORLD);
135 
136  KMR_KVS *kvs_results = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_INTEGER);
137  struct kmr_spawn_option sopt_sepsp = { .separator_space = 1,
138  .take_ckpt = 1 };
139  ret = kmr_map_serial_processes(kvs_runcmds, kvs_results, 0, MPI_INFO_NULL,
140  sopt_sepsp, output_result);
141  kmr_dump_kvs(kvs_results, 1);
142  kmr_free_kvs(kvs_results);
143 
144  MPI_Barrier(MPI_COMM_WORLD);
145  if (rank == 0) {
146  fprintf(stderr, "MAP_SPAWN DONE\n");
147  }
148  MPI_Barrier(MPI_COMM_WORLD);
149 
150  if (rank == 0) {
151  fprintf(stderr, "Finish\n");
152  }
153 
154  kmr_free_context(mr);
155  kmr_fin();
156  MPI_Finalize();
157 }
int kmr_map_serial_processes(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
Maps on processes started by MPI_Comm_spawn() to run serial processes.
Definition: kmrmapms.c:1945
Key-Value Stream (abstract).
Definition: kmr.h:587
int kmr_add_kv(KMR_KVS *kvs, const struct kmr_kv_box kv)
Adds a key-value pair.
Definition: kmrbase.c:751
int kmr_map_once(KMR_KVS *kvo, void *arg, struct kmr_option opt, _Bool rank_zero_only, kmr_mapfn_t m)
Maps once.
Definition: kmrbase.c:1402
#define kmr_create_kvs(MR, KF, VF)
Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes.
Definition: kmr.h:71
int kmr_shuffle(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Shuffles key-value pairs to the appropriate destination ranks.
Definition: kmrbase.c:2036
KMR Context.
Definition: kmr.h:222
int kmr_free_kvs(KMR_KVS *kvs)
Releases a key-value stream (type KMR_KVS).
Definition: kmrbase.c:621
Handy Copy of a Key-Value Field.
Definition: kmr.h:358
int kmr_dump_kvs(KMR_KVS *kvs, int flag)
Dumps contents of a key-value stream to stdout.
Definition: kmrutil.c:1609
Options to Mapping by Spawns.
Definition: kmr.h:662
int kmr_fin(void)
Clears the environment.
Definition: kmrbase.c:124
#define kmr_init()
Sets up the environment.
Definition: kmr.h:747
int kmr_free_context(KMR *mr)
Releases a context created with kmr_create_context().
Definition: kmrbase.c:326
KMR Interface.
KMR * kmr_create_context(const MPI_Comm comm, const MPI_Info conf, const char *name)
Makes a new KMR context (a context has type KMR).
Definition: kmrbase.c:147