KMR
testckpt2.c
1 /* testckpt2.c (2014-05-28) */
2 
3 /* spawn test program for testing checkpoint restart.
4 
5  Run this program like this.
6 
7  $ KMROPTION=kmrrc mpiexec -machinefile hosts_file -n 3 ./a.out
8 
9  The contents of 'hosts_file' look like this:
10  umekoji0.aics27.riken.jp
11  umekoji0.aics27.riken.jp
12  umekoji0.aics27.riken.jp
13  umekoji0.aics27.riken.jp
14  umekoji0.aics27.riken.jp
15  umekoji0.aics27.riken.jp
16  umekoji0.aics27.riken.jp
17  umekoji0.aics27.riken.jp
18  umekoji0.aics27.riken.jp
19  umekoji0.aics27.riken.jp
20  umekoji0.aics27.riken.jp
21  umekoji0.aics27.riken.jp
22  umekoji0.aics27.riken.jp
23 
24  Then press Ctrl-C to interrupt execution of the program.
25  There will be checkpoint directories.
26 
27  Rerun the program in the same way as the previous run. The program
28  will be resumed from the previous state using the checkpoint files.
29 
30  You can change the number of mpi processes between the 1st run
31  and the 2nd run.
32 
33  $ KMROPTION=kmrrc mpiexec -machinefile hosts_file -n 2 ./a.out
34 
35  If you can see the following 10 key-value pairs on screen,
36  the answer is correct.
37 
38  k[8]=0;v[8]=0
39  k[8]=9;v[8]=9
40  k[8]=4;v[8]=4
41  k[8]=1;v[8]=1
42  k[8]=8;v[8]=8
43  k[8]=5;v[8]=5
44  k[8]=6;v[8]=6
45  k[8]=2;v[8]=2
46  k[8]=3;v[8]=3
47  k[8]=7;v[8]=7
48 
49  Depending on when you interrupt, these key-value pairs may be
50  printed in different runs. Watch the screen carefully.
51 */
52 
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <sys/types.h>
56 #include <unistd.h>
57 #include <mpi.h>
58 #include "kmr.h"
59 
60 #define NUM_COMMANDS 10
61 
62 static int
63 gen_cmdkvs(const struct kmr_kv_box kv,
64  const KMR_KVS *kvi, KMR_KVS *kvo, void *p, long i_)
65 {
66  char *cmd1 = "maxprocs=1 /bin/sleep 1";
67  char *cmd2 = "maxprocs=1 /bin/sleep 5";
68  char *cmd3 = "maxprocs=1 /bin/sleep 10";
69  int vlen = (int)strlen(cmd2) + 1;
70  for (int i = 0; i < NUM_COMMANDS; i++) {
71  char *cmd = NULL;
72  if (i % 3 == 0) {
73  cmd = cmd1;
74  } else if (i % 3 == 1) {
75  cmd = cmd2;
76  } else {
77  cmd = cmd3;
78  }
79  struct kmr_kv_box nkv = { .klen = sizeof(long),
80  .vlen = vlen * (int)sizeof(char),
81  .k.i = i,
82  .v.p = (void *)cmd };
83  kmr_add_kv(kvo, nkv);
84  }
85  return MPI_SUCCESS;
86 }
87 
88 static int
89 output_result(const struct kmr_kv_box kv,
90  const KMR_KVS *kvi, KMR_KVS *kvo, void *p, long i_)
91 {
92  struct kmr_kv_box nkv = { .klen = sizeof(long),
93  .vlen = sizeof(long),
94  .k.i = kv.k.i,
95  .v.i = kv.k.i };
96  kmr_add_kv(kvo, nkv);
97  return MPI_SUCCESS;
98 }
99 
100 int
101 main(int argc, char **argv)
102 {
103  MPI_Init(&argc, &argv);
104  kmr_init();
105  KMR *mr = kmr_create_context(MPI_COMM_WORLD, MPI_INFO_NULL, 0);
106  int rank;
107  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
108 
109  MPI_Barrier(MPI_COMM_WORLD);
110  if (rank == 0) {
111  fprintf(stderr, "Start\n");
112  }
113  MPI_Barrier(MPI_COMM_WORLD);
114 
115  KMR_KVS *kvs_commands = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_OPAQUE);
116  int ret = kmr_map_once(kvs_commands, 0, kmr_noopt, 1, gen_cmdkvs);
117  if (ret != MPI_SUCCESS) {
118  MPI_Abort(MPI_COMM_WORLD, 1);
119  }
120  kmr_dump_kvs(kvs_commands, 1);
121 
122  MPI_Barrier(MPI_COMM_WORLD);
123  if (rank == 0) {
124  fprintf(stderr, "MAP_ONCE DONE\n");
125  }
126  MPI_Barrier(MPI_COMM_WORLD);
127 
128  KMR_KVS *kvs_runcmds = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_OPAQUE);
129  ret = kmr_shuffle(kvs_commands, kvs_runcmds, kmr_noopt);
130  if (ret != MPI_SUCCESS) {
131  MPI_Abort(MPI_COMM_WORLD, 1);
132  }
133  kmr_dump_kvs(kvs_runcmds, 1);
134 
135  MPI_Barrier(MPI_COMM_WORLD);
136  if (rank == 0) {
137  fprintf(stderr, "SHUFFLE DONE\n");
138  }
139  MPI_Barrier(MPI_COMM_WORLD);
140 
141  KMR_KVS *kvs_results = kmr_create_kvs(mr, KMR_KV_INTEGER, KMR_KV_INTEGER);
142  struct kmr_spawn_option sopt_sepsp = { .separator_space = 1 };
143  ret = kmr_map_serial_processes(kvs_runcmds, kvs_results, 0, MPI_INFO_NULL,
144  sopt_sepsp, output_result);
145  kmr_dump_kvs(kvs_results, 1);
146  kmr_free_kvs(kvs_results);
147 
148  MPI_Barrier(MPI_COMM_WORLD);
149  if (rank == 0) {
150  fprintf(stderr, "MAP_SPAWN DONE\n");
151  }
152  MPI_Barrier(MPI_COMM_WORLD);
153 
154  if (rank == 0) {
155  fprintf(stderr, "Finish\n");
156  }
157 
158  kmr_free_context(mr);
159  kmr_fin();
160  MPI_Finalize();
161 }
int kmr_map_serial_processes(KMR_KVS *kvi, KMR_KVS *kvo, void *arg, MPI_Info info, struct kmr_spawn_option opt, kmr_mapfn_t mapfn)
Maps on processes started by MPI_Comm_spawn() to run serial processes.
Definition: kmrmapms.c:1945
Key-Value Stream (abstract).
Definition: kmr.h:587
int kmr_add_kv(KMR_KVS *kvs, const struct kmr_kv_box kv)
Adds a key-value pair.
Definition: kmrbase.c:751
int kmr_map_once(KMR_KVS *kvo, void *arg, struct kmr_option opt, _Bool rank_zero_only, kmr_mapfn_t m)
Maps once.
Definition: kmrbase.c:1402
#define kmr_create_kvs(MR, KF, VF)
Makes a new key-value stream (of type KMR_KVS) with the specified field datatypes.
Definition: kmr.h:71
int kmr_shuffle(KMR_KVS *kvi, KMR_KVS *kvo, struct kmr_option opt)
Shuffles key-value pairs to the appropriate destination ranks.
Definition: kmrbase.c:2036
KMR Context.
Definition: kmr.h:222
int kmr_free_kvs(KMR_KVS *kvs)
Releases a key-value stream (type KMR_KVS).
Definition: kmrbase.c:621
Handy Copy of a Key-Value Field.
Definition: kmr.h:358
int kmr_dump_kvs(KMR_KVS *kvs, int flag)
Dumps contents of a key-value stream to stdout.
Definition: kmrutil.c:1609
Options to Mapping by Spawns.
Definition: kmr.h:662
int kmr_fin(void)
Clears the environment.
Definition: kmrbase.c:124
#define kmr_init()
Sets up the environment.
Definition: kmr.h:747
int kmr_free_context(KMR *mr)
Releases a context created with kmr_create_context().
Definition: kmrbase.c:326
KMR Interface.
KMR * kmr_create_context(const MPI_Comm comm, const MPI_Info conf, const char *name)
Makes a new KMR context (a context has type KMR).
Definition: kmrbase.c:147