KMR
kmrgenscript.in.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # Copyright (C) 2012-2016 RIKEN AICS
3 
4 ## \file kmrgenscript.in.py KMR-Shell Job-Script Generator.
5 
6 import sys
7 import os
8 from optparse import OptionParser
9 
10 kmrhome = '@KMRHOME@'
11 
## Checks file existence.
# If the path does not exist, it writes an error message to standard
# error and exits with status 1; otherwise it returns normally.
# @param path file or directory path to check.

def checkexist(path):
    if not os.path.exists(path):
        # Report on stderr with a non-zero status so that calling shell
        # scripts can detect the failure.
        sys.stderr.write('Error: file or dir "%s" is not exist.\n' % path)
        sys.exit(1)
20 
## Checks that a path is a usable directory.
# If the path exists it must be a directory.  If it does not exist it
# is created when force is true (only the last path component is
# created).  In every failing case an error message is written to
# standard error and the program exits with status 1.
# @param path directory path to check.
# @param force create the directory when it does not exist.

def checkdir(path, force):
    if os.path.exists(path):
        if not os.path.isdir(path):
            sys.stderr.write('Error: "%s" is not directory.\n' % path)
            sys.exit(1)
        return

    if not force:
        sys.stderr.write('Error: directory "%s" is not exist. '
                         'create it or use -f option.\n' % path)
        sys.exit(1)

    try:
        os.mkdir(path)
    except (OSError, IOError):
        # os.mkdir signals failure with OSError; on Python 2 that is not
        # an IOError, so both are listed to cover every interpreter.
        sys.stderr.write('Error: could not create "%s".\n' % path)
        sys.exit(1)
40 
## Reads a job-script template file.
# @param path template file pathname.
# @return the file contents, or '' when the file cannot be opened or read.

def _read_template(path):
    try:
        src = open(path)
        try:
            return src.read()
        finally:
            # Close explicitly; try/finally keeps Python 2.4 compatibility.
            src.close()
    except IOError:
        return ''

## Generates job-script for K.
# The template is looked up as ./kmrgenscript.template first and then
# as <kmrhome>/lib/kmrgenscript.template.  When neither can be read an
# error message is written to standard error and the program exits
# with status 1.  The filled-in script, followed by a newline, is
# written to scrfile, or to standard output when scrfile is None.
# @param node number of node to execute.
# @param infile input file pathname.
# @param outfile output file prefix.
# @param indir directory path name that has input files.
# @param outdir directory path name that has result output files.
# @param rsctime resource time limit.
# @param mapper pathname of mapper program.
# @param reducer pathname of reducer program.
# @param multi multiple input file for one mapper process.
# @param scrfile output script file name.

def k_scheduler(node, infile, outfile, indir, outdir, rsctime, mapper, reducer, multi, scrfile):

    # Read template file.
    # NOTE(review): the lookup relative to this script (../lib) was
    # commented out upstream and is still disabled; confirm before
    # re-enabling it.
    template = _read_template('kmrgenscript.template')
    if template == '':
        libdir = os.path.realpath(kmrhome + '/lib')
        template = _read_template(libdir + '/kmrgenscript.template')
    if template == '':
        sys.stderr.write('Error: could not open job-script template.\n')
        sys.exit(1)

    # Width used to zero-pad the rank in per-rank file names.
    ncol = len(str(node - 1))
    mapper_cmd = './' + os.path.basename(mapper)
    reducer_cmd = None
    if reducer:
        reducer_cmd = './' + os.path.basename(reducer)

    # Stage in section
    stgin_lines = []
    # Stage in reducer if specified
    if reducer:
        stgin_lines.append('#PJM --stgin "rank=* %s %%r:./"' % reducer_cmd)
    if multi:
        # Deal the input files out to the ranks round-robin.  The names
        # are sorted because os.listdir returns them in arbitrary order,
        # which would make the generated script non-reproducible.
        rank = 0
        for name in sorted(os.listdir(indir)):
            ipath = os.path.join(indir, name)
            stgin_lines.append('#PJM --stgin "rank=%s %s %%r:./work/"'
                               % (rank, ipath))
            rank = rank + 1
            if rank >= node:
                rank = 0
    else:
        ipath = os.path.join(indir, infile)
        stgin_lines.append('#PJM --stgin "rank=* %s%%0%sr %%r:./input"'
                           % (ipath, ncol))
    stginstr = '\n'.join(stgin_lines)

    # Stage out section
    opath = os.path.join(outdir, outfile)
    stgoutstr = '#PJM --stgout "rank=* %%r:./output.%%r %s.%%0%sr"' % (opath, ncol)

    # program execute section
    # Multi-input mode reads the ./work directory; otherwise each rank
    # reads its single ./input file.
    if multi:
        target = './work'
    else:
        target = './input'
    execstr = 'mpiexec -n %s -of-proc output ./kmrshell -m %s' % (node, mapper_cmd)
    if reducer:
        execstr += ' -r %s' % reducer_cmd
    execstr += ' ' + target

    # replace template keyword using parameter.
    script = template % {'NODE': node, 'RSCTIME': rsctime, 'MAPPER': mapper,
                         'DATASTGIN': stginstr, 'DATASTGOUT': stgoutstr,
                         'EXEC': execstr, 'KMRHOME': kmrhome}

    # output script
    if scrfile is None:
        sys.stdout.write(script + '\n')
    else:
        out = open(scrfile, "w")
        try:
            out.write(script + '\n')
        finally:
            out.close()
134 
## Selects job-scheduler.
# Dispatches to the job-script generator of the requested scheduler.
# Only 'K' is handled; for any other value an error message is written
# to standard error and the program exits with status 1.
# @param node number of node to execute.
# @param infile input file pathname.
# @param outfile output file prefix.
# @param indir directory path name that has input files.
# @param outdir directory path name that has result output files.
# @param rsctime resource time limit.
# @param mapper pathname of mapper program.
# @param reducer pathname of reducer program.
# @param multi multiple input file for one mapper process.
# @param sched scheduler.
# @param scrfile output script file name.

def selectscheduler(node, infile, outfile, indir, outdir, rsctime, mapper, reducer, multi, sched, scrfile):
    if sched == 'K':
        k_scheduler(node, infile, outfile, indir, outdir, rsctime, mapper, reducer, multi, scrfile)
        return
    # for other schedulers...

    # Fail loudly: silently producing no script for an unsupported
    # scheduler would look like success to the caller.
    sys.stderr.write('Error: unknown scheduler "%s".\n' % sched)
    sys.exit(1)
152 
## kmrgenscript main routine.
# It works on Python 2.4 or later.
# Parses the command line, validates the options and generates the
# job script.  Usage errors are reported through the option parser,
# which prints the usage text to standard error and exits with status 2.
# @param argv argument list without the program name; when None the
#             option parser falls back to sys.argv[1:].

def main(argv=None):

    usage = "usage: %prog [options] -m mapper [-r reducer]"
    parser = OptionParser(usage)

    parser.add_option("-e",
                      "--number-of-exec-node",
                      dest="node",
                      type="int",
                      help="number of execute node",
                      metavar="number",
                      default=1)

    parser.add_option("-p",
                      "--input-file-prefix",
                      dest="infile",
                      type="string",
                      help="input filename prefix",
                      metavar="'string'",
                      default='part')

    parser.add_option("-o",
                      "--outputfile",
                      dest="outfile",
                      type="string",
                      help="output filename prefix",
                      metavar="'string'",
                      default='output')

    parser.add_option("-d",
                      "--inputdir",
                      dest="indir",
                      type="string",
                      help="input directory",
                      metavar="'string'",
                      default='./')

    parser.add_option("-O",
                      "--outputdir",
                      dest="outdir",
                      type="string",
                      help="output directory",
                      metavar="'string'",
                      default='./')

    parser.add_option("-t",
                      "--resource-time",
                      dest="rsctime",
                      type="string",
                      help="resource time",
                      metavar="'string'",
                      default='00:10:00')

    parser.add_option("-m",
                      "--mapper",
                      dest="mapper",
                      type="string",
                      help="mapper path",
                      metavar="'string'")

    parser.add_option("-r",
                      "--reducer",
                      dest="reducer",
                      type="string",
                      help="reducer path",
                      metavar="'string'")

    parser.add_option("-S",
                      "--scheduler",
                      dest="sched",
                      type="string",
                      help="scheduler (default is 'K')",
                      metavar="'string'",
                      default='K')

    parser.add_option("-w",
                      "--write-scriptfile",
                      dest="scrfile",
                      type="string",
                      help="script filename",
                      metavar="'string'")

    parser.add_option("-M",
                      "--multi-input",
                      dest="multi",
                      action="store_true",
                      help="multi input files to one node",
                      default=False)

    parser.add_option("-f",
                      "--force",
                      dest="force",
                      action="store_true",
                      help="force option",
                      default=False)

    (options, args) = parser.parse_args(argv)

    # check parameters.
    # parser.error() never returns: it prints the usage and exits with
    # status 2, so no explicit sys.exit() is needed after it.

    if len(args) > 1:
        parser.error("too many arguments")

    if not options.mapper:
        parser.error("mapper not specified")

    if options.node < 1:
        parser.error("number of execute node must be 1 or more")

    checkexist(options.indir)
    checkdir(options.outdir, options.force)

    if options.multi:
        # The default input directory is rejected so that -M never
        # stages in whatever happens to be in the current directory.
        if options.indir == "./":
            parser.error("-M option needs -d (input directory) option.")
        files = os.listdir(options.indir)
        if len(files) < options.node:
            parser.error('Node number is greater than number of files in %s.'
                         % options.indir)

    selectscheduler(options.node, options.infile, options.outfile,
                    options.indir, options.outdir, options.rsctime,
                    options.mapper, options.reducer, options.multi,
                    options.sched, options.scrfile)

if __name__ == "__main__":
    main()
280 
281 # Copyright (C) 2012-2016 RIKEN AICS
282 # This library is distributed WITHOUT ANY WARRANTY. This library can be
283 # redistributed and/or modified under the terms of the BSD 2-Clause License.