KMR
wc.mapper.py
1 #! /usr/bin/python
2 #
3 # wc.mapper.py (2014-10-31)
4 #
5 # The combination of wc.mapper.py, wc.kvgen.sh and wc.reducer.py performs
6 # word counting of files in a specified directory.
7 #
8 # How to run this program.
9 #
10 # 1. Prepare input files
11 # $ mkdir ./inp
12 # $ cp ../file1 ./inp
13 # $ cp ../file2 ./inp
14 #
15 # There are two files so that two mappers will be run to process them.
16 #
17 # 2. Execute kmrrun
18 # $ mpiexec -machinefile machines -np 2 ./kmrrun \
19 # -m ./wc.mapper.py -k ./wc.kvgen.sh -r ./wc.reducer.py ./inp
20 #
21 
22 import sys
23 import re
24 
def main(argv):
    """Map one input file to "<word> 1" records.

    Reads the file named by argv[1] line by line, splits each line on runs
    of whitespace and '/' characters, and writes every non-empty token
    followed by " 1" on its own line to a file named argv[1] + ".out".

    argv is the full argument vector (program name first), so exactly one
    command-line argument, the input file path, is expected.

    Returns the process exit status: 1 when the argument count is wrong
    (after printing a usage hint to stderr), 0 on success.
    """
    if len(argv) != 2:
        sys.stderr.write("Specify an input file.\n")
        return 1

    in_path = argv[1]
    # Context managers guarantee both files are closed (and the output is
    # flushed) even if reading or writing raises part-way through.
    with open(in_path) as rf:
        with open(in_path + ".out", 'w') as wf:
            for line in rf:
                for w in re.split(r'[\s/]+', line):
                    # re.split yields '' at the edges when the line starts
                    # or ends with a separator (e.g. the trailing newline).
                    if w:
                        wf.write("%s 1\n" % w)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))