KMR
wc.reducer.py
1 #! /usr/bin/python
2 #
3 # wc.reducer.py (2014-10-31)
4 #
5 # The combination of wc.mapper.py, wc.kvgen.sh and wc.reducer.py performs
6 # word counting of files in a specified directory.
7 #
8 # How to run this program.
9 #
10 # 1. Prepare input files
11 # $ mkdir ./inp
12 # $ cp ../file1 ./inp
13 # $ cp ../file2 ./inp
14 #
15 # There are two files so that two mappers will be run to process them.
16 #
17 # 2. Execute kmrrun
18 # $ mpiexec -machinefile machines -np 2 ./kmrrun \
19 # -m ./wc.mapper.py -k ./wc.kvgen.sh -r ./wc.reducer.py ./inp
20 #
21 
22 import sys
23 import os
24 
def sum_counts(path):
    """Add up the counts of all "<key> <count>" records stored in a file.

    Each non-blank line of the file must consist of exactly two
    whitespace-separated fields: a key and an integer count.  Only the key
    of the first record is kept; later keys are not compared against it
    (presumably kmrrun hands a reducer records that share one key --
    verify against the caller).

    Args:
        path: Name of the file holding the records.

    Returns:
        A (key, total) tuple.  key is None and total is 0 when the file
        contains no records.

    Raises:
        ValueError: A non-blank line does not have exactly two fields, or
            its second field is not an integer.
    """
    key = None
    total = 0
    # "with" closes the file even when a malformed record raises.
    with open(path) as rf:
        for line in rf:
            fields = line.split()
            if not fields:
                # A blank line (e.g. a stray trailing newline) carries no
                # record; skip it instead of failing on the unpack below.
                continue
            (k, ns) = fields
            if key is None:
                key = k
            total += int(ns)
    return (key, total)


def main(argv):
    """Run the reducer: sum the records in argv[1] and print the result.

    The input file is deleted once it has been read.  The output line
    "<key> <total>" is written to standard output; nothing is printed when
    the input held no records, so no made-up "None 0" record is emitted.

    Args:
        argv: Command-line vector; argv[1] is the input file name.

    Returns:
        The process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) != 2:
        sys.stderr.write("Specify an input file.\n")
        return 1

    (key, count) = sum_counts(argv[1])

    # The input file is consumed by this reducer, so remove it.
    os.remove(argv[1])
    if key is not None:
        # Parenthesised single-argument form prints identically under
        # Python 2 and Python 3.
        print("%s %d" % (key, count))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))