KMR
Main Page
Data Types List
Files
File List
File Members
kmrrun
wc.mapper.py
1
#! /usr/bin/python
2
#
3
# wc.mapper.py (2014-10-31)
4
#
5
# The combination of wc.mapper.py, wc.kvgen.sh and wc.reducer.py performs
6
# word counting of files in a specified directory.
7
#
8
# How to run this program.
9
#
10
# 1. Prepare input files
11
# $ mkdir ./inp
12
# $ cp ../file1 ./inp
13
# $ cp ../file2 ./inp
14
#
15
# Because there are two input files, two mappers will be run to process them.
16
#
17
# 2. Execute kmrrun
18
# $ mpiexec -machinefile machines -np 2 ./kmrrun \
19
# -m ./wc.mapper.py -k ./wc.kvgen.sh -r ./wc.reducer.py ./inp
20
#
21
22
import
sys
23
import
re
24
25
# Words are delimited by runs of whitespace and/or '/' characters.
# Compiled once so the pattern is not looked up again for every line.
WORD_SEPARATOR = re.compile(r'[\s/]+')


def split_words(line):
    """Return the non-empty words contained in *line*.

    The line is split on runs of whitespace and '/' characters.  Splitting
    yields empty strings when the line starts or ends with a separator
    (e.g. the trailing newline); those are dropped.
    """
    return [w for w in WORD_SEPARATOR.split(line) if w]


def map_file(in_path, out_path):
    """Write a "<word> 1" record to *out_path* for each word in *in_path*.

    Both files are managed by ``with`` so they are closed even when
    reading or writing raises, which the previous explicit close() calls
    at the end of the script did not guarantee.
    """
    with open(in_path) as rf:
        with open(out_path, 'w') as wf:
            for line in rf:
                for w in split_words(line):
                    wf.write("%s 1\n" % (w,))


if __name__ == "__main__":
    argv = sys.argv
    # Exactly one argument is expected: the path of the input file.
    if len(argv) != 2:
        sys.stderr.write("Specify an input file.\n")
        sys.exit(1)

    # The key-value output is written next to the input as "<input>.out".
    map_file(argv[1], argv[1] + ".out")
Generated by
1.8.11