KMR
kmrfsplit.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # Copyright (C) 2012-2016 RIKEN AICS
3 
4 ## \file kmrfsplit.py KMR-Shell File Splitter.
5 
6 import sys
7 import os
8 import re
9 from optparse import OptionParser
10 
## Write part of the inputfile to outputfile.
#  Copies the bytes in the range [startpos, endpos) of the input file
#  to the output file.  Both files are opened in binary mode so that
#  the positions are exact byte offsets and no newline translation
#  takes place.  If the input file ends before endpos, only the
#  available bytes are copied.
#  If either file cannot be opened, an error message is printed and
#  the program exits with status 1.
#
#  @param ipath     inputfile path.
#  @param opath     outputfile path.
#  @param startpos  start position of part.
#  @param endpos    end position of part.

def writefile(ipath, opath, startpos, endpos):
    # read buffer size is 128Mbyte
    bufsize = 0x8000000
    try:
        fin = open(ipath, "rb")
    except IOError:
        print('Error: could not open "%s".' % ipath)
        sys.exit(1)

    # Nested try/finally (rather than "with") keeps the code usable on
    # old Python 2 interpreters while still closing both files on
    # every path.
    try:
        try:
            fout = open(opath, "wb")
        except IOError:
            print('Error: could not open "%s".' % opath)
            sys.exit(1)

        try:
            fin.seek(startpos, 0)
            remain = endpos - startpos
            while remain > 0:
                # buffered read/write.
                buf = fin.read(min(bufsize, remain))
                if not buf:
                    # End of file reached before endpos: stop here,
                    # otherwise "remain" would never decrease and the
                    # loop would not terminate.
                    break
                fout.write(buf)
                remain -= len(buf)
        finally:
            fout.close()
    finally:
        fin.close()
43 
## Calculate cutting point of file.
#  Search separator string in proper position of input file
#  and return cutting point of file.
#  The nominal cutting point is startpos + partsize (limited to the
#  file size).  The separator is searched from that position, in a
#  window of at most 128Mbyte, and the position just after the first
#  match is returned.
#  If the nominal cutting point is already the end of the file, the
#  file size is returned without searching.
#  If separator string not found, print error message and exit
#  with status 1.
#
#  @param ipath     input file path.
#  @param sep       separator string. (regular expression)
#  @param startpos  start position of the part.
#  @param partsize  nominal size of the part.
#  @return          cutting point (byte offset) of the file.

def getcuttingpoint(ipath, sep, startpos, partsize):
    # read buffer size is 128Mbyte
    bufsize = 0x8000000
    filesize = os.path.getsize(ipath)
    # remain size of file may be smaller than partition size.
    endpos = min(startpos + partsize, filesize)
    if endpos >= filesize:
        # Nothing is left after the nominal cutting point, so there is
        # nothing to search: the part simply ends at end of file.
        return filesize
    bufsize = min(bufsize, filesize - endpos)

    try:
        f = open(ipath, "rb")
    except IOError:
        print('Error: could not open "%s".' % ipath)
        sys.exit(1)
    try:
        f.seek(endpos, 0)
        # read size of buffer.
        buf = f.read(bufsize)
    finally:
        f.close()

    # The file is read as raw bytes so that match positions are byte
    # offsets.  A text separator must be converted to the same type as
    # the buffer before it can be used as a pattern on it.
    if not isinstance(sep, type(buf)) and hasattr(sep, "encode"):
        sep = sep.encode("utf-8")

    # search separator string in the buffer.
    ret = re.compile(sep).search(buf)
    if ret is None:
        print("Separator not found in proper position.\n")
        sys.exit(1)
    return endpos + ret.end()
82 
## Split a file using separator string.
#  The input file is divided into nums parts of roughly equal size,
#  each cut just after a match of the separator.  Parts are written
#  to odir as <opref>NNNNNN, where NNNNNN is the zero-padded number of
#  the part starting from 000000.  Progress is reported on the
#  standard output.
#  If nums is smaller than 1, an error message is printed and the
#  program exits with status 1.
#
#  @param nums    number of part to split.
#  @param sep     separator string. (regular expression)
#  @param odir    output directory of splitted files.
#  @param opref   output file prefix of splitted files.
#  @param infile  input file path.

def splitfile(nums, sep, odir, opref, infile):
    if nums < 1:
        # Zero would divide by zero below, and a negative count would
        # write a single file with a meaningless suffix.
        print('Error: number of file separation must be 1 or more.')
        sys.exit(1)

    startpos = 0
    filesize = os.path.getsize(infile)
    # Floor division keeps the part size an integer byte count.
    partsize = filesize // nums

    sys.stdout.write("Splitting file: ")
    for i in range(nums - 1):
        endpos = getcuttingpoint(infile, sep, startpos, partsize)

        # compose output file name.
        # ex: partXXXXXX, where XXXXXX is number of part.
        opath = os.path.join(odir, "%s%06d" % (opref, i))
        # output cutted part of input file.
        writefile(infile, opath, startpos, endpos)
        startpos = endpos
        sys.stdout.write('.')
        sys.stdout.flush()

    # output remain part of input file.
    opath = os.path.join(odir, "%s%06d" % (opref, nums - 1))
    writefile(infile, opath, startpos, filesize)
    print("done.")
115 
## kmrfsplit main routine.
#  It works on Python 2.4 or later.
#  Parses the command line, checks the input file and the output
#  directory, then splits the input file.
#  Usage errors are reported through the option parser (exit status
#  2); other errors print a message and exit with status 1.
#
#  @param argv  list of command line arguments without the program
#               name.  When None, the arguments of the running
#               program are used.

def main(argv=None):
    usage = "usage: %prog [options] inputfile"
    parser = OptionParser(usage)

    parser.add_option("-n",
                      "--num-separate",
                      dest="nums",
                      type="int",
                      help="number of file separation",
                      metavar="number",
                      default=1)

    parser.add_option("-s",
                      "--separator",
                      dest="sep",
                      type="string",
                      help="separator string",
                      metavar="'string'",
                      default='\n')

    parser.add_option("-d",
                      "--output-directory",
                      dest="odir",
                      type="string",
                      help="output directory",
                      metavar="'string'",
                      default="./")

    parser.add_option("-p",
                      "--output-file-prefix",
                      dest="opref",
                      type="string",
                      help="output filename prefix",
                      metavar="'string'",
                      default="part")

    parser.add_option("-f",
                      "--force",
                      dest="force",
                      action="store_true",
                      help="force option",
                      default=False)

    (options, args) = parser.parse_args(argv)

    # parameter check.
    # parser.error() prints the message and exits by itself.
    if len(args) != 1:
        parser.error("missing parameter")
    if options.nums < 1:
        parser.error("number of file separation must be 1 or more")

    inputfile = args[0]

    if not os.path.exists(inputfile):
        print('Error: inputfile %s is not exist.' % inputfile)
        sys.exit(1)

    if os.path.exists(options.odir):
        if not os.path.isdir(options.odir):
            print('Error: "%s" is not directory.' % options.odir)
            sys.exit(1)
    elif options.force:
        try:
            os.mkdir(options.odir)
        except OSError:
            # os.mkdir reports failure with OSError, not IOError.
            print('Error: could not create "%s".' % options.odir)
            sys.exit(1)
    else:
        print('Error: directory "%s" is not exist. create it or use -f option.' % options.odir)
        sys.exit(1)

    splitfile(options.nums, options.sep, options.odir, options.opref, inputfile)

if __name__ == "__main__":
    main()
192 
193 # Copyright (C) 2012-2016 RIKEN AICS
194 # This library is distributed WITHOUT ANY WARRANTY. This library can be
195 # redistributed and/or modified under the terms of the BSD 2-Clause License.