#! /usr/bin/env python3

# This file is under GNU General Public License 3.0
# see LICENSE.txt

"""Re-write a comma-separated word list, dropping lines with over-long words.

Each input line is split on ','.  Lines that start with whitespace (which
includes blank lines) are ignored.  For every remaining line whose third
field is shorter than ``--length`` one record is printed to stdout:

    <field 0>,<running index>,<field 2>,0

The running index counts only the records that are actually printed.
"""

from argparse import ArgumentParser
from fileinput import FileInput, hook_encoded
import re
import itertools
import sys

# Matches a line whose first character is whitespace; a bare "\n" matches too.
space = re.compile(r'^\s')


def convert_lines(lines, max_length):
    """Yield the re-written records for an iterable of raw input lines.

    Args:
        lines: iterable of text lines, with or without their line terminator.
        max_length: rows whose third field has ``max_length`` or more
            characters are dropped.

    Yields:
        One ``"<field 0>,<index>,<field 2>,0"`` string (no trailing newline)
        per kept row; ``index`` starts at 0 and counts kept rows only.
    """
    index = 0
    for raw in lines:
        if space.match(raw):
            continue
        # Strip the line terminator first: otherwise a row with exactly three
        # fields keeps "\n" inside the word, which skews the length check and
        # breaks the printed record in two.
        fields = raw.rstrip("\r\n").split(',')
        if len(fields) < 3:
            # Without a third field there is no word to emit; report the row
            # instead of dying with an IndexError half-way through the file.
            print("warning: skipping malformed line: {!r}".format(raw),
                  file=sys.stderr)
            continue
        word = fields[2]
        if len(word) >= max_length:
            continue
        yield "{l},{i},{w},0".format(l=fields[0], i=index, w=word)
        index += 1


def _probe_icu():
    """Try to load PyICU; warn on stderr when it is not installed.

    Nothing visible in this script uses the imported names or the Locale
    object; the probe is kept so the warning and the Locale construction
    happen exactly as before.
    """
    try:
        from icu import UnicodeString, Locale
    except ImportError:
        print("warning: PyICU not installed, using fallback", file=sys.stderr)
        return None
    else:
        return Locale("utf-8")


def main(argv=None):
    """Parse the command line, convert the input file and print the result.

    Args:
        argv: argument list without the program name; ``None`` makes
            argparse read ``sys.argv[1:]``.
    """
    p = ArgumentParser(description="re-write re-order csv and strip lines with too long words")
    p.add_argument('--input', '-i', type=str, default="somefile.cvs",
                   help='input file')
    p.add_argument('--length', '-l', type=int, default=100,
                   help='min word length to stripp a line')

    args = p.parse_args(argv)

    # Probed after argument parsing so that --help and usage errors exit
    # before the PyICU warning is printed.
    _probe_icu()

    # The context manager closes the input file even if conversion fails.
    with FileInput(args.input, openhook=hook_encoded("utf-8")) as source:
        for record in convert_lines(source, args.length):
            print(record)


if __name__ == "__main__":
    main()