1 #! /usr/bin/env python3
3 # This file is under GNU General Public License 3.0
7 from argparse import ArgumentParser
8 from fileinput import FileInput, hook_encoded
9 import re, itertools, sys
# Compiled pattern that matches a line whose first character is whitespace.
11 space = re.compile(r'^\s')
# Command-line interface of the script.
# NOTE(review): this view of the file carries gutter line numbers and omits
# some original lines, so the add_argument() calls below appear unterminated.
13 p = ArgumentParser(description="re-write re-order csv and strip lines with too long words")
# --input / -i: string option, default "somefile.cvs".
# NOTE(review): the default looks like a typo for "somefile.csv" -- confirm.
14 p.add_argument('--input', '-i', type=str, default="somefile.cvs",
# --length / -l: integer option, default 100.
# NOTE(review): the help text says "min word length", but further down the
# value is used as an exclusive upper bound (len(line[2]) < args.length);
# rows whose field reaches this length are the ones dropped.
16 p.add_argument('--length', '-l', type=int, default=100,
17 help='min word length to stripp a line')
# Optional dependency: UnicodeString and Locale come from PyICU (module `icu`).
# NOTE(review): presumably this import sits in a try block and the warning
# below is the ImportError branch; the try/except lines are not visible in
# this view -- confirm.
22 from icu import UnicodeString, Locale
# Tell the user on stderr that the non-ICU fallback path is in use.
24 print("warning: PyICU not installed, using fallback", file=sys.stderr)
# NOTE(review): "utf-8" is an encoding name rather than a locale identifier
# (ICU locale IDs look like "en_US"); confirm which locale is intended here.
26 locale = Locale("utf-8")
# Iterate over the input through FileInput, decoding it as UTF-8 via the
# hook_encoded open hook; the `if not space.match(line)` clause keeps only
# lines that do not start with whitespace.
# NOTE(review): the enclosing expression (presumably the definition of `_all`)
# and the FileInput file argument are on lines not visible here -- confirm.
30 for line in FileInput(
32 openhook=hook_encoded("utf-8")
34 if not space.match(line)
# Lazily keep only the rows whose element at index 2 is shorter than --length.
# NOTE(review): assumes every row in `_all` has at least three elements;
# a shorter row would raise IndexError -- verify against the input format.
37 _some = (line for line in _all if len(line[2]) < args.length)
# Emit one comma-separated record per kept row on stdout:
# element 0, a running 0-based index, element 2, and a literal 0.
39 for i, w in enumerate(_some):
40 print("{l},{i},{w},0".format(l=w[0], i=i, w=w[2]))