Edouard@788
|
1 |
#! /usr/bin/env python3
|
Edouard@788
|
2 |
|
vb@1517
|
3 |
# This file is under GNU General Public License 3.0
|
vb@1517
|
4 |
# see LICENSE.txt
|
vb@1517
|
5 |
|
vb@1517
|
6 |
|
Edouard@788
|
7 |
from argparse import ArgumentParser
|
Edouard@788
|
8 |
from fileinput import FileInput, hook_encoded
|
Edouard@788
|
9 |
import re, itertools, sys
|
Edouard@788
|
10 |
|
Edouard@788
|
11 |
# Matches a line whose first character is whitespace (this includes a bare
# newline, i.e. an empty line); the reader further down skips such lines.
space = re.compile(r'^\s')
|
Edouard@788
|
12 |
|
Edouard@788
|
13 |
# Command-line interface: which file to read and the word-length cut-off.
p = ArgumentParser(description="re-write re-order csv and strip lines with too long words")
# NOTE(review): the default file name ends in ".cvs", possibly a typo for
# ".csv" -- left unchanged here; confirm before altering it.
p.add_argument('--input', '-i', type=str, default="somefile.cvs",
               help='input file')
# A row is dropped when its third field is at least this many characters long.
p.add_argument('--length', '-l', type=int, default=100,
               help='min word length to strip a line')

# Parsed at import time; the statements below read args.input and args.length.
args = p.parse_args()
|
Edouard@788
|
20 |
|
Edouard@788
|
21 |
# PyICU is optional: when it is missing the script only prints a warning to
# stderr and carries on.
try:
    from icu import UnicodeString, Locale
except ImportError:
    print("warning: PyICU not installed, using fallback", file=sys.stderr)
else:
    # NOTE(review): "utf-8" names an encoding rather than a language/region;
    # confirm this is the intended ICU locale identifier. Neither `locale`
    # nor `UnicodeString` is used in the visible part of this script.
    locale = Locale("utf-8")
|
Edouard@788
|
27 |
|
Edouard@788
|
28 |
# Read the input as UTF-8 and print one re-numbered row per accepted line.
# FileInput is used as a context manager so the file is closed once the
# output has been written.
with FileInput(args.input, openhook=hook_encoded("utf-8")) as _source:
    # Split every line that does not start with whitespace into its
    # comma-separated fields. The line terminator is removed first so it can
    # neither end up inside the last field nor count towards its length.
    _all = (
        line.rstrip('\n').split(',')
        for line in _source
        if not space.match(line)
    )

    # Keep rows whose third field is shorter than --length. Rows with fewer
    # than three fields are dropped instead of raising IndexError.
    _some = (
        fields
        for fields in _all
        if len(fields) > 2 and len(fields[2]) < args.length
    )

    # Output columns: first field, new running index, third field, constant 0.
    for i, w in enumerate(_some):
        print("{l},{i},{w},0".format(l=w[0], i=i, w=w[2]))
|