db/csv2csv.py
author Krista Bennett <krista@pep-project.org>
Fri, 04 May 2018 16:30:21 +0200
branchlocal_cpptest
changeset 2652 43b913f99a27
parent 1513 e7f7e42385b5
permissions -rw-r--r--
Shelving broken things to break other things
Edouard@788
     1
#! /usr/bin/env python3
Edouard@788
     2
vb@1513
     3
# This file is under GNU General Public License 3.0
vb@1513
     4
# see LICENSE.txt
vb@1513
     5
vb@1513
     6
Edouard@788
     7
from argparse import ArgumentParser
Edouard@788
     8
from fileinput import FileInput, hook_encoded
Edouard@788
     9
import re, itertools, sys
Edouard@788
    10
Edouard@788
    11
# Matches any line whose first character is whitespace; a blank line ("\n") also matches.
space = re.compile(r'^\s')
Edouard@788
    12
Edouard@788
    13
# Command-line interface: the CSV file to read and the word-length cut-off.
p = ArgumentParser(description="re-write re-order csv and strip lines with too long words")
# NOTE(review): the default ends in ".cvs", which looks like a typo for ".csv".
# It is left unchanged because it is a runtime default -- confirm before renaming.
p.add_argument('--input', '-i', type=str, default="somefile.cvs",
    help='input file')
# Help text typo fixed ("stripp" -> "strip"); option names, type and default are unchanged.
p.add_argument('--length', '-l', type=int, default=100,
    help='min word length to strip a line')
Edouard@788
    18
Edouard@788
    19
# Parse the process command line with the parser built above; the result exposes
# `args.input` and `args.length`.
args = p.parse_args()
Edouard@788
    20
Edouard@788
    21
# PyICU is optional: if it cannot be imported, emit a warning on stderr and carry on.
try:
    from icu import UnicodeString, Locale
except ImportError:
    sys.stderr.write("warning: PyICU not installed, using fallback" + "\n")
else:
    # NOTE(review): "utf-8" is an encoding name rather than a locale identifier,
    # and `locale` is not used in the visible part of this script -- confirm intent.
    locale = Locale("utf-8")
Edouard@788
    27
Edouard@788
    28
# Lazily read the input file as UTF-8 and split each line on commas.
# Lines that begin with a whitespace character are skipped.
_all = (
    raw_line.split(',')
    for raw_line in FileInput(args.input, openhook=hook_encoded("utf-8"))
    if space.match(raw_line) is None
)
Edouard@788
    36
edouard@1478
    37
# Keep only rows whose third field is shorter than the --length cut-off.
# Rows with fewer than three comma-separated fields are skipped instead of
# aborting the whole run with an IndexError on line[2].
_some = (line for line in _all if len(line) > 2 and len(line[2]) < args.length)
Edouard@788
    38
edouard@1478
    39
# Emit one output row per kept input row: first field, running index,
# third field, and a constant trailing 0.
row_template = "{l},{i},{w},0"
for i, w in enumerate(_some):
    head_field, word_field = w[0], w[2]
    print(row_template.format(l=head_field, i=i, w=word_field))