db/csv2csv.py
author Krista Grothoff <krista@pep-project.org>
Mon, 28 Nov 2016 17:50:30 +0100
branch ENGINE-112
changeset 1452 b81abf75647d
parent 788 f75a6f866c38
child 1478 88dae00c8954
permissions -rw-r--r--
closing branch ENGINE-112 (again ;)
     1 #! /usr/bin/env python3
     2 
     3 from argparse import ArgumentParser
     4 from fileinput import FileInput, hook_encoded
     5 import re, itertools, sys
     6 
     7 space = re.compile(r'^\s')
     8 
     9 p = ArgumentParser(description="re-write re-order csv and strip lines with too long words")
    10 p.add_argument('--input', '-i', type=str, default="somefile.cvs",
    11     help='input file')
    12 p.add_argument('--length', '-l', type=int, default=100,
    13     help='min word length to stripp a line')
    14 
    15 args = p.parse_args()
    16 
    17 try:
    18     from icu import UnicodeString, Locale
    19 except ImportError:
    20     print("warning: PyICU not installed, using fallback", file=sys.stderr)
    21 else:
    22     locale = Locale("utf-8")
    23 
    24 _all = (
    25         line.split(',')
    26         for line in FileInput(
    27                 args.input,
    28                 openhook=hook_encoded("utf-8")
    29             )
    30         if not space.match(line)
    31 )
    32 
    33 _some = line for line in _all if len(line[2]) < args.length
    34 
    35 for i, w in enumerate(_all):
    36     print("{l},{i},{w},0".format(l=w[0], i=i, w=w[2]))