db/csv2csv.py
author Krista Bennett <krista@pep-project.org>
Wed, 06 Jun 2018 20:21:24 +0200
branch ENGINE-422
changeset 2742 cd06139cb7fc
parent 1513 e7f7e42385b5
permissions -rw-r--r--
close branch
     1 #! /usr/bin/env python3
     2 
     3 # This file is under GNU General Public License 3.0
     4 # see LICENSE.txt
     5 
     6 
     7 from argparse import ArgumentParser
     8 from fileinput import FileInput, hook_encoded
     9 import re, itertools, sys
    10 
    11 space = re.compile(r'^\s')
    12 
    13 p = ArgumentParser(description="re-write re-order csv and strip lines with too long words")
    14 p.add_argument('--input', '-i', type=str, default="somefile.cvs",
    15     help='input file')
    16 p.add_argument('--length', '-l', type=int, default=100,
    17     help='min word length to stripp a line')
    18 
    19 args = p.parse_args()
    20 
    21 try:
    22     from icu import UnicodeString, Locale
    23 except ImportError:
    24     print("warning: PyICU not installed, using fallback", file=sys.stderr)
    25 else:
    26     locale = Locale("utf-8")
    27 
    28 _all = (
    29         line.split(',')
    30         for line in FileInput(
    31                 args.input,
    32                 openhook=hook_encoded("utf-8")
    33             )
    34         if not space.match(line)
    35 )
    36 
    37 _some = (line for line in _all if len(line[2]) < args.length)
    38 
    39 for i, w in enumerate(_some):
    40     print("{l},{i},{w},0".format(l=w[0], i=i, w=w[2]))