def test_conf():
    # Checks the rows csv.reader produces with the 'whitespace' dialect, both
    # for inline text and for a config file read from disk.
    #
    # NOTE(review): the triple-quoted inputs below appear on a single line in
    # this view, while the expected values imply several input lines --
    # confirm the original line breaks before relying on these cases.
    def _reader(text):
        # Parse *text* with the 'whitespace' dialect and collect every row.
        return list(csv.reader(StringIO(text), 'whitespace'))

    # Quoted arguments may hold a space or a newline.
    assert _reader('replace " " "\n"') == [['replace', ' ', '\n']]

    # Comment text is expected to be dropped; a quoted argument keeps its
    # inner space.
    expected = [['Lowercase'], ['regex', 'y t', 'Y T'], ['Replace', 'e', 'a']]
    gotten = _reader('''# using a simple config file Lowercase \n # it even supports comments # If there is a space in the argument, make sure you quote it though! regex "y t" "Y T" # extraneous whitespaces are ignored Replace e a''')
    assert gotten == expected

    # Same dialect applied to a file object; the path is relative, so this
    # depends on the working directory the tests are run from.
    expected = [['Lowercase'], ['regex', 'regextestforconf.csv'],
                ['Replace', 'simplereplacetestforconf.csv']]
    file = './resources/test/normalizers/configfile.conf'
    with open(file) as f:
        assert list(csv.reader(f, 'whitespace')) == expected

    # Expected rows include an argument containing newlines and one where a
    # doubled double quote ("") comes out as a single literal quote.
    expected = [
        ['Normalizer1', 'arg1', 'arg 2'],
        ['Normalizer2'],
        ['Normalizer3', 'This is argument 1\nSpanning multiple lines\n', 'argument 2'],
        ['Normalizer4', 'argument with double quote (")']
    ]
    assert _reader(""" Normalizer1 arg1 "arg 2" # This is a comment Normalizer2 # (Normalizer2 has no arguments) Normalizer3 "This is argument 1 Spanning multiple lines " "argument 2" Normalizer4 "argument with double quote ("")" """) == expected

    # Trailing whitespace outside quotes is expected to be ignored, while
    # whitespace inside quotes is kept.
    assert _reader("lower case ") == [['lower', 'case']]
    assert _reader("lower case \n") == [['lower', 'case']]
    assert _reader('test "stuff "\t') == [['test', 'stuff ']]
    assert _reader('test "stuff "\n') == [['test', 'stuff ']]
    assert _reader('test "stuff\n\t"\n\t \t YEs \t \n') == \
        [['test', 'stuff\n\t'], ['YEs']]

    # Rows expose a lineno attribute; after four leading newlines the first
    # returned row is expected to report 5.
    assert _reader("\n\n\n\nline5")[0].lineno == 5

    # Comment lines and blank lines between entries produce no rows.
    assert _reader('# Remove XML tags\n"<[^>]+>" " " \n\n# Remove punctuation\n"[,.-]" " "') == \
        [['<[^>]+>', ' '], ["[,.-]", " "]]
def __init__(self, normalizer, file, encoding=None, path=None):
    """
    Build a composite normalizer from the rows of a csv file.

    Every row yielded by ``csv.reader`` is unpacked as the positional
    arguments of ``normalizer``; each result is added, in file order, to a
    ``NormalizationComposite`` stored on ``self._normalizer``.

    :param normalizer: callable invoked as ``normalizer(*row)`` for each row
    :param file: file name; its value before ``path`` is applied is used as
        the title of the composite
    :param encoding: encoding passed to ``open``; falls back to
        ``settings.default_encoding`` when ``None``
    :param path: optional directory joined in front of ``file``
    :raises ValueError: when adding a row raises ``TypeError``; the message
        holds the file, the row's ``lineno``, the normalizer, the row and the
        original error, which is also chained as ``__cause__``
    """
    if encoding is None:
        encoding = settings.default_encoding

    # Keep the name as given for the title; only the opened path gets the prefix.
    title = file
    if path is not None:
        file = os.path.join(path, file)

    with open(file, encoding=encoding) as f:
        self._normalizer = NormalizationComposite(title=title)

        for line in csv.reader(f):
            try:
                self._normalizer.add(normalizer(*line))
            except TypeError as e:
                # Chain explicitly so the traceback marks the TypeError as the
                # direct cause rather than an error during error handling.
                raise ValueError("%s:%d %r(%r) %r" %
                                 (file, line.lineno, normalizer, line, e)) from e
def test_csv():
    # Checks the rows csv.reader produces when no dialect argument is given
    # (get_reader is called with the text only).
    #
    # NOTE(review): the triple-quoted inputs below appear on a single line in
    # this view, while the expected values imply several input lines --
    # confirm the original line breaks before relying on these cases.
    _reader = get_reader
    assert _reader('replace," ","\n"') == [['replace', ' ', '\n']]

    # Both the reader() factory and the Reader class are expected to give a
    # csv.Reader instance.
    assert type(csv.reader(StringIO(''))) is csv.Reader
    assert type(csv.Reader(StringIO(''), csv.DefaultDialect)) is csv.Reader

    # An empty quoted field is a row with one empty value; empty input has no rows.
    assert _reader('""') == [['']]
    assert _reader('') == []

    # example1 / expected1 are not defined inside this function.
    assert _reader(example1) == expected1

    # Quoting: doubled quotes give a literal quote, a quoted comma is not a
    # separator, and whitespace around unquoted values is dropped.
    assert _reader('"","test"," quiot"""') == [['', 'test', ' quiot"']]
    assert _reader(' val1 ,\t val2 \n') == [['val1', 'val2']]
    assert _reader(' ","') == [[',']]
    assert _reader('""') == [['']]
    assert _reader(''' "A,B","""A,B""", ''') == [['A,B', '"A,B"', '']]
    assert _reader('"A,B","""A,B""",') == [['A,B', '"A,B"', '']]
    assert _reader(' A\tB, \t B\tA\t ,') == [['A\tB', 'B\tA', '']]

    # '#' inside quotes is data; outside quotes the rest of the line is
    # expected to be ignored.
    assert _reader('"#nocomment",#yescomment\n') == [['#nocomment', '']]
    assert _reader('"#nocomment",#here ') == [['#nocomment', '']]
    assert _reader('"#nocomment",#') == [['#nocomment', '']]
    assert _reader('"#nocomment"# test') == [['#nocomment']]
    assert _reader('"#nocomment" # commented') == [['#nocomment']]

    assert _reader('\t t ') == [['t']]
    assert _reader('t') == [['t']]
    assert _reader('replace," ","\n"') == [['replace', ' ', '\n']]
    # A lone separator yields two empty values.
    assert _reader(',') == [['', '']]
    # A line that is only a comment yields no row.
    assert _reader('#yescomment,#here \n') == []
    assert _reader(r'''# test "(?s)<\?xml.*</head>","" # inline comment # ignore "<[^>]+>"," " # test "[,.-\?]", "" # more comments''') == [['(?s)<\\?xml.*</head>', ''],
                                                                                                                                           ['<[^>]+>', ' '],
                                                                                                                                           ['[,.-\\?]', '']]
    # Blank and whitespace-only lines are skipped.
    assert _reader('test,ok\n\n \n\t\n\t\n.\n\n') == [['test', 'ok'], ['.']]
    assert _reader('"Some", "words" # comment') == [['Some', 'words']]
def _reader(text):
    """Return all rows csv.reader yields for *text* with the 'whitespace' dialect."""
    stream = StringIO(text)
    rows = csv.reader(stream, 'whitespace')
    return list(rows)
def get_reader(text, *args, **kwargs):
    """Return all rows csv.reader yields for *text* as a list.

    Any extra positional and keyword arguments are forwarded unchanged to
    ``csv.reader`` after the in-memory stream wrapping *text*.
    """
    stream = StringIO(text)
    rows = csv.reader(stream, *args, **kwargs)
    return list(rows)