def write_csv(filename, entries):
    """Write a CSV report of suspiciously similar neighbouring examples.

    Only entries for which ``e.volume([1, 2])`` is truthy, or whose
    ``civil_equivalent`` starts with ``'!'``, are inspected.  For each such
    entry every pair of adjacent items of ``e.all_examples()`` is compared
    with ``levenshtein_distance`` on their ``ts_example`` values; pairs
    closer than ``MAX_DISTANCE`` are written out under a header row holding
    the entry's headword (with a homonym superscript where applicable).

    A one-line progress indicator is written to ``sys.stderr`` while the
    search is running.

    :param filename: path of the CSV file to create (opened with mode 'w').
    :param entries: sized iterable of dictionary entries.
    """
    uw = UnicodeWriter(open(filename, 'w'))
    try:
        total = len(entries)
        sys.stderr.write(HIDE_CURSOR)
        try:
            # Enumerate *all* entries so that the percentage shown is the
            # real position inside ``entries``; counting only the entries
            # that pass the filter would never let it approach 100%.
            for position, e in enumerate(entries):
                if not (e.volume([1, 2])
                        or e.civil_equivalent.startswith(u'!')):
                    continue
                note = u'Поиск похожих примеров [ %s%% ] %s\r' % (
                        int(position / float(total) * 100),
                        e.civil_equivalent + ERASE_LINEEND)
                sys.stderr.write(note.encode('utf-8'))

                ecolumn = e.civil_equivalent + {1: u'¹', 2: u'²'}.get(e.homonym_order, u'')
                all_examples = list(e.all_examples())
                # Compare each example with its immediate successor only.
                example_matches = [
                    (ex1, ex2)
                    for ex1, ex2 in zip(all_examples, all_examples[1:])
                    if levenshtein_distance(
                        ex1.ts_example, ex2.ts_example) < MAX_DISTANCE
                ]
                if not example_matches:
                    continue
                uw.writerow((ecolumn, '', ''))
                for ex1, ex2 in example_matches:
                    uw.writerow(('', ex1.example, ex1.address_text))
                    uw.writerow(('', ex2.example, ex2.address_text))
                    # Empty row visually separates one pair from the next.
                    uw.writerow(('', '', ''))
        finally:
            # Always restore the terminal, even if the search failed.
            sys.stderr.write(ERASE_LINE + SHOW_CURSOR)
    finally:
        uw.stream.close()
Exemplo n.º 2
0
def write_csv(filename, examples):
    uw = UnicodeWriter(open(filename, 'w'))
    NON_MARK_CHARS = ur'[\s\ \u00A0,0-9\.;:\-\u2011\!\(\)\[\]\?—–«»…]+'
    register = {}
    for e in (e for e in examples if e.host_entry.volume(1)):
        for mark in re.split(NON_MARK_CHARS, e.address_text):
            if mark in register:
                register[mark] = (register[mark][0] + 1, e)
            else:
                register[mark] = (1, e)
    for mark, (number, e) in sorted(register.items()):
        row = (
            mark,
            str(number),
            e.address_text,
            str(e.id),
            e.host_entry.civil_equivalent,
        )
        uw.writerow(row)
    uw.stream.close()
Exemplo n.º 3
0
def _write_meaning_row(uw, item, headword, template):
    """Write one row for ``item`` unless both its meaning and gloss are blank.

    :param uw: writer object providing ``writerow``.
    :param item: object with ``id``, ``meaning`` and ``gloss`` attributes.
    :param headword: text for the second column (may be empty).
    :param template: two-slot format string for the third column.
    :returns: True if a row was written, False otherwise.
    """
    meaning = item.meaning.strip()
    gloss = item.gloss.strip()
    if not (meaning or gloss):
        return False
    uw.writerow((str(item.id), headword, template % (meaning, gloss)))
    return True


def write_csv(filename, entries):
    """Write the meanings and glosses of first-volume entries to a CSV file.

    For every entry with a truthy ``first_volume`` each of its meanings and
    metaphorical meanings, followed by that meaning's child meanings, gets
    a row ``(id, headword, "meaning ⏹ gloss")``; child meanings are prefixed
    with a bullet.  Items whose meaning and gloss are both blank are
    skipped.  The headword (with a homonym superscript where applicable) is
    printed only on the first row written for the entry.

    :param filename: path of the CSV file to create (opened with mode 'w').
    :param entries: iterable of dictionary entries.
    """
    uw = UnicodeWriter(open(filename, 'w'))
    try:
        for e in entries:
            if not e.first_volume:
                continue
            ecolumn = e.civil_equivalent + {1: u'¹', 2: u'²'}.get(e.homonym_order, u'')
            for m in list(e.meanings) + list(e.metaph_meanings):
                if _write_meaning_row(uw, m, ecolumn, u'%s ⏹ %s'):
                    # Headword has been printed; blank it for later rows.
                    ecolumn = u''
                for cm in m.child_meanings:
                    if _write_meaning_row(uw, cm, ecolumn, u'• %s ⏹ %s'):
                        ecolumn = u''
    finally:
        uw.stream.close()
Exemplo n.º 4
0
#!/usr/bin/env python
# coding: utf-8
import os
import sys

import django
# Make the directory one level above this script's own directory importable.
# Presumably that is the project root holding the ``slavdict`` package --
# TODO confirm.
sys.path.append(
    os.path.dirname(os.path.dirname(
            os.path.abspath(__file__))))
# Only sets the settings module when the environment does not define one.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'slavdict.settings')
# django.setup() runs before the project imports below; the import order
# is deliberate.
django.setup()

from slavdict.dictionary.models import CollocationGroup
from slavdict.unicode_csv import UnicodeWriter

# Export the meanings of collocation groups to 'cg_meanings.csv' in the
# current working directory.
# NOTE(review): no close of the writer's stream is visible in this part of
# the script -- confirm it is closed further down.
uw = UnicodeWriter(open('cg_meanings.csv', 'w'))
# Only groups whose host entry has a truthy ``first_volume`` are exported.
for cg in (cg for cg in CollocationGroup.objects.all()
              if cg.host_entry.first_volume):
    # All collocations of the group joined into a single cell.
    cgcolumn = u'; '.join(c.collocation for c in cg.collocations)
    for m in list(cg.meanings) + list(cg.metaph_meanings):
        meaning = m.meaning.strip()
        gloss = m.gloss.strip()
        # Skip meanings that have neither a meaning text nor a gloss.
        if meaning or gloss:
            uw.writerow((str(m.id), cgcolumn, u'%s ⏹ %s' % (meaning, gloss)))
            # Show the collocation text only on the first row of the group.
            if cgcolumn:
               cgcolumn = u''