def write_csv(filename, entries):
    """Write pairs of adjacent, nearly identical examples to a CSV file.

    For each selected entry, consecutive items of ``e.all_examples()`` are
    compared with ``levenshtein_distance`` on their ``ts_example`` values.
    Pairs closer than ``MAX_DISTANCE`` are written under a row that holds
    the entry's headword.  A progress line is printed to stderr while the
    entries are processed.

    :param filename: path of the CSV file to create (opened with mode 'w').
    :param entries: sized collection of entry objects; only those for which
        ``e.volume([1, 2])`` is true or whose ``civil_equivalent`` starts
        with ``'!'`` are examined.
    """
    homonym_marks = {1: u'¹', 2: u'²'}
    uw = UnicodeWriter(open(filename, 'w'))
    try:
        # The percentage is relative to *all* entries, not only the selected
        # ones, so the indicator may stop short of 100%.
        total = float(len(entries))
        sys.stderr.write(HIDE_CURSOR)
        selected = (
            e for e in entries
            if e.volume([1, 2]) or e.civil_equivalent.startswith(u'!'))
        for j, e in enumerate(selected):
            note = u'Поиск похожих примеров [ %s%% ] %s\r' % (
                int(j / total * 100),
                e.civil_equivalent + ERASE_LINEEND)
            sys.stderr.write(note.encode('utf-8'))

            ecolumn = e.civil_equivalent + homonym_marks.get(
                e.homonym_order, u'')

            # Only neighbouring examples are compared, not every combination.
            all_examples = list(e.all_examples())
            example_matches = [
                (ex1, ex2)
                for ex1, ex2 in zip(all_examples, all_examples[1:])
                if levenshtein_distance(
                    ex1.ts_example, ex2.ts_example) < MAX_DISTANCE
            ]
            if not example_matches:
                continue

            uw.writerow((ecolumn, '', ''))
            for ex1, ex2 in example_matches:
                uw.writerow(('', ex1.example, ex1.address_text))
                uw.writerow(('', ex2.example, ex2.address_text))
                # NOTE(review): the original indentation was lost; the blank
                # separator row is assumed to follow every pair -- confirm.
                uw.writerow(('', '', ''))
    finally:
        # Always restore the terminal cursor and release the file, even when
        # processing is interrupted by an exception.
        try:
            sys.stderr.write(ERASE_LINE + SHOW_CURSOR)
        finally:
            uw.stream.close()
def write_csv(filename, examples): uw = UnicodeWriter(open(filename, 'w')) NON_MARK_CHARS = ur'[\s\ \u00A0,0-9\.;:\-\u2011\!\(\)\[\]\?—–«»…]+' register = {} for e in (e for e in examples if e.host_entry.volume(1)): for mark in re.split(NON_MARK_CHARS, e.address_text): if mark in register: register[mark] = (register[mark][0] + 1, e) else: register[mark] = (1, e) for mark, (number, e) in sorted(register.items()): row = ( mark, str(number), e.address_text, str(e.id), e.host_entry.civil_equivalent, ) uw.writerow(row) uw.stream.close()
def write_csv(filename, entries):
    """Write the meanings and glosses of entries to a CSV file.

    For every entry with a truthy ``first_volume``, one row is written per
    non-empty meaning (regular and metaphorical) and per non-empty child
    meaning: ``(id, headword, 'meaning ⏹ gloss')``.  Child-meaning rows are
    prefixed with a bullet.  The headword appears only in the first row
    written for an entry and is blank in the following ones.

    :param filename: path of the CSV file to create (opened with mode 'w').
    :param entries: iterable of entry objects.
    """
    homonym_marks = {1: u'¹', 2: u'²'}
    top_level_fmt = u'%s ⏹ %s'
    child_fmt = u'• %s ⏹ %s'

    uw = UnicodeWriter(open(filename, 'w'))
    try:
        for e in entries:
            if not e.first_volume:
                continue
            ecolumn = e.civil_equivalent + homonym_marks.get(
                e.homonym_order, u'')
            for m in list(e.meanings) + list(e.metaph_meanings):
                # NOTE(review): the original indentation was lost; child
                # meanings are assumed to be listed even when the parent
                # meaning itself is empty -- confirm.
                items = [(m, top_level_fmt)]
                items.extend((cm, child_fmt) for cm in m.child_meanings)
                for item, fmt in items:
                    meaning = item.meaning.strip()
                    gloss = item.gloss.strip()
                    if not (meaning or gloss):
                        continue
                    uw.writerow((str(item.id), ecolumn,
                                 fmt % (meaning, gloss)))
                    # The headword is shown once per entry only.
                    ecolumn = u''
    finally:
        # Release the file even if an entry fails to serialize.
        uw.stream.close()
#!/usr/bin/env python
# coding: utf-8
"""Dump meanings of collocation groups to ``cg_meanings.csv``.

For every collocation group whose host entry has a truthy ``first_volume``,
one row is written per non-empty meaning (regular and metaphorical):
``(meaning id, collocations joined by '; ', 'meaning ⏹ gloss')``.  The
collocation column is filled only in the first row written for a group.
"""
import os
import sys

import django

# Make the parent of this script's directory importable.
sys.path.append(
    os.path.dirname(os.path.dirname(
        os.path.abspath(__file__))))
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'slavdict.settings')
django.setup()

# Project imports stay below django.setup(), preserving the original order.
from slavdict.dictionary.models import CollocationGroup
from slavdict.unicode_csv import UnicodeWriter

uw = UnicodeWriter(open('cg_meanings.csv', 'w'))
try:
    for cg in CollocationGroup.objects.all():
        if not cg.host_entry.first_volume:
            continue
        cgcolumn = u'; '.join(c.collocation for c in cg.collocations)
        for m in list(cg.meanings) + list(cg.metaph_meanings):
            meaning = m.meaning.strip()
            gloss = m.gloss.strip()
            if not (meaning or gloss):
                continue
            uw.writerow((str(m.id), cgcolumn,
                         u'%s ⏹ %s' % (meaning, gloss)))
            # The collocations are shown once per group only.
            cgcolumn = u''
finally:
    # The original never closed the output file; close it explicitly so the
    # data is flushed, as the write_csv helpers do via uw.stream.
    uw.stream.close()