import codecs
import cPickle
import cStringIO
import math
import os
import re
import sys
import tempfile
from multiprocessing import Process

import xlrd.biffh
from pandas import DataFrame, ExcelFile, ExcelWriter, Series

# PyCorpus, Corpus, crf_predict and the document-level helpers
# (as_treetagger_doc, as_eng_postagged_doc, as_t3doc,
# parse_t3_doc_from_string, parse_plain_doc_from_stream) are assumed to be
# provided by this package's own modules.


def excel_to_corpus(excel_path, corpus_path):
    '''NB! Make sure to use the .xls file extension for Excel files.'''
    corpus = PyCorpus(corpus_path)
    excel = ExcelFile(excel_path)
    # As we do not know the number of sheets, we parse them one by one
    # until we get an error.
    idx = 0
    while True:
        try:
            df = excel.parse(str(idx))
            # Recreate information that was modified when exporting to xls:
            # NaN cells become None again, and 0/1 cells become booleans.
            new_df = dict()
            for col in df.columns:
                data = []
                for v in df[col]:
                    if type(v) == float and math.isnan(v):
                        data.append(None)
                    elif v == 0:
                        data.append(False)
                    elif v == 1:
                        data.append(True)
                    else:
                        data.append(v)
                new_df[col] = Series(data)
            corpus[str(idx)] = DataFrame(new_df)
        except xlrd.biffh.XLRDError:
            # no more sheets to parse
            break
        idx += 1
    corpus.close()
def corpus_to_excel(corpus_path, excel_path):
    '''NB! Make sure to use the .xls file extension for Excel files.'''
    corpus = PyCorpus(corpus_path)
    writer = ExcelWriter(excel_path)
    # write every document as a separate sheet named after its key
    for key in corpus:
        corpus[key].to_excel(writer, sheet_name=key)
    writer.save()
    corpus.close()
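# A minimal usage sketch of the Excel round-trip above; the file names are
# hypothetical. Both directions expect the .xls extension:
#
#   corpus_to_excel('documents.corpus', 'documents.xls')
#   excel_to_corpus('documents.xls', 'restored.corpus')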
def as_treetagger_corpus(orig_path, dest_path, encoding='latin-1', language='english'):
    assert orig_path != dest_path
    orig = PyCorpus(orig_path)
    dest = PyCorpus(dest_path)
    dest.autocommit(False)
    for doc_id in orig.keys():
        dest[doc_id] = as_treetagger_doc(orig[doc_id], encoding=encoding,
                                         language=language)
    dest.commit()
    orig.close()
    dest.close()
def boi_to_t3corpus(orig_path, t3_path):
    '''Parse a t3 corpus, where documents are separated with --.'''
    f = codecs.open(orig_path, 'rb', 'utf-8')
    contents = f.read()
    f.close()
    docs = re.split(r'--\r?\n\r?\n', contents)
    corpus = PyCorpus(t3_path)
    for i, doc in enumerate(docs):
        corpus[str(i + 1)] = parse_t3_doc_from_string(doc)
    corpus.close()
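# Sketch of the input format that boi_to_t3corpus expects (hypothetical
# contents; the token format itself is handled by parse_t3_doc_from_string).
# Each document ends with a line containing only -- followed by a blank line:
#
#   <token lines of document 1>
#   --
#
#   <token lines of document 2>
#   --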
def as_eng_postagged_corpus(orig_path, eng_path):
    '''Uses the default nltk tagger.'''
    assert orig_path != eng_path
    orig = PyCorpus(orig_path)
    dest = PyCorpus(eng_path)
    dest.autocommit(False)
    for doc_id in orig.keys():
        dest[doc_id] = as_eng_postagged_doc(orig[doc_id])
    dest.commit()
    orig.close()
    dest.close()
def crf_model_predict(model_path, corpus, target_path, series_name):
    f = open(model_path, 'rb')
    model, kwargs = cPickle.load(f)
    f.close()
    s = Corpus(target_path)
    for doc_id, predictions in crf_predict(model, corpus, **kwargs):
        doc = corpus[doc_id]
        doc[series_name] = predictions
        s[doc_id] = doc
        sys.stderr.write('Document {0} classified.\n'.format(doc_id))
    s.close()
def as_t3corpus(orig_path, t3_path):
    '''Convert the corpus at orig_path to a t3mesta corpus at t3_path.'''
    orig_corpus = PyCorpus(orig_path)
    dest_corpus = PyCorpus(t3_path)
    dest_corpus.autocommit(False)
    # skip documents that have already been converted
    dest_keys = set(dest_corpus.keys())
    for key in orig_corpus.keys():
        if key not in dest_keys:
            dest_corpus[key] = as_t3doc(orig_corpus[key])
    dest_corpus.commit()
    orig_corpus.close()
    dest_corpus.close()
def parse_plain_corpus(plainpath, corpuspath):
    corpus = PyCorpus(corpuspath)
    data = codecs.open(plainpath, 'rb', 'utf-8').read()
    # documents are separated by blank lines
    docs = re.split(r'\s*?\r?\n\r?\n', data)
    data = None
    corpus.autocommit(False)
    for doc in docs:
        lines = re.split(r'\r?\n', doc.strip())
        # the first line of each document is its title, the rest is content
        title = lines[0].strip()
        contents = '\n'.join(lines[1:]).strip()
        text_stream = cStringIO.StringIO(contents.encode('utf-8'))
        utf8_stream = codecs.getreader('utf-8')(text_stream)
        corpus[title] = parse_plain_doc_from_stream(utf8_stream)
    corpus.commit()
    corpus.close()
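# Sketch of the plain input format that parse_plain_corpus expects
# (hypothetical contents). Documents are separated by blank lines, and the
# first line of each document becomes its key in the corpus:
#
#   First document title
#   Body of the first document.
#
#   Second document title
#   Body of the second document.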
def crf_model_predict_mc(model_path, corpus, target_path, series_name, n):
    '''Multi-core version of crf_model_predict.

    n - number of processes to use.
    '''
    sys.stderr.write('Dividing documents between {0} processes.\n'.format(n))
    # deal the document ids round-robin into n lists
    doc_ids = list(corpus.keys())
    id_lists = [[] for _ in range(n)]
    idx = 0
    for doc_id in doc_ids:
        id_lists[idx].append(doc_id)
        idx += 1
        if idx >= n:
            idx = 0
    sys.stderr.write('Launching processes.\n')
    dest_names = []
    processes = []
    for idx, ids in enumerate(id_lists):
        if len(ids) > 0:
            folder = tempfile.mkdtemp()
            # write the slice of the corpus that this process will work on
            src_name = os.path.join(folder, 'src.corpus')
            dest_name = os.path.join(folder, 'dest.corpus')
            tmp_corp = Corpus(src_name)
            tmp_corp.autocommit(False)
            for doc_id in ids:
                tmp_corp[doc_id] = corpus[doc_id]
            tmp_corp.commit()
            tmp_corp.close()
            # start the worker process
            process = Process(target=crf_process,
                              args=(model_path, src_name, dest_name, series_name))
            process.start()
            sys.stderr.write('Process {0} launched.\n'.format(idx))
            # remember the output path and the process handle
            dest_names.append(dest_name)
            processes.append(process)
    for p in processes:
        p.join()
    sys.stderr.write('Processes finished!\n')
    # concatenate the temporary outputs into the target corpus
    target_corp = Corpus(target_path)
    target_corp.autocommit(False)
    for dest_name in dest_names:
        tmp_corp = Corpus(dest_name)
        for doc_id in tmp_corp:
            target_corp[doc_id] = tmp_corp[doc_id]
        tmp_corp.close()
    target_corp.commit()
    target_corp.close()
    sys.stderr.write('Corpus {0} created.\n'.format(target_path))
def crf_process(model_path, tmp_corpus_path, tmp_target_path, series_name):
    '''Worker entry point used by crf_model_predict_mc.'''
    tmp_corp = Corpus(tmp_corpus_path)
    crf_model_predict(model_path, tmp_corp, tmp_target_path, series_name)
    tmp_corp.close()
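# A minimal usage sketch for the multi-core predictor, assuming model.crf
# holds a cPickled (model, kwargs) pair; all names here are hypothetical:
#
#   corpus = Corpus('documents.corpus')
#   crf_model_predict_mc('model.crf', corpus, 'labelled.corpus',
#                        series_name='label', n=4)
#   corpus.close()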