Примеры использования read_json_file в Python (spacy.gold.read_json_file)

Пример #1

0

Показать файл

def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False,
         L1=1e-6):
    """Train (unless ``eval_only`` is set) and then evaluate a pipeline.

    Args:
        language: Language identifier resolved with
            ``spacy.util.get_lang_class``.
        train_loc: Location of the JSON training data.
        dev_loc: Location of the JSON development data. It is read for
            training and read again for the final evaluation.
        model_dir: Model directory handed to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents: When positive, only the first ``n_sents`` training entries
            are kept; the value is also forwarded to ``train``.
        n_iter: Forwarded to ``train``.
        out_loc: When non-empty, ``write_parses`` is called with this
            output location.
        verbose: Forwarded to ``evaluate``.
        debug: Not read directly; only captured in the config dicts.
        corruption_level: Forwarded to ``train``.
        gold_preproc: Forwarded to ``train`` and ``evaluate``.
        eval_only: When true, training is skipped.
        pseudoprojective: Not read directly; only captured in the config
            dicts.
        L1: Not read directly; only captured in the config dicts.
    """
    # Take ONE snapshot of the call arguments and copy it per component.
    # Calling locals() three times in a row would leak the earlier config
    # dicts into the later ones (tagger_cfg would contain parser_cfg, and
    # entity_cfg would contain both).
    call_args = dict(locals())
    parser_cfg = dict(call_args)
    tagger_cfg = dict(call_args)
    entity_cfg = dict(call_args)

    lang = spacy.util.get_lang_class(language)

    parser_cfg['features'] = lang.Defaults.parser_features
    entity_cfg['features'] = lang.Defaults.entity_features

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        gold_dev = list(read_json_file(dev_loc))
        if n_sents > 0:
            gold_train = gold_train[:n_sents]
        train(lang, gold_train, gold_dev, model_dir, tagger_cfg, parser_cfg, entity_cfg,
              n_sents=n_sents, gold_preproc=gold_preproc, corruption_level=corruption_level,
              n_iter=n_iter)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang, list(read_json_file(dev_loc)),
                      model_dir, gold_preproc=gold_preproc, verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

Пример #2

0

Показать файл

Файл: train.py Проект: paolodedios/spaCy

def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False):
    """Train (unless ``eval_only`` is set) and then evaluate a pipeline.

    The call arguments are captured into three per-component config dicts
    (tagger, parser, entity) that are passed on to ``train``. The parser and
    entity configs additionally receive the language's default feature
    templates.

    Args:
        language: Language identifier resolved with
            ``spacy.util.get_lang_class``.
        train_loc: Location of the JSON training data.
        dev_loc: Location of the JSON development data. It is read for
            training and read again for the final evaluation.
        model_dir: Model directory handed to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents, n_iter, corruption_level: Forwarded to ``train``.
        out_loc: When non-empty, ``write_parses`` is called with this
            output location.
        verbose: Forwarded to ``evaluate``.
        gold_preproc: Forwarded to ``train`` and ``evaluate``.
        eval_only: When true, training is skipped.
        debug, pseudoprojective: Not read directly; only captured in the
            config dicts.
    """
    # Snapshot the arguments once and copy the snapshot. Repeated
    # dict(locals()) calls would embed parser_cfg inside tagger_cfg and
    # both of them inside entity_cfg.
    call_args = dict(locals())
    parser_cfg = dict(call_args)
    tagger_cfg = dict(call_args)
    entity_cfg = dict(call_args)

    lang = spacy.util.get_lang_class(language)

    parser_cfg['features'] = lang.Defaults.parser_features
    entity_cfg['features'] = lang.Defaults.entity_features

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        gold_dev = list(read_json_file(dev_loc))
        train(lang, gold_train, gold_dev, model_dir, tagger_cfg, parser_cfg, entity_cfg,
              n_sents=n_sents, gold_preproc=gold_preproc, corruption_level=corruption_level,
              n_iter=n_iter)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang, list(read_json_file(dev_loc)),
                      model_dir, gold_preproc=gold_preproc, verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

Пример #3

0

Показать файл

Файл: nn_train.py Проект: Arttii/spaCy

def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, verbose=False,
         nv_word=10, nv_tag=10, nv_label=10, nv_hidden=10,
         eta=0.1, mu=0.9, eval_only=False):
    """Train an English model with the 'embed' feature set and print scores.

    Args:
        train_loc: Location of the JSON training data.
        dev_loc: Location of the JSON development data used for scoring.
        model_dir: Model directory passed to ``train``.
        n_sents, n_iter, verbose: Forwarded to ``train``.
        nv_word, nv_tag, nv_label, nv_hidden: Forwarded to ``train``.
        eta, mu: Forwarded to ``train``.
        eval_only: Accepted but never read here; training always runs.
    """
    # The predicate is handed to read_json_file as its second argument;
    # presumably it keeps only documents whose id contains 'wsj' -- confirm
    # against read_json_file.
    gold_train = list(read_json_file(train_loc, lambda doc: 'wsj' in doc['id']))

    nlp = train(English, gold_train, model_dir,
                feat_set='embed',
                eta=eta, mu=mu,
                nv_word=nv_word, nv_tag=nv_tag, nv_label=nv_label, nv_hidden=nv_hidden,
                n_sents=n_sents, n_iter=n_iter,
                verbose=verbose)

    scorer = evaluate(nlp, list(read_json_file(dev_loc)))

    # A single pre-formatted argument prints the same "LABEL value" line
    # whether print is the Python 2 statement or the Python 3 function.
    print('TOK %s' % (100 - scorer.token_acc,))
    print('POS %s' % (scorer.tags_acc,))
    print('UAS %s' % (scorer.uas,))
    print('LAS %s' % (scorer.las,))

    print('NER P %s' % (scorer.ents_p,))
    print('NER R %s' % (scorer.ents_r,))
    print('NER F %s' % (scorer.ents_f,))

Пример #4

0

Показать файл

Файл: train.py Проект: domsooch/spaCy

def main(train_loc,
         dev_loc,
         model_dir,
         n_sents=0,
         n_iter=15,
         out_loc="",
         verbose=False,
         debug=False,
         corruption_level=0.0,
         gold_preproc=False,
         beam_width=1,
         eval_only=False,
         use_orig_arc_eager=False):
    """Train (unless ``eval_only`` is set) and evaluate an English model.

    Args:
        train_loc: Location of the JSON training data.
        dev_loc: Location of the JSON development data used for scoring.
        model_dir: Model directory passed to ``train`` and ``evaluate``.
        n_sents, n_iter, corruption_level: Forwarded to ``train``.
        out_loc: Currently unused; the ``write_parses`` call is commented
            out.
        verbose, gold_preproc, beam_width: Forwarded to ``train`` and
            ``evaluate``.
        debug: Selects the 'debug' feature set instead of 'basic'.
        eval_only: When true, training is skipped.
        use_orig_arc_eager: When true, ``English.ParserTransitionSystem`` is
            replaced with ``TreeArcEager``. This is a class-level assignment,
            so it outlives the call.
    """
    if use_orig_arc_eager:
        English.ParserTransitionSystem = TreeArcEager
    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        train(English,
              gold_train,
              model_dir,
              feat_set='basic' if not debug else 'debug',
              gold_preproc=gold_preproc,
              n_sents=n_sents,
              corruption_level=corruption_level,
              n_iter=n_iter,
              beam_width=beam_width,
              verbose=verbose,
              use_orig_arc_eager=use_orig_arc_eager)
    #if out_loc:
    #    write_parses(English, dev_loc, model_dir, out_loc, beam_width=beam_width)
    scorer = evaluate(English,
                      list(read_json_file(dev_loc)),
                      model_dir,
                      gold_preproc=gold_preproc,
                      verbose=verbose,
                      beam_width=beam_width)
    # A single pre-formatted argument prints the same "LABEL value" line
    # whether print is the Python 2 statement or the Python 3 function.
    print('TOK %s' % (scorer.token_acc,))
    print('POS %s' % (scorer.tags_acc,))
    print('UAS %s' % (scorer.uas,))
    print('LAS %s' % (scorer.las,))

    print('NER P %s' % (scorer.ents_p,))
    print('NER R %s' % (scorer.ents_r,))
    print('NER F %s' % (scorer.ents_f,))

Пример #5

0

Показать файл

Файл: train.py Проект: anukat2015/spaCy

def main(language,
         train_loc,
         dev_loc,
         model_dir,
         n_sents=0,
         n_iter=15,
         out_loc="",
         verbose=False,
         debug=False,
         corruption_level=0.0,
         gold_preproc=False,
         eval_only=False,
         pseudoprojective=False):
    """Train (unless ``eval_only`` is set) and evaluate a model.

    Args:
        language: Language code; 'en' selects ``English`` and 'de' selects
            ``German``.
        train_loc: Location of the JSON training data.
        dev_loc: Location of the JSON development data used for scoring.
        model_dir: Model directory passed to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents, n_iter, corruption_level, pseudoprojective: Forwarded to
            ``train``.
        out_loc: When non-empty, ``write_parses`` is called with this
            output location.
        verbose, gold_preproc: Forwarded to ``train`` and ``evaluate``.
        debug: Selects the 'debug' feature set instead of 'basic'.
        eval_only: When true, training is skipped.

    Raises:
        ValueError: If ``language`` is not one of the supported codes.
    """
    languages = {'en': English, 'de': German}
    lang = languages.get(language)
    if lang is None:
        # Fail here with a clear message instead of passing None on to
        # train/evaluate.
        raise ValueError(
            "Unsupported language %r; expected one of: %s"
            % (language, ', '.join(sorted(languages))))

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        train(lang,
              gold_train,
              model_dir,
              feat_set='basic' if not debug else 'debug',
              gold_preproc=gold_preproc,
              n_sents=n_sents,
              corruption_level=corruption_level,
              n_iter=n_iter,
              verbose=verbose,
              pseudoprojective=pseudoprojective)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang,
                      list(read_json_file(dev_loc)),
                      model_dir,
                      gold_preproc=gold_preproc,
                      verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

Пример #6

0

Показать файл

def main(train_loc,
         dev_loc,
         model_dir,
         n_sents=0,
         n_iter=15,
         verbose=False,
         nv_word=10,
         nv_tag=10,
         nv_label=10,
         nv_hidden=10,
         eta=0.1,
         mu=0.9,
         eval_only=False):
    """Train an English model with the 'embed' feature set and print scores.

    Args:
        train_loc: Location of the JSON training data.
        dev_loc: Location of the JSON development data used for scoring.
        model_dir: Model directory passed to ``train``.
        n_sents, n_iter, verbose: Forwarded to ``train``.
        nv_word, nv_tag, nv_label, nv_hidden: Forwarded to ``train``.
        eta, mu: Forwarded to ``train``.
        eval_only: Accepted but never read here; training always runs.
    """
    # The predicate is handed to read_json_file as its second argument;
    # presumably it keeps only documents whose id contains 'wsj' -- confirm
    # against read_json_file.
    gold_train = list(read_json_file(train_loc,
                                     lambda doc: 'wsj' in doc['id']))

    nlp = train(English,
                gold_train,
                model_dir,
                feat_set='embed',
                eta=eta,
                mu=mu,
                nv_word=nv_word,
                nv_tag=nv_tag,
                nv_label=nv_label,
                nv_hidden=nv_hidden,
                n_sents=n_sents,
                n_iter=n_iter,
                verbose=verbose)

    scorer = evaluate(nlp, list(read_json_file(dev_loc)))

    # A single pre-formatted argument prints the same "LABEL value" line
    # whether print is the Python 2 statement or the Python 3 function.
    print('TOK %s' % (100 - scorer.token_acc,))
    print('POS %s' % (scorer.tags_acc,))
    print('UAS %s' % (scorer.uas,))
    print('LAS %s' % (scorer.las,))

    print('NER P %s' % (scorer.ents_p,))
    print('NER R %s' % (scorer.ents_r,))
    print('NER F %s' % (scorer.ents_f,))

Пример #7

0

Показать файл

Файл: train.py Проект: michigan-com/spaCy

def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False):
    """Optionally train an English model, then score it on the dev data.

    Training is skipped when ``eval_only`` is true. ``debug`` switches the
    feature set from 'basic' to 'debug'. ``out_loc`` is accepted but not
    used, because the ``write_parses`` step is commented out.
    """
    if not eval_only:
        training_docs = list(read_json_file(train_loc))
        feature_set = 'debug' if debug else 'basic'
        train(
            English,
            training_docs,
            model_dir,
            feat_set=feature_set,
            gold_preproc=gold_preproc,
            n_sents=n_sents,
            corruption_level=corruption_level,
            n_iter=n_iter,
            verbose=verbose,
        )
    #if out_loc:
    #    write_parses(English, dev_loc, model_dir, out_loc, beam_width=beam_width)
    dev_docs = list(read_json_file(dev_loc))
    scorer = evaluate(English, dev_docs, model_dir,
                      gold_preproc=gold_preproc, verbose=verbose)

    # Attributes are looked up one at a time, right before each print, so
    # the output order matches a plain sequence of print calls.
    report = (
        ('TOK', 'token_acc'),
        ('POS', 'tags_acc'),
        ('UAS', 'uas'),
        ('LAS', 'las'),
        ('NER P', 'ents_p'),
        ('NER R', 'ents_r'),
        ('NER F', 'ents_f'),
    )
    for label, attribute in report:
        print(label, getattr(scorer, attribute))

Пример #8

0

Показать файл

Файл: train.py Проект: pratikmehta14/spaCy

def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False):
    """Run training (unless ``eval_only``) and print evaluation scores.

    The model is always ``English``. With ``debug`` set, the 'debug' feature
    set replaces 'basic'. ``out_loc`` has no effect here, since the
    ``write_parses`` call is commented out.
    """
    if not eval_only:
        docs_for_training = list(read_json_file(train_loc))
        train_options = dict(
            feat_set='debug' if debug else 'basic',
            gold_preproc=gold_preproc,
            n_sents=n_sents,
            corruption_level=corruption_level,
            n_iter=n_iter,
            verbose=verbose,
        )
        train(English, docs_for_training, model_dir, **train_options)
    #if out_loc:
    #    write_parses(English, dev_loc, model_dir, out_loc, beam_width=beam_width)
    docs_for_scoring = list(read_json_file(dev_loc))
    scorer = evaluate(English, docs_for_scoring, model_dir,
                      gold_preproc=gold_preproc, verbose=verbose)

    # Parsing and tagging accuracies first, then the NER metrics.
    for label, attribute in (('TOK', 'token_acc'), ('POS', 'tags_acc'),
                             ('UAS', 'uas'), ('LAS', 'las')):
        print(label, getattr(scorer, attribute))

    for label, attribute in (('NER P', 'ents_p'), ('NER R', 'ents_r'),
                             ('NER F', 'ents_f')):
        print(label, getattr(scorer, attribute))

Пример #9

0

Показать файл

Файл: train.py Проект: Develer/spaCy

def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False):
    """Train (unless ``eval_only`` is set) and evaluate a model.

    Args:
        language: Language code; 'en' selects ``English`` and 'de' selects
            ``German``.
        train_loc: Location of the JSON training data.
        dev_loc: Location of the JSON development data used for scoring.
        model_dir: Model directory passed to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents, n_iter, corruption_level, pseudoprojective: Forwarded to
            ``train``.
        out_loc: When non-empty, ``write_parses`` is called with this
            output location.
        verbose, gold_preproc: Forwarded to ``train`` and ``evaluate``.
        debug: Selects the 'debug' feature set instead of 'basic'.
        eval_only: When true, training is skipped.

    Raises:
        ValueError: If ``language`` is not one of the supported codes.
    """
    languages = {'en': English, 'de': German}
    lang = languages.get(language)
    if lang is None:
        # Fail here with a clear message instead of passing None on to
        # train/evaluate.
        raise ValueError(
            "Unsupported language %r; expected one of: %s"
            % (language, ', '.join(sorted(languages))))

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        train(lang, gold_train, model_dir,
              feat_set='basic' if not debug else 'debug',
              gold_preproc=gold_preproc, n_sents=n_sents,
              corruption_level=corruption_level, n_iter=n_iter,
              verbose=verbose, pseudoprojective=pseudoprojective)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang, list(read_json_file(dev_loc)),
                      model_dir, gold_preproc=gold_preproc, verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

Пример #10

0

Показать файл

Файл: train.py Проект: nournia/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc, beam_width=None):
    """Parse the dev data, score it, and write one token per line.

    Each token is written to ``out_loc`` (UTF-8) as four tab-separated
    fields: orth, tag, head orth and dependency label.

    Args:
        Language: Callable that builds the pipeline from
            ``data_dir=model_dir``.
        dev_loc: Location of the JSON development data.
        model_dir: Model directory passed to ``Language``.
        out_loc: Path of the output file; it is overwritten.
        beam_width: When not None, assigned to ``nlp.parser.cfg.beam_width``.

    Returns:
        The ``Scorer`` that accumulated the scores for all parsed documents.
    """
    nlp = Language(data_dir=model_dir)
    if beam_width is not None:
        nlp.parser.cfg.beam_width = beam_width
    gold_tuples = read_json_file(dev_loc)
    scorer = Scorer()
    # The context manager closes (and flushes) the output even when parsing
    # or scoring raises part-way through.
    with codecs.open(out_loc, "w", "utf8") as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold tokens and
                    # run each pipeline component explicitly.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text, merge_mwes=False)
                gold = GoldParse(tokens, annot_tuples)
                scorer.score(tokens, gold, verbose=False)
                for t in tokens:
                    out_file.write("%s\t%s\t%s\t%s\n" % (t.orth_, t.tag_, t.head.orth_, t.dep_))
    return scorer

Пример #11

0

Показать файл

Файл: train.py Проект: domsooch/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc, beam_width=None):
    """Parse the dev data, score it, and write one token per line.

    Each token is written to ``out_loc`` (UTF-8) as four tab-separated
    fields: orth, tag, head orth and dependency label.

    Args:
        Language: Callable that builds the pipeline from
            ``data_dir=model_dir``.
        dev_loc: Location of the JSON development data.
        model_dir: Model directory passed to ``Language``.
        out_loc: Path of the output file; it is overwritten.
        beam_width: When not None, assigned to ``nlp.parser.cfg.beam_width``.

    Returns:
        The ``Scorer`` that accumulated the scores for all parsed documents.
    """
    nlp = Language(data_dir=model_dir)
    if beam_width is not None:
        nlp.parser.cfg.beam_width = beam_width
    gold_tuples = read_json_file(dev_loc)
    scorer = Scorer()
    # The context manager closes (and flushes) the output even when parsing
    # or scoring raises part-way through.
    with codecs.open(out_loc, 'w', 'utf8') as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold tokens and
                    # run each pipeline component explicitly.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text, merge_mwes=False)
                gold = GoldParse(tokens, annot_tuples)
                scorer.score(tokens, gold, verbose=False)
                for t in tokens:
                    out_file.write('%s\t%s\t%s\t%s\n' %
                                   (t.orth_, t.tag_, t.head.orth_, t.dep_))
    return scorer

Пример #12

0

Показать файл

Файл: train.py Проект: anukat2015/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc):
    """Parse the dev data and write the parses to ``out_loc``.

    Non-space tokens are written one per line as five tab-separated fields:
    token index, orth, tag, head orth and dependency label. A blank line
    follows every sentence.

    Args:
        Language: Callable that builds the pipeline from
            ``data_dir=model_dir``.
        dev_loc: Location of the JSON development data.
        model_dir: Model directory passed to ``Language``.
        out_loc: Path of the UTF-8 output file; it is overwritten.
    """
    nlp = Language(data_dir=model_dir)
    gold_tuples = read_json_file(dev_loc)
    # The encoding must be passed by keyword: the third positional argument
    # of io.open is ``buffering``, so a positional 'utf8' raises TypeError.
    # The context manager also makes sure the file gets closed.
    with io.open(out_loc, 'w', encoding='utf8') as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold tokens and
                    # run each pipeline component explicitly.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text)
                # Scoring against the gold parse is disabled here; only the
                # predicted parses are written.
                for sent in tokens.sents:
                    for t in sent:
                        if not t.is_space:
                            out_file.write(
                                '%d\t%s\t%s\t%s\t%s\n' %
                                (t.i, t.orth_, t.tag_, t.head.orth_, t.dep_))
                    out_file.write('\n')

Пример #13

0

Показать файл

Файл: train.py Проект: Develer/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc):
    """Parse the dev data and write the parses to ``out_loc``.

    Non-space tokens are written one per line as five tab-separated fields:
    token index, orth, tag, head orth and dependency label. A blank line
    follows every sentence.

    Args:
        Language: Callable that builds the pipeline from
            ``data_dir=model_dir``.
        dev_loc: Location of the JSON development data.
        model_dir: Model directory passed to ``Language``.
        out_loc: Path of the UTF-8 output file; it is overwritten.
    """
    nlp = Language(data_dir=model_dir)
    gold_tuples = read_json_file(dev_loc)
    # The encoding must be passed by keyword: the third positional argument
    # of io.open is ``buffering``, so a positional 'utf8' raises TypeError.
    # The context manager also makes sure the file gets closed.
    with io.open(out_loc, 'w', encoding='utf8') as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold tokens and
                    # run each pipeline component explicitly.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text)
                # Scoring against the gold parse is disabled here; only the
                # predicted parses are written.
                for sent in tokens.sents:
                    for t in sent:
                        if not t.is_space:
                            out_file.write(
                                '%d\t%s\t%s\t%s\t%s\n' % (t.i, t.orth_, t.tag_, t.head.orth_, t.dep_)
                            )
                    out_file.write('\n')

Пример #14

0

Показать файл

Developed and tested for spaCy 2.0.6. Updated for v2.2.2
"""
import random
import plac
import spacy
import os.path
from spacy.tokens import Doc
from spacy.gold import read_json_file, GoldParse

# Seed Python's random module with a fixed value.
random.seed(0)

# Directory that contains this script; the data path below is built from it.
PWD = os.path.dirname(__file__)

# Training examples, loaded eagerly at import time from
# ner_example_data/ner-sent-per-line.json next to this script.
TRAIN_DATA = list(
    read_json_file(
        os.path.join(PWD, "ner_example_data", "ner-sent-per-line.json")))


def get_position_label(i, words, tags, heads, labels, ents):
    """Map the word at index ``i`` to a coarse position label.

    Documents with fewer than 20 words are always "short-doc". Otherwise the
    label depends on the index: 0 is "first-word", 1-9 "early-word", 10-19
    "mid-word", and the final index "last-word". Only ``i`` and ``words``
    are read; the remaining arguments are ignored.

    NOTE(review): any other index (20 or above and not the last word) falls
    through and yields None -- confirm whether a label is intended there.
    """
    word_count = len(words)
    if word_count < 20:
        return "short-doc"
    if i == 0:
        return "first-word"
    if i < 10:
        return "early-word"
    if i < 20:
        return "mid-word"
    if i == word_count - 1:
        return "last-word"
    return None

Пример #15

0

Показать файл

The specific example here is not necessarily a good idea --- but it shows
how an arbitrary objective function for some word can be used.

Developed and tested for spaCy 2.0.6
'''
import random
import plac
import spacy
import os.path
from spacy.gold import read_json_file, GoldParse

# Seed Python's random module with a fixed value.
random.seed(0)

# Directory that contains this script; the data path below is built from it.
PWD = os.path.dirname(__file__)

# Training examples, loaded eagerly at import time from training-data.json
# next to this script.
TRAIN_DATA = list(read_json_file(os.path.join(PWD, 'training-data.json')))


def get_position_label(i, words, tags, heads, labels, ents):
    '''Map the word at index ``i`` to a coarse position label.

    Documents with fewer than 20 words are always 'short-doc'. Otherwise the
    label depends on the index: 0 is 'first-word', 1-9 'early-word', 10-19
    'mid-word', and the final index 'last-word'. Only ``i`` and ``words``
    are read; the remaining arguments are ignored.

    NOTE(review): any other index (20 or above and not the last word) falls
    through and yields None -- confirm whether a label is intended there.
    '''
    n_words = len(words)
    if n_words < 20:
        return 'short-doc'
    if i == 0:
        return 'first-word'
    if i < 10:
        return 'early-word'
    if i < 20:
        return 'mid-word'
    if i == n_words - 1:
        return 'last-word'
    return None

Пример #16

0

Показать файл

Файл: ner_multitask_objective.py Проект: IndicoDataSolutions/spaCy

The specific example here is not necessarily a good idea --- but it shows
how an arbitrary objective function for some word can be used.

Developed and tested for spaCy 2.0.6
'''
import random
import plac
import spacy
import os.path
from spacy.gold import read_json_file, GoldParse

# Seed Python's random module with a fixed value.
random.seed(0)

# Directory that contains this script; the data path below is built from it.
PWD = os.path.dirname(__file__)

# Training examples, loaded eagerly at import time from training-data.json
# next to this script.
TRAIN_DATA = list(read_json_file(os.path.join(PWD, 'training-data.json')))



def get_position_label(i, words, tags, heads, labels, ents):
    '''Return labels indicating the position of the word in the document.
    '''
    if len(words) < 20:
        return 'short-doc'
    elif i == 0:
        return 'first-word'
    elif i < 10:
        return 'early-word'
    elif i < 20:
        return 'mid-word'
    elif i == len(words)-1:

Python read_json_file примеры использования