Пример #1
0
def main(clean_folder: str, pretrained_embedding_location: str,
         max_length: int, pad_token: str, unk_token: str, ready_folder: str):
    """Turn the cleaned dataset into its "ready" form and write it out.

    Steps, in order:
      1. ``read.run`` loads the train/valid/test sentences and relations
         plus the pretrained embeddings.
      2. ``add_pad_unk`` is applied to the embeddings with the pad and
         unknown tokens.
      3. ``adjust_sentence`` is applied to each split, yielding the adjusted
         sentences and a matching case sequence per split.
      4. ``filter_unused_word`` is run over the embeddings and the
         concatenation of all adjusted sentences, yielding a word lookup and
         a word embedding.
      5. ``adjust_relation`` is applied to each split's relations.
      6. ``write.run`` receives everything together with ``ready_folder``.

    Args:
        clean_folder: Passed to ``read.run`` as the input location.
        pretrained_embedding_location: Passed to ``read.run``.
        max_length: Forwarded to ``adjust_sentence`` and ``adjust_relation``.
        pad_token: Forwarded to ``add_pad_unk`` and ``adjust_sentence``.
        unk_token: Forwarded to ``add_pad_unk`` and ``adjust_sentence``.
        ready_folder: Passed to ``write.run`` as the output location.

    Returns:
        None.
    """
    # Load every split and the embeddings in one call.
    (raw_train_x, raw_train_y,
     raw_valid_x, raw_valid_y,
     raw_test_x, raw_test_y,
     embeddings) = read.run(clean_folder, pretrained_embedding_location)

    # Inputs (sentences): register the special tokens first, then adjust
    # each split with identical settings.
    add_pad_unk(embeddings, pad_token, unk_token)

    def _adjust(split):
        return adjust_sentence(split, max_length, pad_token, embeddings,
                               unk_token)

    train_x, train_cases = _adjust(raw_train_x)
    valid_x, valid_cases = _adjust(raw_valid_x)
    test_x, test_cases = _adjust(raw_test_x)

    # Filter unused words, based on the sentences of all three splits.
    word_lookup, word_embedding = filter_unused_word(
        embeddings, train_x + valid_x + test_x)

    # Targets (relations): same order as the sentences -- train, valid, test.
    train_y, valid_y, test_y = [
        adjust_relation(relations, max_length)
        for relations in (raw_train_y, raw_valid_y, raw_test_y)
    ]

    # Write sentences, vocabulary, case sequences and relations.
    write.run(train_x, valid_x, test_x,
              word_lookup, word_embedding,
              train_cases, valid_cases, test_cases,
              train_y, valid_y, test_y,
              ready_folder)
Пример #2
0
    # Signal-check stage. NOTE(review): these lines are indented, so they are
    # presumably the body of a condition that sits above this excerpt -- confirm.
    # The module is imported here, at the point of use.
    import checksignal as cs
    # The run label is cfg.name followed by str(cfg.maxdepth). quick=True is
    # forwarded only when cfg.quick compares equal to True; otherwise run() is
    # called without the keyword at all.
    if cfg.quick == True:
        cs.run(cfg.name + str(cfg.maxdepth), quick=True)
    else:
        cs.run(cfg.name + str(cfg.maxdepth))

# Optional pipeline stages. Each stage's module is imported only when its
# flag in cfg is enabled. Every run() receives the same run label,
# cfg.name + str(cfg.maxdepth); quick=True is forwarded only in quick mode.
# NOTE(review): the flags are deliberately still compared with `== True` so
# that non-bool config values (e.g. a non-empty string) keep behaving as
# before -- confirm cfg only holds real booleans before simplifying.
if cfg.crossvalidation == True:
    import crossvalidation as cv
    if cfg.quick == True:
        cv.run(cfg.name + str(cfg.maxdepth), quick=True)
    else:
        cv.run(cfg.name + str(cfg.maxdepth))

if cfg.plot == True:
    import plot as p
    # The plot stage additionally takes the bin count, coerced to int.
    if cfg.quick == True:
        p.run(cfg.name + str(cfg.maxdepth), int(cfg.bins), quick=True)
    else:
        p.run(cfg.name + str(cfg.maxdepth), int(cfg.bins))

if cfg.write == True:
    import write as w
    # Writing is refused in quick mode.
    if cfg.quick == True:
        raise Exception("Requires full dataset")
    w.run(cfg.name + str(cfg.maxdepth), cfg.source)

end = time.time()
# A single pre-formatted argument prints the same text whether `print` is the
# Python 2 statement or the Python 3 function (the old `print a, b` form is a
# SyntaxError on Python 3).
print("%s Code Ended" % time.asctime(time.localtime()))

pl.show()
Пример #3
0
    # Signal-check stage. NOTE(review): these lines are indented, so they are
    # presumably the body of a condition that sits above this excerpt -- confirm.
    # The module is imported here, at the point of use.
    import checksignal as cs
    # The run label is cfg.name followed by str(cfg.maxdepth). quick=True is
    # forwarded only when cfg.quick compares equal to True; otherwise run() is
    # called without the keyword at all.
    if cfg.quick == True:
        cs.run(cfg.name + str(cfg.maxdepth), quick = True)
    else:
        cs.run(cfg.name + str(cfg.maxdepth))

# Optional pipeline stages. Each stage's module is imported only when its
# flag in cfg is enabled. Every run() receives the same run label,
# cfg.name + str(cfg.maxdepth); quick=True is forwarded only in quick mode.
# NOTE(review): the flags are deliberately still compared with `== True` so
# that non-bool config values (e.g. a non-empty string) keep behaving as
# before -- confirm cfg only holds real booleans before simplifying.
if cfg.crossvalidation == True:
    import crossvalidation as cv
    if cfg.quick == True:
        cv.run(cfg.name + str(cfg.maxdepth), quick=True)
    else:
        cv.run(cfg.name + str(cfg.maxdepth))

if cfg.plot == True:
    import plot as p
    # The plot stage additionally takes the bin count, coerced to int.
    if cfg.quick == True:
        p.run(cfg.name + str(cfg.maxdepth), int(cfg.bins), quick=True)
    else:
        p.run(cfg.name + str(cfg.maxdepth), int(cfg.bins))

if cfg.write == True:
    import write as w
    # Writing is refused in quick mode.
    if cfg.quick == True:
        raise Exception("Requires full dataset")
    w.run(cfg.name + str(cfg.maxdepth), cfg.source)

end = time.time()
# A single pre-formatted argument prints the same text whether `print` is the
# Python 2 statement or the Python 3 function (the old `print a, b` form is a
# SyntaxError on Python 3).
print("%s Code Ended" % time.asctime(time.localtime()))

pl.show()
Пример #4
0
 def write(self):
     """Delegate to ``write.run()`` with no arguments.

     The call's result is discarded, so this method returns ``None``.

     NOTE(review): inside a method body the bare name ``write`` is not looked
     up in the class namespace, so it does not refer to this method; it is
     presumably a module imported elsewhere in the file -- confirm.
     """
     write.run()