示例#1
0
def pretreate():
    """Run the pretreatment pipeline using the paths held by PathManager.

    Reads ``CORPUS_ARTICLE``, ``CORPUS_SPLIT`` and ``CORPUS_FEATURE`` from
    ``PathManager`` and passes them, in that order, to
    ``pretreate.run.Corpus.run``.
    """
    # Function-local imports: the project packages are only loaded when this
    # step is actually called.
    from pretreate.run import Corpus
    from file.path_manager import PathManager

    # Resolve every path before the Corpus object is created, then pass them
    # positionally: articles, participle titles, treated articles.
    run_args = (
        PathManager.CORPUS_ARTICLE,
        PathManager.CORPUS_SPLIT,
        PathManager.CORPUS_FEATURE,
    )
    Corpus().run(*run_args)
示例#2
0
def embedding():
    """Launch the embedding pipeline with paths taken from PathManager.

    Six positional arguments are handed to ``embedding.run.Corpus.run``:
    ``CORPUS_ARTICLE``, ``CORPUS_SPLIT``, ``CORPUS_KEYWORD`` twice and
    ``CORPUS_SIMPLYARTICLE`` twice.
    """
    from embedding.run import Corpus
    from file.path_manager import PathManager

    # All paths are resolved before the Corpus object is constructed.
    source_paths = (PathManager.CORPUS_ARTICLE, PathManager.CORPUS_SPLIT)
    keyword_path = PathManager.CORPUS_KEYWORD
    simple_article_path = PathManager.CORPUS_SIMPLYARTICLE

    # NOTE(review): each of the last two paths fills two consecutive argument
    # slots of Corpus.run, exactly as before -- confirm that this duplication
    # is intentional.
    run_args = source_paths + (keyword_path,) * 2 + (simple_article_path,) * 2
    Corpus().run(*run_args)
示例#3
0
def tag():
    """Run the tagging pipeline with paths from PathManager, then print 'finish'.

    The paths are passed to ``tag.run.Corpus.run`` positionally in this order:
    sentences, tag tree, sentences market, tags, tags market, untagged
    sentences.
    """
    from tag.run import Corpus
    from file.path_manager import PathManager

    sentences_path = PathManager.CORPUS_ARTICLE
    tag_tree_path = PathManager.TAG_TAGTREE
    # NOTE(review): tags_path and tags_market_path both resolve to
    # TAG_ARTICLETAG; kept as-is -- confirm they are meant to be the same file.
    tags_path = PathManager.TAG_ARTICLETAG
    tags_market_path = PathManager.TAG_ARTICLETAG
    sentences_market_path = PathManager.TAG_SENTENCES
    untag_sentence_path = PathManager.TAG_UNTAGSENTENCE

    corpus = Corpus()
    corpus.run(sentences_path, tag_tree_path, sentences_market_path, tags_path,
               tags_market_path, untag_sentence_path)
    # The parenthesised form prints the same text on Python 2 and, unlike the
    # bare print statement, is also valid syntax on Python 3.
    print('finish')
示例#4
0
def bowlr(data_root='E:/data/knowledge', domain='car'):
    """Run the bowlr pipeline on the train/test files of one domain.

    With the default arguments every path is identical to the previously
    hard-coded ``E:/data/knowledge/bowlr/...`` locations.

    Args:
        data_root: Root data directory, joined with '/' separators.
        domain: Sub-directory of ``<data_root>/bowlr`` that holds the article
            and feature files.
    """
    from bowlr.run import Corpus

    bowlr_dir = data_root + '/bowlr'
    domain_dir = bowlr_dir + '/' + domain

    articles_path = domain_dir + '/article_trainset'
    article_market_path = domain_dir + '/article_trainset_market'
    # The dictionary sits under "all", not under the per-domain directory.
    dictionary_path = bowlr_dir + '/all/dictionary'
    feature_path = domain_dir + '/trainset_feature'
    feature_market_path = domain_dir + '/trainset_feature_market'
    # The training input is the same file as the feature market output.
    train_path = feature_market_path
    test_path = domain_dir + '/testset_feature_market'
    output_path = domain_dir + '/fprs_tprs'

    corpus = Corpus()
    corpus.run(articles_path, article_market_path, dictionary_path, feature_path,
               feature_market_path, train_path, test_path, output_path)
示例#5
0
def classify(data_root='E:/data/knowledge', domain='car'):
    """Run the classify pipeline on the train/test files of one domain.

    With the default arguments every path is identical to the previously
    hard-coded ``E:/data/knowledge/...`` locations.

    Args:
        data_root: Root data directory, joined with '/' separators.
        domain: Sub-directory of ``<data_root>/classify`` that holds the
            article and feature files.
    """
    from classify.run import Corpus

    domain_dir = data_root + '/classify/' + domain
    # Resource files are under "tools", outside the classify tree.
    tools_dir = data_root + '/tools'

    articles_path = domain_dir + '/article_trainset'
    article_market_path = domain_dir + '/article_trainset_market'
    feature_path = domain_dir + '/trainset_feature'
    feature_market_path = domain_dir + '/trainset_feature_market'
    pos_path = tools_dir + '/postag'
    punc_path = tools_dir + '/punctuation'
    klword_path = tools_dir + '/knowledgeable_word'
    # The training input is the same file as the feature market output.
    train_path = feature_market_path
    test_path = domain_dir + '/testset_feature_market'

    corpus = Corpus()
    corpus.run(articles_path, article_market_path,
               pos_path, punc_path, klword_path, feature_path, feature_market_path,
               train_path, test_path)
# -*- encoding = gb18030 -*-

# package importing start
import os
import sys
sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/../..'))

from tag.run import Corpus
# package importing end


# run this script
#
# Expects four positional command-line arguments, in this order:
#   1. tag tree path
#   2. sentences market path
#   3. tags market path
#   4. dict market path
# A missing argument raises IndexError at the corresponding sys.argv lookup.

tag_tree_path = sys.argv[1]
sentences_market_path = sys.argv[2]
tags_market_path = sys.argv[3]
dict_market_path = sys.argv[4]
corpus = Corpus()

# Pass the four paths, in command-line order, to Corpus.run_tag_sentences.
corpus.run_tag_sentences(tag_tree_path, sentences_market_path, tags_market_path, dict_market_path)
# -*- encoding = gb18030 -*-

# package importing start
import os
import sys
sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/../..'))

from tag.run import Corpus
# package importing end


# run this script
#
# Expects two positional command-line arguments, in this order:
#   1. sentences path
#   2. sentences market path
# A missing argument raises IndexError at the corresponding sys.argv lookup.

sentences_path = sys.argv[1]
sentences_market_path = sys.argv[2]
corpus = Corpus()

# Pass both paths, in command-line order, to Corpus.run_convert_sentences.
corpus.run_convert_sentences(sentences_path, sentences_market_path)