def word2vec (conf):
    """ Build Chemotext2 word embeddings from the articles under conf.input_dir,
    writing the model beneath <output_dir>/w2v. """
    logger = LoggingUtil.init_logging (__file__)
    logger.info ("Creating Chemotext2 word embeddings from input: {0}".format (conf.input_dir))
    sc = SparkUtil.get_spark_context (conf.spark_conf)
    article_paths = SUtil.get_article_paths (conf.input_dir) #[:20000]
    articles = sc.parallelize (article_paths, conf.spark_conf.parts). \
               map (lambda p : SUtil.get_article (p))
    logger.info ("Listed {0} input files".format (articles.count ()))
    # Normalize the output directory into a file:// URI ending in /w2v.
    conf.output_dir = conf.output_dir.replace ("file:", "")
    conf.output_dir = "file://{0}/w2v".format (conf.output_dir)
    return WordEmbed (sc, conf.output_dir, articles)
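# --- Hedged usage sketch (not part of the original module) ------------------
# Shows one way the word2vec() entry point above could be driven. It relies only
# on the attributes the function actually reads (input_dir, output_dir, and
# spark_conf, whose .parts field sets the RDD partition count); the
# SimpleNamespace config, the function name, and its arguments are illustrative
# assumptions, not the project's real configuration classes.
def word2vec_driver_sketch (input_dir, output_dir, spark_conf):
    from types import SimpleNamespace
    conf = SimpleNamespace (input_dir  = input_dir,
                            output_dir = output_dir,
                            spark_conf = spark_conf)
    # word2vec() rewrites conf.output_dir into a file:// URI ending in /w2v
    # before handing the articles RDD to WordEmbed.
    return word2vec (conf)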
def __iter__(self):
    """ Yield one tokenized sentence (a list of tokens) at a time from each
    matching article, with gene synonyms mapped to canonical form. """
    for file_name in self.files:
        if self.match (file_name):
            base = "{0}.json".format (os.path.basename (file_name))
            article_path = os.path.join (self.input_dir, base)
            article = SUtil.get_article (article_path)
            if article is not None:
                # http://www.nltk.org/api/nltk.tokenize.html#module-nltk.tokenize
                sentence_tokens = [ self.tokenizer.tokenize (s) for s in sent_tokenize (article.raw) ]
                sentences = [ self.gene_syn.make_canonical (s) for s in sentence_tokens ]
                for s in sentences:
                    yield s
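# --- Hedged usage sketch (not part of the original module) ------------------
# Illustrates how a restartable sentence iterator like the __iter__ above can be
# consumed. It assumes a gensim-style trainer; the actual pipeline may instead
# feed sentences to the Spark-based WordEmbed used in word2vec(). The argument
# names (sentences, out_path) and the hyperparameters are illustrative only.
def train_gensim_sketch (sentences, out_path):
    from gensim.models import Word2Vec  # assumes gensim is available
    # Because the corpus object defines __iter__, gensim can make the multiple
    # passes it needs: one to build the vocabulary, then one or more to train.
    model = Word2Vec (sentences, min_count = 3, workers = 4)
    model.save (out_path)
    return model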
def get_article (article_path):
    """ Load an article and map it to its equivalent set. Returns a
    single-element list, or an empty list if no article results, so that
    callers can compose it with flatMap. """
    article = EquivalentSet.get_article_equiv_set (SUtil.get_article (article_path))
    return [] if not article else [ article ]
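# --- Hedged usage sketch (not part of the original module) ------------------
# Shows why get_article() returns a list: the [] / [article] shape composes with
# Spark's flatMap, so paths that yield no article are silently dropped from the
# resulting RDD. The function name and parameters below are illustrative;
# `spark_context` is assumed to be an existing SparkContext.
def load_article_equiv_sets_sketch (spark_context, article_paths, num_partitions):
    return spark_context.parallelize (article_paths, num_partitions). \
           flatMap (lambda p : get_article (p))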