def _enrich_vec_with_web(self, vec, word):
    """Combine ``vec`` with a bag-of-words built from web contexts of ``word``.

    Contexts are requested from the engine returned by
    ``search_engine_factory()`` (``SHOULD_ENRICH_BY_SCRAPING`` is passed
    straight through to ``get_contexts``), each one is converted with
    ``_context_to_vector``, and the flattened result is turned into a
    bag-of-words via the corpus dictionary without updating that dictionary.

    :param vec: existing bag-of-words vector to be enriched.
    :param word: term whose contexts are looked up.
    :return: whatever ``self._combine_bow_vecs`` produces for ``vec`` and the
        web-derived bag-of-words.
    """
    engine = search_engine_factory()
    web_contexts = []
    for context in engine.get_contexts(word, SHOULD_ENRICH_BY_SCRAPING):
        # NOTE(review): the meaning of the trailing ``True`` flag is defined
        # by _context_to_vector, which is not visible here -- confirm.
        web_contexts.append(_context_to_vector(word, context, True))

    # Single parenthesised argument: prints the same text under Python 2's
    # print statement as the unparenthesised form.
    print('adding %d contexts from web to query vector.' % len(web_contexts))

    dictionary = self.vector_corpus.dictionary
    flattened = itertools.chain.from_iterable(web_contexts)
    web_bow = dictionary.doc2bow(flattened, allowUpdate=False)
    return self._combine_bow_vecs(vec, web_bow)
def __init__(self, preload_terms):
    """Set up the corpus and run ``parallel_hack`` over the preload terms.

    A search engine is created with ``search_engine_factory()``, stored in
    the module-level ``search_corpus`` global, and handed to
    ``VectorCorpus.__init__``. Afterwards ``parallel_hack`` is mapped over
    ``preload_terms`` in a pool of ``NUM_PROCESSES`` worker processes; the
    values returned by the map are discarded.

    :param preload_terms: iterable of terms; stored on ``self.terms`` and
        passed to the worker pool.
    """
    # NOTE(review): the global is presumably how worker processes running
    # parallel_hack reach the engine -- confirm against parallel_hack.
    global search_corpus
    search_corpus = search_engine_factory()
    self.terms = preload_terms
    VectorCorpus.__init__(self, search_corpus)

    from multiprocessing import Pool

    pool = Pool(NUM_PROCESSES)
    try:
        pool.map(parallel_hack, preload_terms)
    finally:
        # The original never shut the pool down, so its worker processes
        # and handler threads outlived the constructor. close() + join()
        # releases them deterministically (try/finally rather than ``with``
        # because Pool is not a context manager on Python 2).
        pool.close()
        pool.join()