def __init__(
    self,
    parse: bool = True,
    rerank_model: PathLike = None,
    asp_thresh: int = 3,
    op_thresh: int = 2,
    max_iter: int = 3,
):
    """Initialize the sentiment-training pipeline components.

    NOTE(review): this looks like an orphaned duplicate of
    ``TrainSentiment.__init__`` defined below (bodies are near-identical,
    only quote style differs) — confirm and remove one copy.

    Args:
        parse: When True, construct a SpacyBISTParser for parsing raw text;
            otherwise ``self.parser`` is left as None.
        rerank_model: Path to a trained reranking model. When falsy, a
            pre-trained model is downloaded to RERANK_MODEL_DEFAULT_PATH.
        asp_thresh: Aspect-term threshold, forwarded to AcquireTerms.
        op_thresh: Opinion-term threshold, forwarded to AcquireTerms.
        max_iter: Maximum acquisition iterations, forwarded to AcquireTerms.
    """
    self.acquire_lexicon = AcquireTerms(asp_thresh, op_thresh, max_iter)
    if parse:
        # Deferred import: the BIST parser is only needed when the caller
        # wants raw-text parsing.
        from nlp_architect.pipelines.spacy_bist import SpacyBISTParser
        self.parser = SpacyBISTParser()
    else:
        self.parser = None
    if not rerank_model:
        print("using pre-trained reranking model")
        rerank_model = _download_pretrained_rerank_model(
            RERANK_MODEL_DEFAULT_PATH)
    # Word embeddings are fetched up front; the reranker is pointed at the
    # same EMBEDDING_PATH below.
    download_unzip(*EMBEDDING_URL, EMBEDDING_PATH,
                   license_msg="Glove word embeddings.")
    self.rerank = RerankTerms(vector_cache=True, rerank_model=rerank_model,
                              emb_model_path=EMBEDDING_PATH)
class TrainSentiment:
    """End-to-end pipeline for training sentiment lexicons.

    Optionally parses raw documents, acquires candidate aspect and opinion
    terms, reranks the opinion terms with a trained model, and writes both
    generated lexicons to ``LEXICONS_OUT``.
    """

    def __init__(
        self,
        parse: bool = True,
        rerank_model: Union[PathLike, None] = None,
        asp_thresh: int = 3,
        op_thresh: int = 2,
        max_iter: int = 3,
    ):
        """Initialize pipeline components.

        Args:
            parse: When True, construct a SpacyBISTParser for parsing raw
                text; otherwise ``self.parser`` is left as None.
            rerank_model: Path to a trained reranking model. When falsy, a
                pre-trained model is downloaded to RERANK_MODEL_DEFAULT_PATH.
            asp_thresh: Aspect-term threshold, forwarded to AcquireTerms.
            op_thresh: Opinion-term threshold, forwarded to AcquireTerms.
            max_iter: Maximum acquisition iterations, forwarded to
                AcquireTerms.
        """
        self.acquire_lexicon = AcquireTerms(asp_thresh, op_thresh, max_iter)
        if parse:
            # Deferred import: the BIST parser is only needed when the
            # caller wants raw-text parsing.
            from nlp_architect.pipelines.spacy_bist import SpacyBISTParser
            self.parser = SpacyBISTParser()
        else:
            self.parser = None
        if not rerank_model:
            print("using pre-trained reranking model")
            rerank_model = _download_pretrained_rerank_model(
                RERANK_MODEL_DEFAULT_PATH)
        # Word embeddings are fetched up front; the reranker is pointed at
        # the same EMBEDDING_PATH below.
        download_unzip(*EMBEDDING_URL, EMBEDDING_PATH,
                       license_msg="Glove word embeddings.")
        self.rerank = RerankTerms(vector_cache=True, rerank_model=rerank_model,
                                  emb_model_path=EMBEDDING_PATH)

    def run(
        self,
        data: Union[str, PathLike] = None,
        parsed_data: Union[str, PathLike] = None,
        out_dir: Union[str, PathLike] = TRAIN_OUT,
    ):
        """Run lexicon acquisition and reranking.

        Args:
            data: Raw input documents; parsed when ``parsed_data`` is not
                given. Required in that case.
            parsed_data: Pre-parsed input; when given, ``data`` is ignored.
            out_dir: Directory under which parsed output is written.

        Returns:
            Tuple of (reranked opinion lexicon, generated aspect lexicon).

        Raises:
            ValueError: If neither ``data`` nor ``parsed_data`` is provided.
            RuntimeError: If parsing is needed but the parser was not
                initialized (``parse=False`` at construction).
        """
        if not parsed_data:
            # Guard explicitly: previously a missing `data` surfaced as an
            # opaque TypeError from Path(None).
            if not data:
                raise ValueError(
                    "Either data or parsed_data must be provided")
            if not self.parser:
                raise RuntimeError(
                    "Parser not initialized (try parse=True at init)")
            parsed_dir = Path(out_dir) / "parsed" / Path(data).stem
            parsed_data = self.parse_data(data, parsed_dir)
        generated_aspect_lex = self.acquire_lexicon.acquire_lexicons(
            parsed_data)
        _write_aspect_lex(parsed_data, generated_aspect_lex, LEXICONS_OUT)

        generated_opinion_lex_reranked = \
            self.rerank.predict(AcquireTerms.acquired_opinion_terms_path,
                                AcquireTerms.generic_opinion_lex_path)
        _write_opinion_lex(parsed_data, generated_opinion_lex_reranked,
                           LEXICONS_OUT)

        return generated_opinion_lex_reranked, generated_aspect_lex

    def parse_data(
        self,
        data: Union[PathLike, PosixPath],
        parsed_dir: Union[PathLike, PosixPath],
    ):
        """Parse raw documents into ``parsed_dir``.

        Note: the original annotations used ``PathLike or PosixPath``,
        which evaluates to just ``PathLike``; ``Union`` is the correct idiom.

        Args:
            data: Raw input documents to parse.
            parsed_dir: Output directory for the parsed documents.

        Returns:
            The directory containing the parsed output.

        Raises:
            ValueError: If the data holds fewer than 1000 sentences, the
                minimum required for training.
        """
        _, data_size = parse_docs(self.parser, data, out_dir=parsed_dir)
        if data_size < 1000:
            raise ValueError(
                f"The data contains only {data_size} sentences. A minimum "
                "of 1000 sentences is required for training.")
        return parsed_dir