def __init__(self, config_fp, distance_mode):
    """Initialize a distance-feature extractor.

    Args:
        config_fp: configuration file path, forwarded to Extractor.
        distance_mode: name of the DistanceUtil function to use;
            one of 'edit_dist' or 'compression_dist'.

    Raises:
        ValueError: if distance_mode is not a supported mode.
    """
    Extractor.__init__(self, config_fp)
    # The mode is baked into the feature name so features from different
    # modes do not collide.
    self.feature_name += '_%s' % distance_mode
    self.valid_distance_mode = ['edit_dist', 'compression_dist']
    # FIX: the original message said "Wrong aggregation_mode" for a
    # distance_mode check, and used `assert`, which is stripped under
    # python -O; validate with an explicit exception instead.
    if distance_mode not in self.valid_distance_mode:
        raise ValueError("Wrong distance_mode: %s" % distance_mode)
    self.distance_mode = distance_mode
    # Resolve the distance function by name from DistanceUtil.
    self.distance_func = getattr(DistanceUtil, self.distance_mode)
def __init__(self):
    """Collect the sorted list of all feature names, then run base setup."""
    # Note: the second tuple element of each feature pair is unused here.
    direct_names = [label for label, _pattern in self.DIRECT_FEATS]
    lemma_names = [label for label, _pattern in self.LEMMA_FEATS]
    self.__featureNames = sorted(direct_names + lemma_names + self.CALCULATED_FEATS)
    Extractor.__init__(self)
def __init__(self):
    """Load the stopword list and initialize an empty model slot."""
    Extractor.__init__(self)
    # FIX: the original leaked the file handle (open(...).readlines()
    # without ever closing); a context manager guarantees it is closed.
    stopword_fp = PropertiesUtil.getProperty('PATH', 'stopword_fp')
    with open(stopword_fp, 'r') as fin:
        # One stopword per line; strip trailing newline/whitespace.
        self.stop = [line.strip() for line in fin]
    # Model is built lazily elsewhere.
    self.model = None
def __init__(self, config_fp):
    """Load the powerful-word table into a dict keyed by word."""
    Extractor.__init__(self, config_fp)
    devel_dir = self.config.get('DIRECTORY', 'devel_pt')
    pword_name = self.config.get('FILE_NAME', 'powerful_word_name')
    pword_fp = '%s/%s.txt' % (devel_dir, pword_name)
    self.pword_dict = dict(PowerfulWord.load_powerful_word(pword_fp))
def __init__(self, config_fp, thresh_num=0, thresh_rate=0.0):
    """Load powerful words and build the one-side subset.

    Args:
        config_fp: configuration file path, forwarded to Extractor.
        thresh_num: minimum count threshold for one-side filtering.
        thresh_rate: minimum rate threshold for one-side filtering.
    """
    Extractor.__init__(self, config_fp)
    devel_dir = self.config.get('DIRECTORY', 'devel_pt')
    pword_name = self.config.get('FILE_NAME', 'powerful_word_name')
    pword_fp = '%s/%s.txt' % (devel_dir, pword_name)
    self.pword = PowerfulWord.load_powerful_word(pword_fp)
    self.pword_oside = PowerfulWordOneSide.init_powerful_word_oside(
        self.pword, thresh_num, thresh_rate)
def __init__(self, config_fp, n_components=50, tf_idf=None, tf_idf_result=None):
    """Set up truncated SVD over a TF-IDF representation.

    Args:
        config_fp: configuration file path, forwarded to Extractor.
        n_components: number of SVD components to keep.
        tf_idf: fitted TF-IDF model, if precomputed.
        tf_idf_result: transformed TF-IDF matrix, if precomputed.
    """
    Extractor.__init__(self, config_fp)
    self.n_components = n_components
    self.tf_idf = tf_idf
    self.tf_idf_result = tf_idf_result
    self.svd_model = self.init_svd()
def __init__(self, config_fp):
    """Load the word list and the Spanish wiki word2vec dictionary."""
    Extractor.__init__(self, config_fp)
    source_dir = self.config.get('DIRECTORY', 'source_pt')
    words_pt = '%s/%s' % (source_dir, self.config.get('FILE_NAME', 'words_txt'))
    self.words_dict = self.getWordsDict(words_pt=words_pt)
    # Loading the embedding file can be slow, so progress is printed.
    print("get word2vec_dict...")
    word2vec_pt = '%s/%s' % (source_dir, self.config.get('FILE_NAME', 'wiki_es_vec'))
    self.word2vec_dict = self.getWord2VecDict(word2vec_pt)
    print("get word2vec_dict done")
def __init__(self, config_fp):
    """Build the IDF table from the preprocessed training data.

    Reads the cleaned train/test CSVs named in the config and
    initializes self.idf via TFIDFWordMatchShare.init_idf.
    (Dead commented-out words_dict loading code was removed.)
    """
    Extractor.__init__(self, config_fp)
    clean_dir = self.config.get('DIRECTORY', 'csv_spanish_cleaning_pt')
    train_data = pd.read_csv(
        '%s/%s' % (clean_dir,
                   self.config.get('FILE_NAME', 'preprocessing_train_merge_csv'))
    ).fillna(value="")
    # NOTE(review): test_data is loaded but never used below; kept only
    # for its fail-fast check that the file exists — confirm and remove.
    test_data = pd.read_csv(
        '%s/%s' % (clean_dir,
                   self.config.get('FILE_NAME', 'preprocessing_test_csv'))
    ).fillna(value="")
    self.idf = TFIDFWordMatchShare.init_idf(train_data)
def __init__(self, config):
    """Parse the config and build the IDF table from the training data.

    Args:
        config: path to the configuration file; parsed here into
            self.conf in addition to the base Extractor setup.
    """
    Extractor.__init__(self, config)
    # NOTE(review): self.conf re-parses the same file the base class
    # already exposes as self.config — confirm whether one can be dropped.
    self.conf = ConfigParser.ConfigParser()
    self.conf.read(config)
    clean_dir = self.config.get('DIRECTORY', 'csv_spanish_cleaning_pt')
    train_data = pd.read_csv(
        '%s/%s' % (clean_dir,
                   self.config.get('FILE_NAME', 'preprocessing_train_merge_csv'))
    ).fillna(value="")
    # NOTE(review): test_data is loaded but never used below; kept only
    # for its fail-fast check that the file exists — confirm and remove.
    test_data = pd.read_csv(
        '%s/%s' % (clean_dir,
                   self.config.get('FILE_NAME', 'preprocessing_test_csv'))
    ).fillna(value="")
    self.idf = TFIDFWordMatchShare.init_idf(train_data)
def __init__(self, config_fp):
    """Store the precomputed keyword-group rules.

    The nested list below is a hard-coded snapshot of what
    Graph(config_fp).buildGraph() previously produced; each rule is a
    list of keyword groups.
    """
    Extractor.__init__(self, config_fp)
    rules = [
        [['impuestos']],
        [['Cómo'],
         ['reporto', 'enviar', 'informar', 'reportar', 'informo'],
         ['proveedor']],
        [['hacer', 'Cómo'], ['pedido']],
        [['bancaria']],
        [['Quiero'], ['pagar']],
        [['no', 'ni', 'nunca'], ['pedido']],
        [['Donde'], ['cupones']],
        [['número'], ['teléfono']],
        [['Recibí'], ['pedido']],
        [['recibí', 'recibido'], ['no', 'ni', 'nunca']],
        [['confiable'], ['vendedor', 'proveedor']],
        [['protección'], ['comprador', 'compra']],
        [['mi'], ['pregunta']],
    ]
    self.graph_result = rules
def __init__(self, config_fp, language="es"):
    """Initialize with the base config and remember the working language.

    Args:
        config_fp: configuration file path, forwarded to Extractor.
        language: language code for downstream processing (default "es").
    """
    Extractor.__init__(self, config_fp)
    self.language = language
def __init__(self, config_fp, n_components=50, tf=None, tf_result=None):
    """Set up an LDA model over raw term-count (tf) features.

    Args:
        config_fp: configuration file path, forwarded to Extractor.
        n_components: number of LDA topics.
        tf: fitted term-count model, if precomputed.
        tf_result: transformed term-count matrix, if precomputed.
    """
    Extractor.__init__(self, config_fp)
    self.n_components = n_components
    self.tf = tf
    self.tf_result = tf_result
    self.lda_model = self.init_lda()
def __init__(self):
    """Assemble the feature-name list, then run base-class setup."""
    # Only the label of each (label, pattern) pair is needed here.
    direct_names = [label for label, _pattern in self.DIRECT_FEATS]
    self.__featureNames = direct_names + self.CALCULATED_FEATS
    Extractor.__init__(self)
def __init__(self, config_fp):
    """Precompute the duplicate-count table used by this extractor."""
    Extractor.__init__(self, config_fp)
    self.dul_num = self.generate_dul_num()
def __init__(self, config_fp):
    """Prepare a Spanish Snowball stemmer for feature extraction."""
    Extractor.__init__(self, config_fp)
    self.snowball_stemmer = SnowballStemmer('spanish')
def __init__(self, config_fp):
    """Fit the TF-IDF model and cache the transformed result."""
    Extractor.__init__(self, config_fp)
    tfidf_model, tfidf_result = self.init_tfidf()
    self.tfidf = tfidf_model
    self.tfidf_result = tfidf_result
def __init__(self, config):
    """Parse the config file and build the raw term-frequency model.

    Args:
        config: path to the configuration file; also parsed into
            self.conf alongside the base Extractor setup.
    """
    Extractor.__init__(self, config)
    parser = ConfigParser.ConfigParser()
    parser.read(config)
    self.conf = parser
    self.tf, self.tf_result = self.init_tf()
def __init__(self, config_fp, language="en"):
    """Initialize with the base config and remember the working language.

    NOTE(review): the original carried a "should be modified" remark,
    presumably about the English default in an otherwise Spanish
    pipeline — confirm the intended default.
    """
    Extractor.__init__(self, config_fp)
    self.language = language
def __init__(self):
    """Delegate straight to the base Extractor initializer; no extra state."""
    Extractor.__init__(self)
def __init__(self, config, jar_path, max_path_length, max_path_width, path_dict_and_name):
    """Load the word/path count dictionaries on top of the base setup.

    Args:
        config, jar_path, max_path_length, max_path_width: forwarded
            unchanged to Extractor.
        path_dict_and_name: dictionary file handed to
            preprocess_test_batch.load_dictionaries.
    """
    Extractor.__init__(self, config, jar_path, max_path_length, max_path_width)
    # load_dictionaries returns four values; only the first two
    # (word counts and path counts) are kept.
    word_counts, path_counts, _unused_a, _unused_b = (
        preprocess_test_batch.load_dictionaries(path_dict_and_name))
    self.word_to_count = word_counts
    self.path_to_count = path_counts
def __init__(self):
    """Register the sentence-length statistic feature names, then base setup."""
    self.__featureNames = utils.getStatNames("SENT_LENGTH")
    Extractor.__init__(self)