Пример #1
0
 def __init__(self, config_fp, distance_mode):
     """Configure the extractor for one named distance function.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
         distance_mode: Name of the ``DistanceUtil`` attribute to use.
             Must be ``'edit_dist'`` or ``'compression_dist'``.

     Raises:
         AssertionError: If ``distance_mode`` is not one of the supported
             values (only when assertions are enabled).
     """
     Extractor.__init__(self, config_fp)
     self.valid_distance_mode = ['edit_dist', 'compression_dist']
     # Validate before feature_name is modified so a rejected mode never
     # leaves a suffix behind; the message names the actual parameter.
     assert distance_mode in self.valid_distance_mode, (
         "Wrong distance_mode: %s" % distance_mode)
     self.feature_name += '_%s' % distance_mode
     self.distance_mode = distance_mode
     # Resolve the distance implementation by name on DistanceUtil.
     self.distance_func = getattr(DistanceUtil, self.distance_mode)
Пример #2
0
 def __init__(self):
     """Collect and sort all feature names, then run the base initializer.

     The names come from the first item of each pair in ``DIRECT_FEATS``
     and ``LEMMA_FEATS`` plus every entry of ``CALCULATED_FEATS``. The
     result is stored in a double-underscore (name-mangled) attribute.
     """
     direct_names = [name for (name, _) in self.DIRECT_FEATS]
     lemma_names = [name for (name, _) in self.LEMMA_FEATS]
     all_names = direct_names + lemma_names + self.CALCULATED_FEATS
     self.__featureNames = sorted(all_names)
     # The base initializer runs only after the names are in place.
     Extractor.__init__(self)
Пример #3
0
 def __init__(self):
     """Load the stopword list and leave the model unset.

     The stopword file path is read from the ``PATH`` / ``stopword_fp``
     property. Each line of the file becomes one entry of ``self.stop``
     with surrounding whitespace stripped.
     """
     Extractor.__init__(self)
     stopword_fp = PropertiesUtil.getProperty('PATH', 'stopword_fp')
     # The context manager guarantees the file handle is closed, even if
     # reading fails part-way through.
     with open(stopword_fp, 'r') as stopword_file:
         self.stop = [line.strip() for line in stopword_file]
     self.model = None
Пример #4
0
    def __init__(self, config_fp):
        """Load the powerful-word file and keep it as a dictionary.

        Args:
            config_fp: Forwarded unchanged to ``Extractor.__init__``.
        """
        Extractor.__init__(self, config_fp)

        # Path is "<devel_pt>/<powerful_word_name>.txt" from self.config.
        devel_dir = self.config.get('DIRECTORY', 'devel_pt')
        word_file = self.config.get('FILE_NAME', 'powerful_word_name')
        powerful_word_fp = '%s/%s.txt' % (devel_dir, word_file)

        loaded_words = PowerfulWord.load_powerful_word(powerful_word_fp)
        self.pword_dict = dict(loaded_words)
Пример #5
0
    def __init__(self, config_fp, thresh_num=0, thresh_rate=0.0):
        """Load powerful words and derive the one-side variant from them.

        Args:
            config_fp: Forwarded unchanged to ``Extractor.__init__``.
            thresh_num: Forwarded to
                ``PowerfulWordOneSide.init_powerful_word_oside``.
            thresh_rate: Forwarded to
                ``PowerfulWordOneSide.init_powerful_word_oside``.
        """
        Extractor.__init__(self, config_fp)

        # Path is "<devel_pt>/<powerful_word_name>.txt" from self.config.
        devel_dir = self.config.get('DIRECTORY', 'devel_pt')
        word_file = self.config.get('FILE_NAME', 'powerful_word_name')
        powerful_word_fp = '%s/%s.txt' % (devel_dir, word_file)

        self.pword = PowerfulWord.load_powerful_word(powerful_word_fp)
        one_side = PowerfulWordOneSide.init_powerful_word_oside(
            self.pword, thresh_num, thresh_rate)
        self.pword_oside = one_side
Пример #6
0
 def __init__(self, config_fp, n_components=50, tf_idf=None,
              tf_idf_result=None):
     """Store the SVD settings and build the SVD model.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
         n_components: Stored as ``self.n_components``.
         tf_idf: Stored as ``self.tf_idf``.
         tf_idf_result: Stored as ``self.tf_idf_result``.
     """
     Extractor.__init__(self, config_fp)
     self.n_components = n_components
     self.tf_idf = tf_idf
     self.tf_idf_result = tf_idf_result
     # init_svd() is called last, after all attributes above are set.
     svd_model = self.init_svd()
     self.svd_model = svd_model
Пример #7
0
 def __init__(self, config_fp):
     """Load the words dictionary and the word2vec dictionary.

     Both file paths are built as "<source_pt>/<file name>" from
     ``self.config``. Progress messages are printed around the word2vec
     load.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
     """
     Extractor.__init__(self, config_fp)

     words_dir = self.config.get('DIRECTORY', 'source_pt')
     words_name = self.config.get('FILE_NAME', 'words_txt')
     self.words_dict = self.getWordsDict(
         words_pt='%s/%s' % (words_dir, words_name))

     print("get word2vec_dict...")
     vec_dir = self.config.get('DIRECTORY', 'source_pt')
     vec_name = self.config.get('FILE_NAME', 'wiki_es_vec')
     self.word2vec_dict = self.getWord2VecDict(
         '%s/%s' % (vec_dir, vec_name))
     print("get word2vec_dict done")
Пример #8
0
 def __init__(self, config_fp):
     """Build IDF weights from the training CSV.

     The training file path is
     "<csv_spanish_cleaning_pt>/<preprocessing_train_merge_csv>" as read
     from ``self.config``. Missing values are replaced with empty strings
     before the frame is handed to ``TFIDFWordMatchShare.init_idf``.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
     """
     Extractor.__init__(self, config_fp)
     train_fp = '%s/%s' % (
         self.config.get('DIRECTORY', 'csv_spanish_cleaning_pt'),
         self.config.get('FILE_NAME', 'preprocessing_train_merge_csv'))
     # Only the training frame feeds init_idf. The test CSV is
     # deliberately not loaded: nothing in this initializer consumes it.
     train_data = pd.read_csv(train_fp).fillna(value="")
     self.idf = TFIDFWordMatchShare.init_idf(train_data)
Пример #9
0
    def __init__(self, config):
        """Parse the config file and build IDF weights from the training CSV.

        The training file path is
        "<csv_spanish_cleaning_pt>/<preprocessing_train_merge_csv>" as read
        from ``self.config``. Missing values are replaced with empty
        strings before the frame is handed to
        ``TFIDFWordMatchShare.init_idf``.

        Args:
            config: Forwarded to ``Extractor.__init__`` and also read into a
                separate parser stored as ``self.conf``.
        """
        Extractor.__init__(self, config)
        self.conf = ConfigParser.ConfigParser()
        self.conf.read(config)

        train_fp = '%s/%s' % (
            self.config.get('DIRECTORY', 'csv_spanish_cleaning_pt'),
            self.config.get('FILE_NAME', 'preprocessing_train_merge_csv'))
        # Only the training frame feeds init_idf. The test CSV is
        # deliberately not loaded: nothing in this initializer consumes it.
        train_data = pd.read_csv(train_fp).fillna(value="")
        self.idf = TFIDFWordMatchShare.init_idf(train_data)
Пример #10
0
 def __init__(self, config_fp):
     """Install a hard-coded ``graph_result``.

     ``graph_result`` is a list of entries; each entry is a list of word
     lists.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
     """
     Extractor.__init__(self, config_fp)
     # Hard-coded value; the dynamic alternative
     # Graph(config_fp).buildGraph() is disabled.
     self.graph_result = [
         [['impuestos']],
         [
             ['Cómo'],
             ['reporto', 'enviar', 'informar', 'reportar', 'informo'],
             ['proveedor'],
         ],
         [['hacer', 'Cómo'], ['pedido']],
         [['bancaria']],
         [['Quiero'], ['pagar']],
         [['no', 'ni', 'nunca'], ['pedido']],
         [['Donde'], ['cupones']],
         [['número'], ['teléfono']],
         [['Recibí'], ['pedido']],
         [['recibí', 'recibido'], ['no', 'ni', 'nunca']],
         [['confiable'], ['vendedor', 'proveedor']],
         [['protección'], ['comprador', 'compra']],
         [['mi'], ['pregunta']],
     ]
Пример #11
0
 def __init__(self, config_fp, language="es"):
     """Run the base initializer and remember the language code.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
         language: Stored as ``self.language``; defaults to ``"es"``.
     """
     Extractor.__init__(self, config_fp)
     self.language = language
Пример #12
0
 def __init__(self, config_fp, n_components=50, tf=None, tf_result=None):
     """Store the LDA settings and build the LDA model.

     Per the original author's note, LDA is fed tf (raw term count)
     features.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
         n_components: Stored as ``self.n_components``.
         tf: Stored as ``self.tf``.
         tf_result: Stored as ``self.tf_result``.
     """
     Extractor.__init__(self, config_fp)
     self.n_components = n_components
     self.tf = tf
     self.tf_result = tf_result
     # init_lda() is called last, after all attributes above are set.
     lda_model = self.init_lda()
     self.lda_model = lda_model
Пример #13
0
 def __init__(self):
     """Collect the feature names, then run the base initializer.

     The names are the first item of each pair in ``DIRECT_FEATS``
     followed by every entry of ``CALCULATED_FEATS``. The result is stored
     in a double-underscore (name-mangled) attribute.
     """
     direct_names = [name for (name, _) in self.DIRECT_FEATS]
     self.__featureNames = direct_names + self.CALCULATED_FEATS
     # The base initializer runs only after the names are in place.
     Extractor.__init__(self)
Пример #14
0
 def __init__(self, config_fp):
     """Run the base initializer and cache ``generate_dul_num()``.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
     """
     Extractor.__init__(self, config_fp)
     generated = self.generate_dul_num()
     self.dul_num = generated
Пример #15
0
 def __init__(self, config_fp):
     """Run the base initializer and create a Spanish Snowball stemmer.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
     """
     Extractor.__init__(self, config_fp)
     spanish_stemmer = SnowballStemmer('spanish')
     self.snowball_stemmer = spanish_stemmer
Пример #16
0
 def __init__(self, config_fp):
     """Run the base initializer and store both results of ``init_tfidf()``.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
     """
     Extractor.__init__(self, config_fp)
     # init_tfidf() must yield exactly two values.
     tfidf, tfidf_result = self.init_tfidf()
     self.tfidf = tfidf
     self.tfidf_result = tfidf_result
Пример #17
0
 def __init__(self, config):
     """Parse the config file and store both results of ``init_tf()``.

     Args:
         config: Forwarded to ``Extractor.__init__`` and also read into a
             separate parser stored as ``self.conf``.
     """
     Extractor.__init__(self, config)
     parser = ConfigParser.ConfigParser()
     self.conf = parser
     self.conf.read(config)
     # init_tf() must yield exactly two values.
     tf, tf_result = self.init_tf()
     self.tf = tf
     self.tf_result = tf_result
Пример #18
0
 def __init__(self, config_fp, language="en"):
     """Run the base initializer and remember the language code.

     Args:
         config_fp: Forwarded unchanged to ``Extractor.__init__``.
         language: Stored as ``self.language``; defaults to ``"en"``.
     """
     Extractor.__init__(self, config_fp)
     # NOTE(review): the original author marked this as "should be
     # modified" -- presumably the language handling; confirm what change
     # was intended.
     self.language = language
Пример #19
0
 def __init__(self):
     """Delegate to ``Extractor.__init__``; no extra state is set here."""
     Extractor.__init__(self)
 def __init__(self):
     """Delegate to ``Extractor.__init__``; no extra state is set here."""
     Extractor.__init__(self)
Пример #21
0
 def __init__(self, config, jar_path, max_path_length, max_path_width,
              path_dict_and_name):
     """Run the base initializer and load the word and path count tables.

     Args:
         config: Forwarded to ``Extractor.__init__``.
         jar_path: Forwarded to ``Extractor.__init__``.
         max_path_length: Forwarded to ``Extractor.__init__``.
         max_path_width: Forwarded to ``Extractor.__init__``.
         path_dict_and_name: Passed to
             ``preprocess_test_batch.load_dictionaries``.
     """
     Extractor.__init__(self, config, jar_path, max_path_length,
                        max_path_width)
     loaded = preprocess_test_batch.load_dictionaries(path_dict_and_name)
     # Exactly four values are expected; the last two are discarded.
     word_counts, path_counts, _, _ = loaded
     self.word_to_count = word_counts
     self.path_to_count = path_counts
Пример #22
0
 def __init__(self):
     """Fetch the "SENT_LENGTH" stat names, then run the base initializer.

     The names are stored in a double-underscore (name-mangled) attribute.
     """
     stat_names = utils.getStatNames("SENT_LENGTH")
     self.__featureNames = stat_names
     # The base initializer runs only after the names are in place.
     Extractor.__init__(self)