def __vectorize(self, data):
    """\
    Train vectorization and subsequently vectorize. Accepts a DataSet or a list
    of dictionaries to be vectorized.
    """
    if self.vectorizer is None:
        # no vectorizer configured: only align headers and emit a raw matrix
        if not isinstance(data, DataSet):
            converted = DataSet()
            converted.load_from_dict(data)
            data = converted
        data.match_headers(self.data_headers, add_values=True)
        # TODO pre-filtering here?
        bunch = data.as_bunch(target=self.class_attr,
                              select_attrib=self.select_attr)
        return bunch.data
    # vectorization needed: reduce the input to plain dictionaries with the
    # class attribute masked out and only the selected attributes kept
    if isinstance(data, DataSet):
        dicts = data.as_dict(select_attrib=self.select_attr,
                             mask_attrib=self.class_attr)
    else:
        dicts = []
        for inst in data:
            kept = {}
            for key, val in inst.items():
                if key != self.class_attr and key in self.select_attr:
                    kept[key] = val
            dicts.append(kept)
    # optional per-attribute pre-filtering
    if self.filter_attr:
        dicts = [dict((key, val) for key, val in inst.items()
                      if self.filter_attr(key, val))
                 for inst in dicts]
    # fit the vectorizer on first use only; later calls just transform
    if not self.vectorizer_trained:
        self.vectorizer.fit(dicts)
        self.vectorizer_trained = True
    return self.vectorizer.transform(dicts).tocsr()
def load_training_set(self, filename, encoding='UTF-8'):
    """\
    Load the given training data set into memory and strip it if
    configured to via the train_part parameter.
    """
    log_info('Loading training data set from ' + str(filename) + '...')
    dataset = DataSet()
    dataset.load_from_arff(filename, encoding)
    # train_part < 1 means: keep only the leading fraction of the data
    if self.train_part < 1:
        keep = int(round(self.train_part * len(dataset)))
        dataset = dataset.subset(0, keep, copy=False)
    return dataset
def __vectorize(self, data):
    """\
    Train vectorization and subsequently vectorize. Accepts a DataSet or a list
    of dictionaries to be vectorized.
    """
    # Case 1: no vectorizer — convert straight to a matrix after header match.
    if self.vectorizer is None:
        if not isinstance(data, DataSet):
            wrapped = DataSet()
            wrapped.load_from_dict(data)
            data = wrapped
        data.match_headers(self.data_headers, add_values=True)
        # TODO pre-filtering here?
        return data.as_bunch(target=self.class_attr,
                             select_attrib=self.select_attr).data
    # Case 2: vectorizer present — feed it a list of attribute dictionaries
    # (class attribute removed, only selected attributes retained).
    if isinstance(data, DataSet):
        records = data.as_dict(select_attrib=self.select_attr,
                               mask_attrib=self.class_attr)
    else:
        records = [{key: val
                    for key, val in inst.items()
                    if key != self.class_attr and key in self.select_attr}
                   for inst in data]
    # apply the optional attribute filter, if one is configured
    if self.filter_attr:
        records = [{key: val
                    for key, val in inst.items()
                    if self.filter_attr(key, val)}
                   for inst in records]
    # train the vectorizer lazily, exactly once
    if not self.vectorizer_trained:
        self.vectorizer.fit(records)
        self.vectorizer_trained = True
    return self.vectorizer.transform(records).tocsr()
def evaluate(self, test_file, encoding='UTF-8', classif_file=None):
    """\
    Evaluate on the given test data file. Return accuracy.
    If classif_file is set, save the classification results to this file.
    """
    test = DataSet()
    test.load_from_arff(test_file, encoding)
    predicted = self.classify(test)
    gold = self.get_classes(test, dtype=None)
    # optionally persist the predictions merged into the test data
    if classif_file is not None:
        pred_set = DataSet()
        pred_set.load_from_vect(test.get_attrib(self.class_attr), predicted)
        pred_set.rename_attrib(self.class_attr, self.PREDICTED)
        test.merge(pred_set)
        test.save_to_arff(classif_file, encoding)
    return zero_one_score(gold, predicted)