def getNbrFeatures(self, *filenames):
    ''' Get the number of features directly from the data file (in case we
    do not have an info file).

    The value is cached in self.info['feat_num']: when that key is already
    present, it is returned as-is and no file is read.

    Args:
        *filenames: paths of the data files. The first one is passed to
            self.getFormatData and is the only one read for the 'dense'
            format; every file is scanned for the 'sparse' and
            'sparse_binary' formats.

    Returns:
        self.info['feat_num']. For 'dense' this is the length of the first
        row; for 'sparse' / 'sparse_binary' it is the largest value found in
        the last entry of any row across all files (0 when no row has an
        entry). No value is stored for any other format.
    '''
    if 'feat_num' not in self.info:
        # Presumably fills in self.info['format'] -- confirm in getFormatData.
        self.getFormatData(filenames[0])
        data_format = self.info['format']
        if data_format == 'dense':
            data = data_converter.file_to_array(filenames[0])
            self.info['feat_num'] = len(data[0])
        elif data_format == 'sparse':
            feat_num = 0
            for filename in filenames:
                sparse_list = data_converter.sparse_file_to_sparse_list(
                    filename)
                for row in sparse_list:
                    # A row without any entry has no last element; skip it
                    # instead of failing with IndexError.
                    if len(row) > 0:
                        # Entries are (feature_index, value) pairs; only the
                        # last entry of each row is inspected.
                        feat_num = max(feat_num, row[-1][0])
            self.info['feat_num'] = feat_num
        elif data_format == 'sparse_binary':
            feat_num = 0
            for filename in filenames:
                data = data_converter.file_to_array(filename)
                for row in data:
                    # len() instead of truthiness: the truth value of a
                    # multi-element numpy row would be ambiguous.
                    if len(row) > 0:
                        feat_num = max(feat_num, int(row[-1]))
            self.info['feat_num'] = feat_num
    return self.info['feat_num']
def data_sparse(filename, feat_type):
    ''' Load a file holding a sparse matrix and convert it.

    In the file, sparse_matrix[i][j] = "a:b" means matrix[i][a] = b.

    Args:
        filename: path of the sparse data file; it is read with
            data_converter.sparse_file_to_sparse_list.
        feat_type: sized collection whose length is passed on as the number
            of features (presumably one entry per feature -- confirm with
            the caller).

    Returns:
        The result of data_converter.sparse_list_to_csr_sparse (a CSR sparse
        matrix, judging by the helper's name -- confirm).
    '''
    rows = data_converter.sparse_file_to_sparse_list(filename)
    n_features = len(feat_type)
    return data_converter.sparse_list_to_csr_sparse(rows, n_features)
def getNbrFeatures(self, *filenames):
    ''' Get the number of features directly from the data file (in case we
    do not have an info file).

    The value is cached in self.info['feat_num']: when that key is already
    present, it is returned as-is and no file is read.

    Args:
        *filenames: paths of the data files. The first one is passed to
            self.getFormatData and is the only one read for the 'dense'
            format; every file is scanned for the 'sparse' and
            'sparse_binary' formats.

    Returns:
        self.info['feat_num']. For 'dense' this is the length of the first
        row; for 'sparse' / 'sparse_binary' it is the largest value found in
        the last entry of any row across all files (0 when no row has an
        entry). No value is stored for any other format.
    '''
    if 'feat_num' not in self.info:
        # Presumably fills in self.info['format'] -- confirm in getFormatData.
        self.getFormatData(filenames[0])
        data_format = self.info['format']
        if data_format == 'dense':
            data = data_converter.file_to_array(filenames[0])
            self.info['feat_num'] = len(data[0])
        elif data_format == 'sparse':
            feat_num = 0
            for filename in filenames:
                sparse_list = data_converter.sparse_file_to_sparse_list(
                    filename)
                for row in sparse_list:
                    # A row without any entry has no last element; skip it
                    # instead of failing with IndexError.
                    if len(row) > 0:
                        # Entries are (feature_index, value) pairs; only the
                        # last entry of each row is inspected.
                        feat_num = max(feat_num, row[-1][0])
            self.info['feat_num'] = feat_num
        elif data_format == 'sparse_binary':
            feat_num = 0
            for filename in filenames:
                data = data_converter.file_to_array(filename)
                for row in data:
                    # len() instead of truthiness: the truth value of a
                    # multi-element numpy row would be ambiguous.
                    if len(row) > 0:
                        feat_num = max(feat_num, int(row[-1]))
            self.info['feat_num'] = feat_num
    return self.info['feat_num']
def data_sparse(filename, nbr_features):
    ''' Load a file holding a sparse matrix and convert it.

    In the file, sparse_matrix[i][j] = "a:b" means matrix[i][a] = b.

    Args:
        filename: path of the sparse data file; it is read with
            data_converter.sparse_file_to_sparse_list.
        nbr_features: number of features, passed unchanged to
            data_converter.sparse_list_to_csr_sparse.

    Returns:
        The result of data_converter.sparse_list_to_csr_sparse (a CSR sparse
        matrix, judging by the helper's name -- confirm).
    '''
    return data_converter.sparse_list_to_csr_sparse(
        data_converter.sparse_file_to_sparse_list(filename),
        nbr_features,
    )