Пример #1
0
	def getTypeProblem (self, solution_filename):
		''' Get the type of problem directly from the solution file (in case we do not have an info file).

		If self.info already has a 'task' entry it is returned unchanged and the
		file is not read. Otherwise the solution file is loaded and self.info is
		filled with 'target_num', 'label_num', 'target_type' and 'task'.

		Heuristics used:
		- one target column with fewer unique values than 1/8 of the number of
		  samples: classification (binary if exactly 2 unique values, multiclass
		  otherwise);
		- one target column with more unique values than that: regression;
		- several target columns: multilabel if any row (cast to int) sums to
		  more than 1, multiclass otherwise.

		Returns the value of self.info['task'].
		'''
		if 'task' not in self.info:
			# assumes file_to_array yields equally long rows so the array is 2-D -- TODO confirm
			solution = np.array(data_converter.file_to_array(solution_filename))
			target_num = solution.shape[1]
			self.info['target_num'] = target_num
			if target_num == 1: # if we have only one column
				solution = np.ravel(solution) # flatten
				nbr_unique_values = len(np.unique(solution))
				if nbr_unique_values < len(solution)/8:
					# Classification
					self.info['label_num'] = nbr_unique_values
					if nbr_unique_values == 2:
						self.info['task'] = 'binary.classification'
						self.info['target_type'] = 'Binary'
					else:
						self.info['task'] = 'multiclass.classification'
						self.info['target_type'] = 'Categorical'
				else:
					# Regression
					self.info['label_num'] = 0
					self.info['task'] = 'regression'
					self.info['target_type'] = 'Numerical'
			else:
				# Multilabel or multiclass
				self.info['label_num'] = target_num
				self.info['target_type'] = 'Binary'
				# A row with more than one active label means the labels are not exclusive.
				row_sums = solution.astype(int).sum(axis=1)
				if (row_sums > 1).any():
					self.info['task'] = 'multilabel.classification'
				else:
					self.info['task'] = 'multiclass.classification'
		return self.info['task']
		
		
Пример #2
0
	def getFormatData(self,filename):
		''' Get the data format directly from the data file (in case we do not have an info file).

		The result is one of 'dense', 'sparse' or 'sparse_binary'. It is cached in
		self.info['format'] and returned.

		- If self.info already has 'format', it is returned as is.
		- If self.info has 'is_sparse': 0 means 'dense'; otherwise the first line
		  of the file is inspected and a ':' in its first item means 'sparse',
		  anything else 'sparse_binary'.
		- Otherwise the whole file is read: a ':' in the first item of the first
		  row means 'sparse'; rows of differing lengths mean 'sparse_binary';
		  equal-length rows mean 'dense'. In this case self.info['is_sparse'] is
		  set as well (1 for both sparse formats, 0 for dense).
		'''
		if 'format' in self.info:
			return self.info['format']
		if 'is_sparse' in self.info:
			if self.info['is_sparse'] == 0:
				self.info['format'] = 'dense'
			else:
				data = data_converter.read_first_line (filename)
				if ':' in data[0]:
					self.info['format'] = 'sparse'
				else:
					self.info['format'] = 'sparse_binary'
		else:
			data = data_converter.file_to_array (filename)
			if ':' in data[0][0]:
				self.info['is_sparse'] = 1
				self.info['format'] = 'sparse'
			else:
				nbr_columns = len(data[0])
				# any() stops at the first row whose length differs from the first row.
				if any(len(row) != nbr_columns for row in data):
					# Keep 'is_sparse' in step with the format, as the other outcomes do.
					self.info['is_sparse'] = 1
					self.info['format'] = 'sparse_binary'
				else:
					self.info['format'] = 'dense'
					self.info['is_sparse'] = 0
		return self.info['format']
Пример #3
0
	def getNbrFeatures (self, *filenames):
		''' Get the number of features directly from the data file (in case we do not have an info file).

		If self.info already has 'feat_num' it is returned without reading any
		file. Otherwise the format is determined from the first file (via
		getFormatData) and:
		- 'dense': the length of the first row of the first file is used;
		- 'sparse': the largest feature index found in any (index, value) entry
		  of any of the files is used;
		- 'sparse_binary': the largest integer found in any row of any of the
		  files is used.

		Every entry of every row is scanned, so empty rows and rows whose indices
		are not sorted are handled. self.info['feat_num'] is only written once the
		scan has completed, so a failure while reading does not leave a partial
		value cached.

		Returns the value of self.info['feat_num'].
		'''
		if 'feat_num' not in self.info:
			self.getFormatData(filenames[0])
			if self.info['format'] == 'dense':
				data = data_converter.file_to_array(filenames[0])
				self.info['feat_num'] = len(data[0])
			elif self.info['format'] == 'sparse':
				feat_num = 0
				for filename in filenames:
					sparse_list = data_converter.sparse_file_to_sparse_list (filename)
					for sparse_row in sparse_list:
						for (feature_index, _value) in sparse_row:
							feat_num = max(feat_num, feature_index)
				self.info['feat_num'] = feat_num
			elif self.info['format'] == 'sparse_binary':
				feat_num = 0
				for filename in filenames:
					data = data_converter.file_to_array (filename)
					for binary_row in data:
						for feature_index in binary_row:
							feat_num = max(feat_num, int(feature_index))
				self.info['feat_num'] = feat_num
		return self.info['feat_num']
Пример #4
0
 def loadType(self, filename, verbose=True):
     ''' Get the variable types as a flat numpy array.

     When filename is an existing file, its content is read with
     data_converter.file_to_array. Otherwise self.info['feat_type'] is
     repeated self.info['feat_num'] times. With verbose set, a banner and the
     elapsed time are printed.
     '''
     if verbose:
         print("========= Reading " + filename)
     t0 = time.time()
     if not os.path.isfile(filename):
         # No type file: fall back to one default type per feature.
         feature_count = self.info['feat_num']
         raw_types = [self.info['feat_type']] * feature_count
     else:
         raw_types = data_converter.file_to_array(filename, verbose=False)
     flat_types = np.array(raw_types).ravel()
     t1 = time.time()
     if verbose:
         print("[+] Success in %5.2f sec" % (t1 - t0))
     return flat_types
Пример #5
0
def data_binary_sparse (filename, nbr_features):
	''' Read a binary sparse file and return it as a scipy CSR matrix.

	Each row of the file lists feature numbers; a listed number a on row i
	means matrix[i][a-1] = 1 (the numbers are shifted down by one to get the
	column). The matrix has one row per file row and nbr_features columns.
	Progress messages are printed before filling and before converting. '''

	rows = data_converter.file_to_array (filename)
	sample_count = len(rows)
	# A DOK matrix is filled entry by entry, then converted to CSR for the caller.
	matrix = dok_matrix ((sample_count, nbr_features))
	print ("Converting {} to dok sparse matrix".format(filename))
	for sample_index, active_features in enumerate(rows):
		for feature in active_features:
			matrix[sample_index, int(feature)-1] = 1
	print ("Converting {} to csr sparse matrix".format(filename))
	return matrix.tocsr()
Пример #6
0
	def loadType (self, filename, verbose=True):
		''' Get the variable types as a one-dimensional numpy array.

		If filename exists as a file, the types are read from it with
		data_converter.file_to_array; if not, the list is built from
		self.info['feat_type'] repeated self.info['feat_num'] times.
		When verbose is true, a "Reading" banner and the elapsed time are printed.
		'''
		if verbose:
			print("========= Reading " + filename)
		started = time.time()
		have_type_file = os.path.isfile(filename)
		if have_type_file:
			loaded = data_converter.file_to_array (filename, verbose=False)
		else:
			# Without a type file every feature gets the same default type.
			how_many = self.info['feat_num']
			loaded = [self.info['feat_type']] * how_many
		result = np.array(loaded).ravel()
		finished = time.time()
		if verbose:
			print( "[+] Success in %5.2f sec" % (finished - started))
		return result
Пример #7
0
def data_binary_sparse (filename, nbr_features):
	''' Load a binary sparse matrix file into a scipy CSR matrix.

	Row i of the file holds feature numbers; each number a sets
	matrix[i][a-1] to 1. The result has len(file rows) rows and
	nbr_features columns. Two progress messages are printed along the way. '''

	listed = data_converter.file_to_array (filename)
	n_rows = len(listed)
	# Filled as DOK because it supports item assignment; CSR is what gets returned.
	sparse = dok_matrix ((n_rows, nbr_features))
	print ("Converting {} to dok sparse matrix".format(filename))
	for i in range (n_rows):
		for token in listed[i]:
			column = int(token) - 1
			sparse[i, column] = 1
	print ("Converting {} to csr sparse matrix".format(filename))
	csr = sparse.tocsr()
	return csr
Пример #8
0
def data(filename, nbr_features=None, verbose = False):
	''' Read a data file and return its content as a numpy array of floats.

	nbr_features is not used; it is accepted so that the 3 data reading
	functions (data, data_sparse, data_binary_sparse) can be called with the
	same parameters. When verbose is true, the array is printed (without the
	float conversion) before being returned.
	'''
	# Parse the file once and reuse the result for both the printout and the return value.
	raw = data_converter.file_to_array(filename)
	if verbose: print (np.array(raw))
	return np.array(raw, dtype=float)
Пример #9
0
def data(filename, nbr_features=None, verbose = False):
    ''' Read a data file and return its content as a numpy array of floats.

    nbr_features is not used; it is accepted so that the 3 data reading
    functions (data, data_sparse, data_binary_sparse) can be called with the
    same parameters. When verbose is true, the array is printed (without the
    float conversion) before being returned.
    '''
    # Parse the file once and reuse the result for both the printout and the return value.
    raw = data_converter.file_to_array(filename)
    if verbose: print (np.array(raw))
    return np.array(raw, dtype=float)