def data_splitting():
    """ Split the counts-per-animal data into train and test sets and save the
    result to the partitions directory. """
    split_type = "general"  # "all" or "general"
    test_percent = 0.2
    type_labeling = "per_group"  # "per_group" or "per_spp"
    raw_data_file = os.path.join(results_dir, "counts_per_animal.pkl")
    counts_data = rws.loadData(raw_data_file)
    split_result = splitting.split_data(counts_data, split_type, test_percent,
                                        type_labeling)
    split_data_file = os.path.join(
        partitions_dir, "split_{}_{}_{}.pkl".format(split_type,
                                                    str(test_percent),
                                                    type_labeling))
    rws.saveData(split_result, split_data_file)
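
# Note (inferred from how train_model() reads the saved file, not stated in the
# original): the pickle holds a (train_dict, test_dict) pair; train_dict provides
# 'data' and 'labels', plus 's_labs' and 'train_grps' used for group-aware
# cross-validation when split_type is "general".
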
def train_model(type_crossval="k-fold", type_classif="qda"):
	""" Function to train a classifier depending on:
	type_crossval: Type of cross validation iterator
	- "k-fold": make just homogeneus k-fold
	- "shuffleSplit": Add a shuffle representation of k-fold
	- "groups_k-fold": Generates cross-validation depending on some groups
	- "L_P_Groups_out": Leave P groups Out
	tyoe_classif: Type of classifier use to train the model
	- "qda"
	- "rf 
	- "svc" """
	split_type="general" #"all" or "general"
	test_percent=0.2
	type_labeling="per_group" #"per_group" or "per_spp"
	splitted_data_file = os.path.join(partitions_dir,
		"split_{}_{}_{}.pkl".format(split_type,str(test_percent),type_labeling))
	train_dict, test_dict = rws.loadData(splitted_data_file)

	train_data = train_dict['data']
	train_labels = train_dict['labels']

	cross_val_test_size=0.2

	if type_classif == 'qda':
		kwargs = {}

	elif type_classif == 'rf':
		kwargs = {"n_trees": 100, "boots": True}

	elif type_classif == 'svc':
		kwargs = {"c": 1.0, "kernel_type": 'linear', "gamma_value": 'scale'}

	else:
		raise ValueError("Select an appropriate type of classifier: "
			"'qda', 'rf' or 'svc'")

	if split_type == 'all':

		num_splits = 5
		if type_crossval == 'k-fold':
			train_results = cv.k_fold_iter(train_data, train_labels, num_ss=num_splits, 
				clf=type_classif, **kwargs)

		elif type_crossval == 'shuffleSplit':
			test_size = cross_val_test_size
			train_results = cv.shuffle_split_iter(train_data, train_labels, num_ss=num_splits,
				t_size=test_size, clf=type_classif, **kwargs)

		else:
			raise ValueError("If split_type is 'all', type_crossval must be "
				"'k-fold' or 'shuffleSplit'")

	if split_type == "general":

		s_labels = train_dict['s_labs']
		train_grp_animals = train_dict['train_grps']
		n_groups = round(((1-test_percent) * 10)/2) 
		if type_crossval == 'groups_k-fold':
			train_results = cv.groups_k_fold_iter(train_data, train_labels, s_labels, 
				train_grp_animals, num_groups=n_groups, clf=type_classif, **kwargs)

		elif type_crossval == 'L_P_Groups_out':
			n_groups = 2
			train_results = cv.leave_P_out_iter(train_data, train_labels, s_labels,
				train_grp_animals, num_groups=n_groups, clf=type_classif, **kwargs)

		else:
			raise ValueError("If split_type is 'general', type_crossval must be "
				"'groups_k-fold' or 'L_P_Groups_out'")

	# Quick sanity check: number of folds and the shapes/contents of the first
	# fold's result tuple.
	print(len(train_results))
	print(train_results[0][1].shape)
	print(train_results[0][2].shape)
	print(train_results[0][2])
	if len(train_results[0]) == 5:
		print(train_results[0][3].shape)
		print(train_results[0][3])
		print(train_results[0][4])
	else:
		print(train_results[0][3])

	# The per-fold accuracy is the last element of each result tuple
	# (index 4 for 5-element results, index 3 otherwise).
	acc_values = np.zeros(len(train_results))
	for i in range(len(train_results)):
		if len(train_results[i]) == 5:
			acc_values[i] = train_results[i][4]
		else:
			acc_values[i] = train_results[i][3]

	print("Mean cross-validation accuracy:", np.mean(acc_values))

	get_metrics(train_results, type_crossval, type_classif, **kwargs)
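

# Hypothetical driver (an assumption, not part of the original module): run the
# splitting step first so the pickle expected by train_model() exists, then train
# with a group-aware cross-validation iterator matching split_type = "general".
# The helper modules (rws, splitting, cv) and the results_dir / partitions_dir
# paths are assumed to be defined elsewhere in this module.
if __name__ == "__main__":
    data_splitting()
    train_model(type_crossval="groups_k-fold", type_classif="qda")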