def __init__(self, nome, diretorio, nomeExibir=None):
    self.diretorio = diretorio
    self.nome = nome
    if nomeExibir is None:
        self.nomeExibir = nome
    else:
        self.nomeExibir = nomeExibir
    self.reader = DataUtils()
    self.imagens = self.reader.obterImagens(self.diretorio)
def train_threaded(self, thread_number, xtrain, ytrain):
    try:
        xtrain_shuffled, ytrain_shuffled = DataUtils.shuffle_data(xtrain, ytrain)
        weight = self.weights[thread_number]
        loss = []
        accuracy = []
        learning_rate = self.learning_rate
        last_checkpoint = 0
        for i in range(self.examples_per_thread):
            itemx, itemy = xtrain_shuffled[i], ytrain_shuffled[i]
            prediction = self.predict(itemx, weight)
            if itemy * prediction < 1:
                # Sample is misclassified or lies within the margin
                weight -= learning_rate * self.hinge_gradient(
                    weight, itemx, itemy, self.regularization)
            if self.collect_data:
                # Record loss/accuracy every 5% of the processed examples
                current_percentage = (thread_number * self.examples_per_thread + i) / (
                    xtrain_shuffled.shape[0] / 100)
                if current_percentage != last_checkpoint and current_percentage % 5 == 0:
                    last_checkpoint = current_percentage
                    loss.append(self.loss_function(xtrain, ytrain, weight))
                    accuracy.append(self.accuracy_function(xtrain, ytrain, weight))
            elif i == self.examples_per_thread - 1:
                # Always record the final loss/accuracy of this thread
                loss.append(self.loss_function(xtrain, ytrain, weight))
                accuracy.append(self.accuracy_function(xtrain, ytrain, weight))
        self.weights[thread_number] = weight
        self.losses[thread_number] = loss
        self.accuracies[thread_number] = accuracy
    except Exception as e:
        print(e)
def transform(self, X):
    numerical_data = DataUtils.get_numerical_data(X)
    full_numerical_data = self.numeric_imputer.transform(numerical_data)

    # Safely drop columns that were entirely empty (their imputer statistics are NaN)
    successfully_imputed_columns = numerical_data.columns[
        ~np.isnan(self.numeric_imputer.statistics_)]
    full_numerical_data = pd.DataFrame(
        full_numerical_data, columns=successfully_imputed_columns)

    # Drop columns that were not present during fit (self.successfully_imputed_columns)
    columns_to_drop = [
        c for c in full_numerical_data.columns
        if c not in self.successfully_imputed_columns
    ]
    if len(columns_to_drop) > 0:
        full_numerical_data = full_numerical_data.drop(
            columns_to_drop, axis=1, errors='ignore')

    # Add columns that were present during fit but are missing here
    columns_to_add = [
        c for c in self.successfully_imputed_columns
        if c not in full_numerical_data.columns
    ]
    if len(columns_to_add) > 0:
        full_numerical_data = full_numerical_data.reindex(
            columns=[*full_numerical_data.columns.tolist(), *columns_to_add],
            fill_value=0)
    return full_numerical_data
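# The drop/add bookkeeping in the transform above aligns the transformed frame
# with the columns learned in fit. A minimal sketch of the same idea, assuming
# the fit-time columns are stored as a pandas Index (the frame and column names
# below are hypothetical, not from the original project): reindex keeps only the
# fit-time columns and fills any missing ones with 0, matching the net effect of
# the drop/add steps apart from column ordering.
import pandas as pd

fit_columns = pd.Index(["a", "b", "c"])  # columns remembered at fit time
transformed = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0], "d": [5.0, 6.0]})

aligned = transformed.reindex(columns=fit_columns, fill_value=0)
print(aligned.columns.tolist())  # ['a', 'b', 'c']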
def main():
    X, y = getXandY(DataUtils('data', 'input.txt').get_data())
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    text_clf = get_pipeline()
    text_clf.fit(X_train, y_train)
    predicted = text_clf.predict(X_test)
    accuracy = np.mean(predicted == y_test)
    print('SGD Classifier Accuracy: ' + str(accuracy))
    dump(text_clf, 'data/clf.joblib')

    # Grid search over vectorizer and classifier hyperparameters
    parameters = {
        'vect__ngram_range': [(1, 1), (1, 2)],
        'tfidf__use_idf': (True, False),
        'clf__alpha': (1e-2, 1e-3),
        'clf__loss': ('hinge', 'log', 'squared_hinge', 'perceptron'),
    }
    gs_clf = GridSearchCV(text_clf, parameters, cv=5, n_jobs=-1)
    gs_clf.fit(X_train, y_train)
    gs_predicted = gs_clf.predict(X_test)
    print('GridSearch Accuracy: ' + str(np.mean(gs_predicted == y_test)))
    dump(gs_clf, 'data/gs_clf.joblib')
def fit(self, X):
    numerical_data = DataUtils.get_numerical_data(X)
    full_numerical_data = self.numeric_imputer.fit_transform(numerical_data)
    # Remember which columns were successfully imputed (non-NaN statistics)
    self.successfully_imputed_columns = numerical_data.columns[
        ~np.isnan(self.numeric_imputer.statistics_)]
    # Return fitted object
    return self
def main():
    du = DataUtils()
    du.start_session()
    usertrends = []
    try:
        usertrends = du.get_usertrends()
        for ut in usertrends:
            usertrend_id = ut.usertrend_id
            ntwts = du.get_num_tweets(usertrend_id)
            print ut.usertrend_id, ntwts
    finally:
        du.close_session()
def fit(self, X):
    categorical_data = DataUtils.get_categorical_data(X)
    # Categorical mode imputer
    self.frequent_vals = pd.Series(
        [categorical_data[c].value_counts().index[0] for c in categorical_data],
        index=categorical_data.columns)
    # Return fit object
    return self
def main(filename, train_size):
    w2v = load_w2v()
    dataset = DataUtils.load_dataset(filename, w2v)
    train_len = int(len(dataset) * train_size)
    test_len = len(dataset) - train_len
    train_set, test_set = random_split(dataset, [train_len, test_len])
    net_model = Siamese(batch_size=1,
                        output_size=5,
                        hidden_size=hidden_layer,
                        vocab_size=len(w2v.wv.vocab),
                        embedding_length=embedded_dim,
                        weights=w2v.wv.vectors)
    train_dataloader = DataLoader(train_set, batch_size=1, shuffle=True)
    test_dataloader = DataLoader(test_set, batch_size=1, shuffle=True)
    iterate_model(net_model, train_dataloader, test_dataloader)
def run_problem_two_experiment(self):
    self.frozen_lake_jumbo_env = gym.make('FrozenLakeJumbo-v0')
    policy, V, iterations, theta, avg_deltas = self.value_iteration(
        env=self.frozen_lake_jumbo_env,
        discount_factor=self.discount_factor,
        theta=self.convergence_threshold)
    score = self.evaluate_policy(self.frozen_lake_jumbo_env, policy,
                                 self.discount_factor)
    print("Value Iteration converged on frozen lake problem -> "
          "\n\titerations to converge: " + str(iterations) +
          "\n\tconvergence threshold: " + str(theta) +
          "\n\tdiscount factor: " + str(self.discount_factor) +
          "\n\t100 game avg score: " + str(score) + "\n")
    DataUtils.write_convergence_diffs(
        DataUtils.get_results_directory_name() + "/lake-value-iter-gamma-" +
        str(self.discount_factor) + ".csv", avg_deltas)
def run_problem_two_experiment(self):
    self.frozen_lake_jumbo_env = gym.make('FrozenLakeJumbo-v0')
    q_table, iterations, avg_deltas = self.q_learning(
        self.frozen_lake_jumbo_env, 2000000)
    score = self.evaluate_policy(env=self.frozen_lake_jumbo_env,
                                 q_table=q_table,
                                 gamma=self.discount_factor)
    print("Q-Learning converged on frozen lake problem -> "
          "\n\titerations to converge: " + str(iterations) +
          "\n\tconvergence threshold: " + str(self.convergence_threshold) +
          "\n\tdiscount factor: " + str(self.discount_factor) +
          "\n\t100 game avg score: " + str(score) + "\n")
    DataUtils.write_convergence_diffs(
        DataUtils.get_results_directory_name() + "/lake-q-learning-gamma-" +
        str(self.discount_factor) + ".csv", avg_deltas)
def run_problem_one_experiment(self):
    self.taxi_v2_env = gym.make('Taxi-v2')
    self.taxi_v2_env._max_episode_seconds = 999999999
    q_table, iterations, avg_deltas = self.q_learning(self.taxi_v2_env)
    score = self.evaluate_policy(env=self.taxi_v2_env,
                                 q_table=q_table,
                                 gamma=self.discount_factor)
    print("Q-Learning converged on taxi problem -> "
          "\n\titerations to converge: " + str(iterations) +
          "\n\tconvergence threshold: " + str(self.convergence_threshold) +
          "\n\tdiscount factor: " + str(self.discount_factor) +
          "\n\t100 game avg score: " + str(score) + "\n")
    DataUtils.write_convergence_diffs(
        DataUtils.get_results_directory_name() + "/taxi-q-learning-gamma-" +
        str(self.discount_factor) + ".csv", avg_deltas)
def classify_custom(text_samples, author_name, test):
    X, y = getXandY(DataUtils('data', 'input.txt').get_data())
    y = list(y)
    X.extend(text_samples)
    y.extend([author_name for sample in text_samples])
    X_train, y_train = shuffle(X, y)
    clf = get_pipeline()
    clf.fit(X_train, y_train)
    predicted = clf.predict_proba([test])[0]
    return list(zip(list(clf.classes_), list(predicted)))
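# A minimal usage sketch for classify_custom, assuming the pipeline returned by
# get_pipeline() supports predict_proba (e.g. a probabilistic loss for the
# classifier). The sample texts and author name below are hypothetical
# placeholders, not values from the original project.
if __name__ == '__main__':
    custom_samples = ['First sample written by the new author.',
                      'Second sample written by the new author.']
    scores = classify_custom(custom_samples, 'new_author',
                             'Text whose authorship we want to score.')
    # scores is a list of (class_label, probability) pairs
    for label, proba in sorted(scores, key=lambda p: p[1], reverse=True):
        print(label, round(proba, 3))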
def main():
    data, labels, idx2char, unique_chars, char2idx = DataUtils.character_encoding(
        './Dataset/lyrics15LIN.csv', 'Country', max_vec_len, step)
    num_of_chars = len(unique_chars)
    model = Sequential()
    model.add(LSTM(128, input_shape=(max_vec_len, num_of_chars)))
    model.add(Dense(num_of_chars))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.RMSprop(lr=0.001))
    model.fit(data, labels, batch_size=128, epochs=epochs)
    model.save('./Dataset/15k-30epoch')
    predict(model, 'country road take me', char2idx, idx2char, unique_chars)
def run_problem_one_experiment(self):
    self.taxi_v2_env = gym.make('Taxi-v2')
    self.taxi_v2_env._max_episode_seconds = 999999999
    policy, V, iterations, theta, avg_deltas = self.value_iteration(
        env=self.taxi_v2_env.env,
        discount_factor=self.discount_factor,
        theta=self.convergence_threshold)
    score = self.evaluate_policy(self.taxi_v2_env, policy,
                                 self.discount_factor)
    print("Value Iteration converged on taxi problem -> "
          "\n\titerations to converge: " + str(iterations) +
          "\n\tconvergence threshold: " + str(theta) +
          "\n\tdiscount factor: " + str(self.discount_factor) +
          "\n\t100 game avg score: " + str(score) + "\n")
    DataUtils.write_convergence_diffs(
        DataUtils.get_results_directory_name() + "/taxi-value-iter-gamma-" +
        str(self.discount_factor) + ".csv", avg_deltas)
def predict(model, seed, char2idx, idx2char, unique_chars):
    pattern = DataUtils.translator(unique_chars, seed, char2idx)
    res = '' + seed
    for i in range(word_count):
        x = np.reshape(pattern, (1, len(pattern), len(unique_chars)))
        prediction = model.predict(x, verbose=0)
        index = np.argmax(prediction)
        result = idx2char[index]
        res += result
        # Slide the window: append the one-hot vector of the predicted
        # character and drop the oldest time step
        seq = np.zeros((1, len(unique_chars)), dtype=bool)
        seq[0, index] = 1
        pattern = np.concatenate((pattern, seq))
        pattern = pattern[1:]
    print(res)
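# The loop above treats `pattern` as a (seq_len, vocab_size) one-hot matrix of
# the seed text. As a minimal sketch (an assumption about what
# DataUtils.translator returns, not the project's actual implementation), a
# compatible encoder could look like this:
import numpy as np

def one_hot_seed(unique_chars, seed, char2idx):
    # One row per character of the seed, one column per character in the vocabulary
    pattern = np.zeros((len(seed), len(unique_chars)), dtype=bool)
    for row, ch in enumerate(seed):
        pattern[row, char2idx[ch]] = 1
    return pattern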
def fit(self, X, y=None):
    categorical_data = DataUtils.get_categorical_data(X)
    # Drop categorical variables with too many or too few categories
    cat_sizes = pd.Series(
        [categorical_data[c].value_counts().size for c in categorical_data],
        index=categorical_data.columns)
    sparse_categories = \
        cat_sizes.loc[cat_sizes > self.max_categories_in_single_variable]
    skewed_categories = \
        cat_sizes.loc[cat_sizes < self.min_categories_in_single_variable]
    self.categorical_variables_to_remove = sparse_categories.append(
        skewed_categories)
    # Return fit object
    return self
def train(self, xtrain, ytrain):
    """
    Calculates the average gradient for a given batch and updates the
    weights with these averages after the batch has been processed.
    """
    self.weight = np.zeros(xtrain.shape[1])
    learning_rate = self.learning_rate
    start = datetime.now()  # runtime
    losses, accuracies = [], []
    for epoch in range(self.epoch_count):
        learning_rate /= np.sqrt(epoch + 1)  # adaptive learning rate
        xtrain, ytrain = DataUtils.shuffle_data(xtrain, ytrain)
        # Loops through the batches
        for i in range(int(len(ytrain) / self.batch_size)):
            batch_start = i * self.batch_size
            batch_end = (i + 1) * self.batch_size
            # Sums up the gradients of the incorrectly classified samples
            # or the samples that lie within the margin
            grad = 0
            for sample, label in zip(xtrain[batch_start:batch_end],
                                     ytrain[batch_start:batch_end]):
                prediction = self.predict(sample)
                if label * prediction < 1:
                    # either within margin or incorrectly classified
                    grad += self.hinge_gradient(self.weight, sample, label,
                                                self.regularization)
            # Weights are updated with average gradients after batch is completed
            self.weight -= learning_rate * grad / self.batch_size
        # Losses & accuracies after one epoch. We always need the latest value
        if self.collect_data or epoch == self.epoch_count - 1:
            losses.append(self.loss_function(xtrain, ytrain, self.weight))
            accuracies.append(
                self.accuracy_function(xtrain, ytrain, self.weight))
    self.runtime = datetime.now() - start
    print(f"Finished in {self.runtime}")
    return np.array(losses), np.array(accuracies)
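# The training loops above rely on predict, hinge_gradient and loss_function
# helpers that are not shown in this excerpt. As a minimal sketch (an assumption
# about their behaviour, not the original implementation), an L2-regularized
# linear SVM typically uses:
import numpy as np

def predict(weight, sample):
    # Raw margin of a linear classifier
    return np.dot(weight, sample)

def hinge_gradient(weight, sample, label, regularization):
    # Subgradient of  reg/2 * ||w||^2 + max(0, 1 - y * w.x)  for a sample with
    # y * w.x < 1; the callers only invoke this in that case.
    return regularization * weight - label * sample

def loss_function(x, y, weight, regularization=0.0):
    # Average regularized hinge loss over the data set
    margins = np.maximum(0, 1 - y * x.dot(weight))
    return regularization / 2 * np.dot(weight, weight) + margins.mean()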
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from DataUtils import DataUtils
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.cluster import KMeans
from classify import get_pipeline
from sklearn.pipeline import Pipeline
from joblib import load

np.random.seed(5)

getXandY = lambda d: (list(map(lambda x: x[0], d[:, :-1])), d[:, len(d[0]) - 1])
X, y = getXandY(np.array(DataUtils('data', 'input.txt').get_data()))

pipeline = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
])
X_ = pipeline.fit_transform(X).todense()

fig = plt.figure()
ax = Axes3D(fig)
pca = PCA(n_components=3).fit(X_)
data3D = pca.transform(X_)
def __init__(self, data_set_file_name):
    self.data_set_file_name = data_set_file_name
    self.data_set = DataUtils.load_data_to_nd_array(data_set_file_name)
def add_all_sample_tweets():
    sample_size = 100
    du = DataUtils()
    du.start_session()
    try:
        usertrends = du.get_usertrends()
        usertrend_ids = map(lambda u: u.usertrend_id, usertrends)
        for uid in usertrend_ids:
            if du.has_usertrend_for_uttl(uid):
                continue
            num_tweets = du.get_num_tweets(uid)
            if num_tweets < 1000:
                continue
            sample_tweet_ids = du.get_sample_tweet_ids(uid, sample_size)
            tweets = du.get_tweets(sample_tweet_ids)
            uttls = map(lambda tweet: UserTrendTweetLabel(
                usertrend_id=uid, tweet_id=tweet.id, text=tweet.text), tweets)
            du.add_usertrendtweetlabels(uttls)
    finally:
        du.close_session()
# define paths to FBA dataset and FBA annotations
# NEED TO EDIT THE PATH HERE IF USING ON A DIFFERENT COMPUTER
if sys.version_info[0] < 3:
    PATH_FBA_ANNO = '/Data/FBA2013/'
    PATH_FBA_AUDIO = '/Data/FBA2013data/'
else:
    PATH_FBA_ANNO = '/home/apati/FBA2013/'
    PATH_FBA_AUDIO = '/home/apati/FBA2013/'

# create data holder
perf_assessment_data = []
req_audio = False

# instantiate the data utils object for different instruments and create the data
INSTRUMENT = 'Alto Saxophone'
utils = DataUtils(PATH_FBA_ANNO, PATH_FBA_AUDIO, BAND, INSTRUMENT)
for year in YEAR:
    perf_assessment_data += utils.create_data(year, SEGMENT, audio=req_audio)

INSTRUMENT = 'Bb Clarinet'
utils = DataUtils(PATH_FBA_ANNO, PATH_FBA_AUDIO, BAND, INSTRUMENT)
for year in YEAR:
    perf_assessment_data += utils.create_data(year, SEGMENT, audio=req_audio)

INSTRUMENT = 'Flute'
utils = DataUtils(PATH_FBA_ANNO, PATH_FBA_AUDIO, BAND, INSTRUMENT)
for year in YEAR:
    perf_assessment_data += utils.create_data(year, SEGMENT, audio=req_audio)

print(len(perf_assessment_data))
def __init__(self, data_set_file_name):
    self.data_set_file_name = data_set_file_name
    self.data_set = DataUtils.load_data_to_nd_array(data_set_file_name)
    self.data_set_feature_labels = DataUtils.load_feature_labels_from_file(
        data_set_file_name)
def main():
    data_utils = DataUtils()
    # load the data from CSV file to pandas DataFrame (start from the project root directory)
    housing_data_frame = data_utils.load_csv_to_pandas_df(
        os.path.join("dataset", "housing", "housing.csv"))

    # Visualization of the data - geographically (by lat/long)
    DataVisualizationUtils.scatter_plot(
        housing_data_frame,
        "median_house_value",
        x_name="longitude",
        y_name="latitude",
        circle_radius=housing_data_frame["population"] / 100,
        label="population size (expressed by the circle radius)")

    # cross correlation matrix
    DataVisualizationUtils.cross_correlation_matrix(housing_data_frame)

    # scatter plot matrix
    attribute_scatter_plot_matrix = [
        "median_house_value", "median_income", "total_rooms",
        "housing_median_age"
    ]
    DataVisualizationUtils.scatter_plot_matrix(housing_data_frame,
                                               attribute_scatter_plot_matrix)

    # Create new columns that are more relevant/meaningful and help us to predict
    # TODO: NEED TO MAKE IT AN ATTRIBUTE TRANSFORM
    housing_data_frame["rooms_per_household"] = housing_data_frame[
        "total_rooms"] / housing_data_frame["households"]
    housing_data_frame["bedrooms_per_rooms"] = housing_data_frame[
        "total_bedrooms"] / housing_data_frame["total_rooms"]
    housing_data_frame["population_per_household"] = housing_data_frame[
        "population"] / housing_data_frame["households"]

    # By re-examining the correlation matrix, we can see that the new features
    # are more correlated with house prices.
    DataVisualizationUtils.cross_correlation_vector(housing_data_frame,
                                                    "median_house_value")

    # split to train set and test set using stratified sampling
    test_data_frame, train_data_frame = data_utils.split_test_train_set_by_stratified_sampling(
        housing_data_frame, "median_income", [0., 1.5, 3., 4.5, 6., np.inf])

    # create label data frames
    train_labels_data_frame = data_utils.copy_and_drop_column(
        train_data_frame, "median_house_value")
    test_labels_data_frame = data_utils.copy_and_drop_column(
        test_data_frame, "median_house_value")

    # Prepare the data for the ML algorithm
    numerical_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="median")),
        ('std_scalar', StandardScaler()),
    ])
    numerical_attribute = list(test_data_frame.columns)
    numerical_attribute.remove("ocean_proximity")
    categorical_attribute = ["ocean_proximity"]
    full_pipeline = ColumnTransformer([
        ('numerical', numerical_pipeline, numerical_attribute),
        ('categorical', OneHotEncoder(), categorical_attribute)
    ])
    housing_prepared = full_pipeline.fit_transform(housing_data_frame)
    housing_train_data_prepared = housing_prepared.take(train_data_frame.index,
                                                        axis=0)
    housing_test_data_prepared = housing_prepared.take(test_data_frame.index,
                                                       axis=0)

    # training the algorithm
    linear_regression = LinearRegression()
    linear_regression.fit(housing_train_data_prepared, train_labels_data_frame)
    prediction_result_linear_regression = linear_regression.predict(
        housing_test_data_prepared)
    mean_square_error_linear_regression = np.sqrt(
        mean_squared_error(prediction_result_linear_regression,
                           test_labels_data_frame))
    DataVisualizationUtils.print_with_title(
        "Linear Regression MSE", mean_square_error_linear_regression)

    # Trying a decision tree model in case our data contain a lot of non-linear
    # correlation between the features
    tree_regression = DecisionTreeRegressor()
    tree_regression.fit(housing_train_data_prepared, train_labels_data_frame)
    prediction_result_decision_tree = tree_regression.predict(
        housing_test_data_prepared)
    mean_square_error_decision_tree = np.sqrt(
        mean_squared_error(prediction_result_decision_tree,
                           test_labels_data_frame))
    DataVisualizationUtils.print_with_title("Decision Tree MSE",
                                            mean_square_error_decision_tree)

    # Trying cross validation
    scores = cross_val_score(DecisionTreeRegressor(),
                             housing_train_data_prepared,
                             train_labels_data_frame,
                             scoring="neg_mean_squared_error",
                             cv=10)
    # sklearn uses a utility function rather than a cost function, so the scores are negative.
    scores = np.sqrt(-scores)
    # print(scores)
    DataVisualizationUtils.print_with_title(
        "Decision Tree Cross Validation MSE", scores.mean())
    # print(scores.std())  # 2566.8761488982286

    # Trying random forest
    random_forest = RandomForestRegressor()
    random_forest.fit(housing_train_data_prepared,
                      train_labels_data_frame.values.ravel())
    prediction_result_random_forest = random_forest.predict(
        housing_test_data_prepared)
    mean_square_error_random_forest = np.sqrt(
        mean_squared_error(prediction_result_random_forest,
                           test_labels_data_frame))
    DataVisualizationUtils.print_with_title("Random Forest MSE",
                                            mean_square_error_random_forest)

    # Trying grid search
    param_grid_search = [{
        'n_estimators': [3, 10, 30],
        'max_features': [2, 4, 6, 8]
    }, {
        'bootstrap': [False],
        'n_estimators': [3, 10],
        'max_features': [2, 3, 4]
    }]
    random_forest_grid_search = RandomForestRegressor()
    grid_search_results = GridSearchCV(random_forest_grid_search,
                                       param_grid_search,
                                       cv=5,
                                       scoring="neg_mean_squared_error",
                                       return_train_score=True)
    grid_search_results.fit(housing_train_data_prepared,
                            train_labels_data_frame.values.ravel())
    cv_results = grid_search_results.cv_results_
    DataVisualizationUtils.print_with_title(
        "Random Forest with Grid Search MSE", "")
    for mean_score, param in zip(cv_results['mean_test_score'],
                                 cv_results['params']):
        print(np.sqrt(-mean_score), param)
def run_q_learning_on_problem_one():
    q_learning_exp = QLearningExperiment()
    q_learning_exp.run_problem_one_experiment()


def run_q_learning_on_problem_two():
    q_learning_exp = QLearningExperiment()
    q_learning_exp.run_problem_two_experiment()


if __name__ == "__main__":
    print("Application running...\n")
    # Problem one is the small state space problem and problem two is the
    # large state space problem.

    # Make sure that the 'Results' directory is present to save output files in.
    if not os.path.isdir(DataUtils.get_results_directory_name()):
        os.mkdir(DataUtils.get_results_directory_name())

    # Run value iteration on the first problem.
    run_value_iteration_on_problem_one()

    # Run policy iteration on the first problem.
    # run_policy_iteration_on_problem_one()

    # Run q learning on the first problem.
    # run_q_learning_on_problem_one()

    # Register the custom jumbo lake environment with open ai gym.
    gym.envs.registration.register(
        id='FrozenLakeJumbo-v0',
        entry_point='frozen_lake_jumbo:FrozenLakeJumboEnv',
class DataLoader(object):
    def __init__(self, usertrend_id=None):
        DB_NAME = "sqlite:///../data/db.sqlite"
        self.du = DataUtils(DB_NAME)
        #self.utid = usertrend_id

    def get_sample_tweets_from_usertrendid(self, usertrend_id, sample_size=1000):
        tweet_ids = self.du.get_sample_tweet_ids(usertrend_id, sample_size)
        tweets = self.du.get_tweets_from_tweetids(tweet_ids)
        return tweets

    def label_generator(self, usertrend_id, sample_size=1000, limit=150):
        du = self.du
        du.start_session()
        if du.has_usertrend_for_uttl(usertrend_id):
            print "{} already has tweet labels".format(usertrend_id)
            return
        usertrend = du.get_usertrend(usertrend_id)
        user_id = usertrend.user_id
        user = du.get_user(user_id)
        bio = user.bio
        headlines = usertrend.related_queries
        #user_id = remove_non_ascii(user_id)
        #bio = remove_non_ascii(bio)
        #headlines = remove_non_ascii(headlines)
        label_question = 'Would the following tweets offend the mentioned user: {}\n\n \
            For context, here is the user\'s bio:\n\n \
            {}\n\n \
            Here are the headlines surrounding the user during the time of the tweet:\n \
            {}\n\n'.decode(sys.stdin.encoding).format(user_id, bio, headlines)
        print label_question
        tweet_ids = du.get_sample_tweet_ids(usertrend_id, sample_size)
        if len(tweet_ids) == 0:
            print "{} has less than 1000 tweets".format(usertrend_id)
            return
        tweets = du.get_tweets_from_tweetids(tweet_ids)
        tweet_iter = iter(tweets)
        label_map = {'y': 1, 'n': 0, 'u': 2}
        tweet_labels = []
        while len(tweet_labels) < limit and tweet_iter:
            tweet = tweet_iter.next()
            text = tweet.text
            text = remove_non_ascii(text)
            resp = raw_input(text + '\n')
            print
            while resp not in label_map:
                print "Please enter 'y': yes, 'n': no, 'u': unsure"
                resp = raw_input(text + '\n')
                print
            label = label_map[resp]
            if label == 0 or label == 1:
                tweet_label = UserTrendTweetLabel(usertrend_id=usertrend_id,
                                                  tweet_id=tweet.id,
                                                  text=tweet.text,
                                                  label=label)
                # Collect the constructed label object (not just the raw label)
                tweet_labels.append(tweet_label)
            print len(tweet_labels)
        du.add_usertrendtweetlabels(tweet_labels)
        du.close_session()

    def load_data(self, usertrend_id):
        du = self.du
        du.start_session()
        tweet_texts = []
        try:
            tweet_texts = du.get_tweet_texts_from_usertrendid(usertrend_id)
        finally:
            du.close_session()
        return tweet_texts
def __init__(self, usertrend_id=None):
    DB_NAME = "sqlite:///../data/db.sqlite"
    self.du = DataUtils(DB_NAME)
def anomaly(y_train, y_test, anomaly_label):
    y_train = DataUtils.anomaly(y_train, anomaly_label)
    y_test = DataUtils.anomaly(y_test, anomaly_label)
    return y_train, y_test
def transform(self, X):
    categorical_data = DataUtils.get_categorical_data(X)
    return categorical_data.fillna(self.null_value)
def transform(self, X):
    categorical_data = DataUtils.get_categorical_data(X)
    return categorical_data.fillna(self.frequent_vals)
# define paths to FBA dataset and FBA annotations
# NEED TO EDIT THE PATH HERE IF USING ON A DIFFERENT COMPUTER
PATH_FBA_ANNO = '/media/SSD/FBA/MIG-FbaData/'
PATH_FBA_AUDIO = ''  # not including raw audio
PATH_FBA_MIDI = "/media/SSD/FBA/fall19/data/midi/"
PATH_FBA_DILL = "/media/SSD/FBA/save_dill/"

# create data holder
perf_assessment_data = []
req_audio = False
req_rating = True

# instantiate the data utils object for different instruments and create the data
for band in BAND:
    perf_assessment_data = []
    for instrument in INSTRUMENT:
        utils = DataUtils(PATH_FBA_ANNO, PATH_FBA_AUDIO, band, instrument)
        for year in YEAR:
            perf_assessment_data += utils.create_data(year,
                                                      SEGMENT,
                                                      audio=req_audio,
                                                      rating=req_rating)
    file_name = band + '_' + str(SEGMENT) + '_pc_' + str(len(YEAR))
    print("Saving to " + file_name)
    with open(PATH_FBA_DILL + file_name + '.dill', 'wb') as f:
        dill.dump(perf_assessment_data, f)

# create midi data (piano roll)
midi_score = {}
unit = "res12"
for band in BAND: