def run(self, Train=True, Test=False):
    """Build the datasets and the model, then optionally train and/or evaluate.

    Args:
        Train: when true, back up the configuration and model sources into
            the output path, train the model and write summary logs there.
        Test: when true, evaluate the trainer on the test set.
    """
    print("Running on", self.arg.device)
    self.set_device(self.arg.device)
    np.random.seed(self.arg.seed)
    torch.manual_seed(self.arg.seed)

    # create training set
    if self.arg.data_path:
        log('loading corpus from %s' % self.arg.data_path)

    # exist_ok keeps this idempotent: a re-run (or an output folder that only
    # partially exists) no longer fails, and the intermediate "code" folder
    # is created as part of the same call.
    models_backup_path = self.arg.output_path + "/code/models"
    os.makedirs(models_backup_path, exist_ok=True)

    self.define_input_field()  # define the fields of several inputs
    print(self.arg)
    print(self.bertConfig)
    print(self.bertTokenizer)
    consts.TOKEN_MASK_TYPE = self.arg.token_mask_type

    # load datafiles and transform them into fields
    self.train_set = self.construct_dataset(self.train, keep_events=1,
                                            skip_sample=self.arg.skip_sample,
                                            tokenizer=self.bertTokenizer)
    self.dev_set = self.construct_dataset(self.dev, tokenizer=self.bertTokenizer)
    self.test_set = self.construct_dataset(self.test, tokenizer=self.bertTokenizer)
    self.buil_field_vocab()  # build vocab on train and dev set

    tester = self.get_tester()

    if self.arg.restart > 0:
        log('init model from ' + self.arg.demo_model)
        self.model = self.load_model(self.arg.demo_model)
        log('model loaded, there are %i sets of params'
            % len(self.model.parameters_requires_grad_clipping()))
    else:
        self.model = self.load_model(None)
        log('model created from scratch, there are %i sets of params'
            % len(self.model.parameters_requires_grad_clipping()))

    self.arg.word_i2s = self.WordsField.vocab.itos
    self.arg.trigger_label_i2s = self.TriggerLabelField.vocab.itos

    optimizer_constructor, bert_optimizer_constructor = \
        self.get_otimizer_constructor(self.model)
    trainer = Trainer(model=self.model,
                      args=self.arg,
                      word_i2s=self.arg.word_i2s,
                      EERuner=self,
                      optimizer_constructor=optimizer_constructor,
                      bert_optimizer_constructor=bert_optimizer_constructor,
                      tester=tester)

    if Train:
        print("backup codes")
        # NOTE(review): best-effort shell copies (POSIX `cp`, shell glob);
        # failures are ignored, exactly as before.
        os.system("cp config.cfg {}".format(self.arg.output_path))
        os.system("cp network/models/*.py {}".format(models_backup_path))
        self.store_vec()

        self.arg.writer = {
            "train": SummaryWriter(os.path.join(self.arg.output_path, "train")),
            "detect": SummaryWriter(os.path.join(self.arg.output_path, "detection")),
            "cls": SummaryWriter(os.path.join(self.arg.output_path, "classification")),
        }
        try:
            trainer.train(train_set=self.train_set,
                          dev_set=self.dev_set,
                          test_set=self.test_set,
                          epochs=self.arg.epochs,
                          other_testsets={})
        finally:
            # close the writers even when training raises, so buffered
            # summaries are not lost
            self.arg.writer["train"].close()
            self.arg.writer["detect"].close()
            self.arg.writer["cls"].close()

    if Test:
        trainer.eval(test_set=self.test_set)

    log('Done!')
def dual_ensemble_classifier_performance_statistics(
        data_directory, multiple_runs_directory, output_directory,
        create_simple_plots, create_distribution_plots, widget):
    """Produce the requested performance plots for the dual ensemble classifier.

    All output goes into a 'DualEnsembleClassifierPerformanceStatistics'
    folder under ``output_directory`` (created when missing). The two boolean
    flags select which plots are generated; start and end are logged to
    ``widget``.
    """
    log(f"Started Dual Ensemble Classifier Performance Statistics: {datetime.datetime.now()}",
        None, widget)

    # results live in a dedicated sub-folder of the requested output directory
    statistics_directory = os.path.join(
        output_directory, 'DualEnsembleClassifierPerformanceStatistics')
    if not os.path.exists(statistics_directory):
        os.makedirs(statistics_directory)

    # simple plots are built from the single-run data
    if create_simple_plots:
        plot_initial_performance(data_directory, statistics_directory, widget)

    # distribution plots are built from the multiple-runs data
    if create_distribution_plots:
        plot_distribution_for_multiple_runs(multiple_runs_directory,
                                            statistics_directory, widget)

    log(f"Finished Dual Ensemble Classifier Performance Statistics: {datetime.datetime.now()}",
        None, widget)
def train(models, it_train, it_val, params):
    """
    Train the model.

    Parameters:
    - models: container holding all the models, accessed by attribute.
        - atob: a model that goes from A to B.
        - d: the discriminator model.
        - p2p: a Pix2Pix model.
    - it_train: the iterator of the training data.
    - it_val: the iterator of the validation data.
    - params: parameters of the training procedure. The attributes read here
      are train_samples, val_samples (-1 means "use the whole iterator"),
      samples_per_batch, epochs, save_every, continue_train, log_dir,
      expt_name, is_a_binary and is_b_binary.
    """
    # Create the experiment folder and save the parameters
    create_expt_dir(params)

    # Get the output shape of the discriminator. The discriminator is taken
    # from `models` (like `models.atob` below) rather than from a bare name
    # `d`, which is not defined in this function.
    dout_size = models.d.output_shape[-2:]

    # Define the data generators
    generators = generators_creation(it_train, it_val, models, dout_size)

    # Define the number of samples to use on each training epoch
    train_samples = params.train_samples
    if train_samples == -1:
        train_samples = it_train.N
    batches_per_epoch = train_samples // params.samples_per_batch

    # Define the number of samples to use for validation
    val_samples = params.val_samples
    if val_samples == -1:
        val_samples = it_val.N

    losses = {'p2p': [], 'd': [], 'p2p_val': [], 'd_val': []}
    if params.continue_train:
        # resume the loss history of the experiment being continued
        losses = load_losses(log_dir=params.log_dir, expt_name=params.expt_name)

    for e in tqdm(range(params.epochs)):

        for _ in range(batches_per_epoch):
            train_iteration(models, generators, losses, params)

        # Evaluate how the models are doing on the validation set.
        evaluate(models, generators, losses, val_samples=val_samples)

        # Checkpoint the weights and log progress every `save_every` epochs
        if (e + 1) % params.save_every == 0:
            save_weights(models, log_dir=params.log_dir, expt_name=params.expt_name)
            log(losses, models.atob, it_val, log_dir=params.log_dir,
                expt_name=params.expt_name, is_a_binary=params.is_a_binary,
                is_b_binary=params.is_b_binary)
def model_analytics(multiple_runs_path, output_path, plot_weight_heatmaps,
                    plot_collapsed_weight_heatmaps,
                    plot_collapsed_weight_heatmaps_aligned, widget):
    """Plot the weight heatmaps, logging to ``widget`` before and after.

    The three ``plot_*`` flags are forwarded unchanged to ``plot_heatmaps``.
    """
    # the three flags travel together, in the order plot_heatmaps receives them
    heatmap_flags = (
        plot_weight_heatmaps,
        plot_collapsed_weight_heatmaps,
        plot_collapsed_weight_heatmaps_aligned,
    )

    log("Started plotting weight heatmaps", None, widget)
    plot_heatmaps(multiple_runs_path, *heatmap_flags, output_path, widget)
    log("Finished plotting weight heatmaps", None, widget)
def handle(self):
    """Reply to one request with ``<track uri>|<progress in ms>`` and a newline.

    Nothing is sent when no track is currently playing; the situation is
    logged instead of failing on the missing track data.
    """
    currentTrack = sp.current_user_playing_track()

    # The lookup yields no track when playback is idle (None result or a
    # None "item"); indexing into it would raise.
    if not currentTrack or not currentTrack.get("item"):
        util.log("Request made, but no track is currently playing")
        return

    currentTrackURI = util.propertyToString(currentTrack["item"]["uri"])
    currentTrackProgressMS = util.propertyToString(
        currentTrack["progress_ms"])
    util.log("Request made, responding with: %s|%s" %
             (currentTrackURI, currentTrackProgressMS))

    # sockets carry bytes: encode explicitly so the reply also works on
    # Python 3, where sendall() rejects str
    response = "%s|%s\n" % (currentTrackURI, currentTrackProgressMS)
    self.request.sendall(response.encode("utf-8"))
def dataset_statistics(data_directory, output_directory, widget):
    """Compute the dataset statistics and write them under 'DatasetStatistics'.

    The folder is created inside ``output_directory`` when missing. Progress
    is logged to both the 'DatasetStatistic.txt' report in that folder and
    ``widget``.
    """
    # dedicated folder for everything this step produces
    statistics_directory = os.path.join(output_directory, 'DatasetStatistics')
    if not os.path.exists(statistics_directory):
        os.makedirs(statistics_directory)

    # text report that receives the log lines
    report_file = os.path.join(statistics_directory, 'DatasetStatistic.txt')

    log(f"Started dataset statistics: {datetime.datetime.now()}",
        file = report_file, widget = widget)

    trial_metadata = read_trial_metadata(data_directory)
    (number_of_subjects, subjects_trial_lengths,
     subjects_trial_response, trial_lengths) = trial_metadata

    compute_statistics(number_of_subjects, statistics_directory, report_file,
                       subjects_trial_lengths, subjects_trial_response,
                       trial_lengths, widget)

    log(f"Finished dataset statistics: {datetime.datetime.now()}",
        file = report_file, widget = widget)
def read_trial_data(channel_list, data_directory, log_file, only_two_subjects, widget):
    """Read the binary trial files found under ``data_directory``.

    Every file is named after its 1-based subject number. For each subject the
    file is read channel by channel and trial by trial; only the trials of
    channels listed in ``channel_list`` are kept.

    Returns:
        A list indexed by (subject number - 1); each entry holds one list per
        channel, which in turn holds the kept trials as lists of values.
    """
    # collect the file names; with only_two_subjects the walk stops after the
    # second visited directory
    file_names = []
    for visited, (_, _, filenames) in enumerate(os.walk(data_directory), start=1):
        file_names.extend(filenames)
        if only_two_subjects and visited == 2:
            break

    # one (initially empty) slot per discovered file
    subjects_data = [[] for _ in range(len(file_names))]

    log(f'Start time of parsing: {datetime.datetime.now()}', file = log_file, widget = widget)

    # parse the file for each subject
    for parsed, file_name in enumerate(file_names, start=1):
        subject_file = os.path.join(data_directory, file_name)

        # the file name (without extension) is the 1-based subject number
        subject_label = int(file_name.split('.')[0])
        subject_channels = subjects_data[subject_label - 1]

        log(f'Started reading filtered data for subject {subject_label}',
            file = log_file, widget = widget)

        # the file stores, channel after channel, NUMBER_OF_TRIALS records of
        # (length, values)
        with open(subject_file, 'rb') as subject_stream:
            for channel_index in range(NUMBER_OF_CHANNELS):
                subject_channels.append([])

                for _ in range(NUMBER_OF_TRIALS):
                    # every record must be consumed to stay aligned in the
                    # stream, even when its channel is not kept
                    trial_length = read_value_from_binary_file(subject_stream, 'f', 4)
                    trial_values = read_array_from_binary_file(
                        subject_stream, 'f', 4, int(trial_length))

                    # keep only the requested channels
                    if channel_index in channel_list:
                        subject_channels[channel_index].append(list(trial_values))

        log(f'Finished reading filtered data for subject {subject_label}',
            file = log_file, widget = widget)

        if only_two_subjects and parsed == 2:
            break

    log(f'End time of parsing: {datetime.datetime.now()}', file = log_file, widget = widget)

    return subjects_data
def graph_regions_plot_individual(matrices_directory, output_directory, trial_index, window_index, is_trial = False, widget = None, normalize = True, should_filter = True):
    """Plot the aggregated channel graph for one trial (or one window of it).

    The plot is written to
    ``<output_directory>/GraphWavenetAdjacency/<Trial|Window>/<trial_index>/Individual``
    and the input matrices are read from
    ``<matrices_directory>/<Trial|Window>/<trial_index>``.

    Args:
        matrices_directory: root folder of the stored matrices.
        output_directory: root folder for the generated plot.
        trial_index: trial identifier, used in paths and in the plot title.
        window_index: window identifier; added to the title when plotting a
            window.
        is_trial: selects the 'Trial' layout when true, 'Window' otherwise.
        widget: log target forwarded to ``log``.
        normalize: forwarded to ``aggregate_channels``.
        should_filter: forwarded to ``aggregate_channels``.
    """
    # both the input and the output tree use the same Trial/Window level
    scope = 'Trial' if is_trial else 'Window'

    # create the whole output path in one call; exist_ok makes re-runs safe
    # and replaces the former level-by-level exists()/makedirs() checks
    output_directory = os.path.join(output_directory, 'GraphWavenetAdjacency',
                                    scope, f'{trial_index}', 'Individual')
    os.makedirs(output_directory, exist_ok=True)

    log("Started graph regions plot individual: " + str(datetime.datetime.now()),
        file = None, widget = widget)

    # find input matrix
    matrices_directory = os.path.join(matrices_directory, scope, f'{trial_index}')

    node_size, node_edges = aggregate_channels(matrices_directory, trial_index,
                                               window_index, is_trial,
                                               normalize, should_filter)

    title = f'{trial_index}'
    if not is_trial:
        title += f' {window_index}'

    plot_graph(node_edges, node_size, title, output_directory, plt.cm.Blues,
               INTERVAL_START, INTERVAL_END)

    log("Finished graph regions plot individual: " + str(datetime.datetime.now()),
        file = None, widget = widget)
def compute_histogram(input_matrix, widget, output_directory, is_trial):
    """Log threshold counts and plot a weight histogram.

    For every threshold 0.0, 0.1, ..., 1.0 the number of weights greater than
    or equal to it is written to 'Histogram/histogram_details.txt'; the
    histogram itself is saved as 'Histogram/WeightHistogram.html'.

    Args:
        input_matrix: the weights of one trial when ``is_trial`` is true,
            otherwise an iterable with the weights of each window.
        widget: unused; kept for interface compatibility.
        output_directory: folder in which the 'Histogram' folder is created.
        is_trial: selects between the single-trial and the per-window layout.
    """
    output_directory = os.path.join(output_directory, 'Histogram')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    log_file = os.path.join(output_directory, 'histogram_details.txt')
    bins = dict(start=0.0, end=1.0, size=0.1)

    if is_trial:
        _log_threshold_counts(input_matrix, log_file, prefix='')
        fig = go.Figure(data=[go.Histogram(x=input_matrix, xbins=bins)])
    else:
        fig = go.Figure()
        for window, matrix in enumerate(input_matrix):
            log(f'Window {window}', file=log_file, widget=None)
            _log_threshold_counts(matrix, log_file, prefix='\t')
            fig.add_trace(
                go.Histogram(x=matrix, xbins=bins, name=f'Window {window}'))

        fig.update_layout(barmode='overlay')
        # Reduce opacity to see both histograms
        fig.update_traces(opacity=0.75)

    plotly.offline.plot(fig,
                        filename=os.path.join(output_directory,
                                              'WeightHistogram.html'),
                        auto_open=False)


def _log_threshold_counts(weights, log_file, prefix):
    """Log how many weights reach each threshold 0.0, 0.1, ..., 1.0."""
    # Derive each threshold from an integer step: repeatedly adding 0.1
    # accumulates rounding error (0.30000000000000004, ..., and
    # 0.9999999999999999 instead of 1.0), which skewed both the logged
    # thresholds and the count reported for the last one.
    for step in range(11):
        threshold = step / 10
        count = sum(1 for weight in weights if weight >= threshold)
        log(f'{prefix}Number connections for threshold {threshold}: {count}',
            file=log_file, widget=None)
def graph_minimum_spanning_arborescence(output_directory, is_trial, graphs, adjacency_matrices, widget, properties_dict):
    """Compute a spanning arborescence per graph and record its total weight.

    For each graph, ``nx.minimum_spanning_arborescence`` is computed and the
    weights of its edges are summed from the matching adjacency matrix. The
    total and average are logged to
    'MaximumSpanningArborescence/MaximumSpanningArborescence.txt', the total
    is stored in ``properties_dict`` and the edge lists are pickled to
    'MSAList.bin' in the same folder.

    Args:
        output_directory: folder in which the output folder is created.
        is_trial: when true the total is stored directly under MSA_WEIGHT,
            otherwise under ``properties_dict[window][MSA_WEIGHT]``.
        graphs: one graph per window.
        adjacency_matrices: one matrix per window, indexed
            ``[window][row][column]`` by CHANNELS_DICT keys.
        widget: unused; kept for interface compatibility.
        properties_dict: updated in place with the computed totals.
    """
    output_directory = os.path.join(output_directory, 'MaximumSpanningArborescence')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    log_file = os.path.join(output_directory, 'MaximumSpanningArborescence.txt')

    # Reverse lookup (node -> CHANNELS_DICT key) built once instead of scanning
    # the dictionary twice per edge. setdefault keeps the first matching key,
    # like the scan it replaces.
    key_by_node = {}
    for key, node in CHANNELS_DICT.items():
        key_by_node.setdefault(node, key)

    msa_list = []
    for window, graph in enumerate(graphs):
        msa_graph = nx.minimum_spanning_arborescence(graph)
        msa_edges = msa_graph.edges()
        msa_list.append(msa_edges)

        count = 0
        weight = 0
        for edge in msa_edges:
            # nodes missing from CHANNELS_DICT fall back to index 0, as before
            node_index_1 = key_by_node.get(edge[0], 0)
            node_index_2 = key_by_node.get(edge[1], 0)
            weight += adjacency_matrices[window][node_index_1][node_index_2]
            count += 1

        # an arborescence without edges has no meaningful average; report 0.0
        # instead of dividing by zero
        average_weight = weight / count if count else 0.0

        if not is_trial:
            log(f'Window {window}', file=log_file)
        log(f'\t Weight: {weight}', file=log_file)
        log(f'\t Weight average: {average_weight}', file=log_file)

        if not is_trial:
            properties_dict[window][MSA_WEIGHT] = weight
        else:
            properties_dict[MSA_WEIGHT] = weight

    with open(os.path.join(output_directory, 'MSAList.bin'), 'wb+') as f:
        pickle.dump(msa_list, f)
def raw_data_filter(data_directory, output_directory, degree_of_parallelism, trial_filter_length, widget):
    """Filter the raw data of every subject, in batches of parallel threads.

    One ``ParsingThread`` is started per directory found below
    ``data_directory``. Threads run in batches of ``degree_of_parallelism``;
    each batch is joined before the next one starts. Start time, elapsed time
    and end time are logged to ``widget``.
    """
    # find the subject's directories, dropping the first entry yielded by
    # os.walk (data_directory itself)
    subjects_directories = [x[0] for x in os.walk(data_directory)][1:]

    running_threads = []

    log("Start time: " + str(datetime.datetime.now()), None, widget)
    start_time = time.time()

    # subject numbers are 1-based positions in the directory listing
    for subject_number, subject_directory in enumerate(subjects_directories, start=1):
        subject_thread = ParsingThread(subject_number,
                                       "thread-" + str(subject_number),
                                       subject_directory, subject_number,
                                       output_directory, trial_filter_length,
                                       widget)
        subject_thread.start()
        running_threads.append(subject_thread)

        # once a full batch is running, wait for it before starting more
        if subject_number % degree_of_parallelism == 0:
            for running_thread in running_threads:
                running_thread.join()
            running_threads = []

    # The last batch is usually partial and was never joined, so the elapsed
    # and end times were logged while threads were still working.
    for running_thread in running_threads:
        running_thread.join()

    log("--- %s seconds ---" % (time.time() - start_time), None, widget)
    log("End time: " + str(datetime.datetime.now()), None, widget)
def graph_shortest_path(output_directory, is_trial, graphs, widget, properties_dict):
    """Log and store two shortest-path statistics for every graph.

    For each graph this records ``nx.average_shortest_path_length`` and the
    average, over ordered node pairs, of ``sum(1 - weight)`` along the
    weighted shortest path between them. Values are logged to
    'ShortestPath/ShortestPath.txt' and written into ``properties_dict``
    (per window unless ``is_trial`` is true).
    """
    output_directory = os.path.join(output_directory, 'ShortestPath')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    log_file = os.path.join(output_directory, 'ShortestPath.txt')

    # number of ordered channel pairs, used to average the path weights
    ordered_pairs = NUMBER_OF_CHANNELS * (NUMBER_OF_CHANNELS - 1.0)

    for window, graph in enumerate(graphs):
        paths_by_start = nx.shortest_path(graph, weight='weight')

        average = 0
        for start, paths_from_start in paths_by_start.items():
            for end, path in paths_from_start.items():
                if start == end:
                    continue
                # accumulate (1 - weight) over consecutive nodes of the path
                path_weight = 0
                for previous_node, node in zip(path, path[1:]):
                    path_weight = path_weight + 1 - graph.get_edge_data(
                        previous_node, node)['weight']
                average += path_weight
        average /= ordered_pairs

        shortest_path = nx.average_shortest_path_length(graph)

        if not is_trial:
            log(f'Window {window}: ', file=log_file)
        log(f'\tAverage shortest path length: {shortest_path}', file=log_file)
        log(f'\tAverage maximum weight path: {average}', file=log_file)

        # per-window results are stored under the window index
        target = properties_dict if is_trial else properties_dict[window]
        target[AVG_SHORTEST_PATH] = shortest_path
        target[AVG_MAX_WEIGHT_PATH] = average
def initialize_model(channel_list, dual_dataset_cross_loader, dual_dataset_test_loader, dual_dataset_train_loader, dual_log_file_channels_txt, dual_log_file_html, dual_log_file_response_csv, dual_log_file_response_txt, dual_log_file_stimulus_csv, dual_log_file_stimulus_txt, dual_model_path, example_length, log_file, number_of_subjects, response_classes, stimulus_classes, widget, with_visdom):
    """Create, train, evaluate and save a dual ensemble classifier.

    The model gets one stimulus and one response layer description, each of
    the form [input, hidden, output] with the input equal to
    ``example_length``, and is built for ``len(channel_list)`` channels. It is
    fitted on the train/cross loaders, used to predict on the test loader and
    finally saved to ``dual_model_path``.
    """
    # plot to VISDOM if enabled
    visdom_client = None
    if with_visdom:
        visdom_client = Visdom(port = 8097, server = 'http://localhost', base_url = '/')

    # layer sizes: input, hidden (two thirds of the input plus the output), output
    stimulus_layers = [
        example_length,
        int(example_length * 2 / 3 + STIMULUS_OUTPUT_SIZE),
        STIMULUS_OUTPUT_SIZE,
    ]
    response_layers = [
        example_length,
        int(example_length * 2 / 3 + RESPONSE_OUTPUT_SIZE),
        RESPONSE_OUTPUT_SIZE,
    ]

    # create model
    dual_model = DualEnsembleClassifierModel((stimulus_layers, response_layers),
                                             len(channel_list))

    log(dual_model, file = log_file, widget = None)

    # initialize weights
    initializer = WeightInitializer()
    initializer.init_weights(dual_model, 'xavier_normal_', { 'gain': 0.02 })

    log(f"Started dual training: {datetime.datetime.now()}", file = log_file, widget = widget)

    # fit the model
    dual_model.fit(visdom_client, "dual", dual_dataset_train_loader,
                   dual_dataset_cross_loader, dual_log_file_html,
                   number_epochs = 5, learning_rate = 0.001, widget = widget)

    # make the prediction
    dual_model.predict(dual_dataset_test_loader,
                       dual_log_file_stimulus_csv, dual_log_file_stimulus_txt,
                       dual_log_file_response_csv, dual_log_file_response_txt,
                       dual_log_file_channels_txt,
                       stimulus_classes, response_classes, number_of_subjects)

    # save the model to a file
    dual_model.save_model_to_file(dual_model_path)

    log(f"End dual training: {datetime.datetime.now()}", file = log_file, widget = widget)
def recurrent_graph_wavenet(dots_folder_path, trial_division_file_path, output_path, subject_number, trial_index, window_index, input_length, output_length, batch_size, loader_option, widget, blocks, layers, number_of_epochs, initial_train_percentage, increase_train_percentage, use_functional_network, functional_network_path, use_previous_weight_matrix, previous_weight_matrix_path, include_cross, use_gpu, is_experiment=True):
    """Train a graph wavenet on one trial or window and save its weight matrix.

    Steps: select the device, create the run output folder under
    ``<output_path>/GraphWavenet``, save the run arguments, load the optional
    supports, build the data loaders, train, and store the learned weight
    matrix below ``previous_weight_matrix_path``.

    Args:
        loader_option: 'Window' or 'Trial'; selects the loader and the layout
            of the output and weight-matrix folders.
        include_cross: when true ``TrainEngine.train`` is used, otherwise
            ``TrainEngine.full_train``.
        use_gpu: use "cuda:0" when available, CPU otherwise.
        is_experiment: when true, results go to a fresh ``Run <n>`` folder;
            otherwise to a folder named after the trial (and window).
        (the remaining arguments are forwarded to the loaders, the train
        engine and ``save_running_parameters``)
    """
    # set device
    if use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    # Use about 75% of the cores, but never fewer than one thread:
    # os.cpu_count() may return None and the scaled value rounds down to 0 on
    # a single-core machine.
    torch.set_num_threads(max(1, int((os.cpu_count() or 1) * 0.75)))

    # set the highest priority to the process (if unix)
    if platform.uname().system == 'Linux':
        try:
            os.nice(-40)
        except OSError:
            # raising the priority needs elevated privileges; training can
            # still run at the default priority
            log('Could not raise the process priority; continuing.', None, widget)

    output_path = os.path.join(output_path, 'GraphWavenet')
    os.makedirs(output_path, exist_ok=True)

    if is_experiment:
        output_path = _next_run_directory(output_path)
    elif loader_option == 'Window':
        output_path = os.path.join(output_path, f'{trial_index}_{window_index}')
    else:
        output_path = os.path.join(output_path, f'{trial_index}')
    # deliberately without exist_ok: an already existing run folder is not
    # silently reused
    os.makedirs(output_path)

    arguments_file_path = os.path.join(output_path, 'arguments.json')
    save_running_parameters(
        batch_size, blocks, dots_folder_path, increase_train_percentage,
        initial_train_percentage, input_length, layers, loader_option,
        number_of_epochs, output_length, output_path, subject_number,
        trial_division_file_path, trial_index, window_index,
        arguments_file_path, use_functional_network, functional_network_path,
        use_previous_weight_matrix, previous_weight_matrix_path, include_cross)

    log_file = os.path.join(output_path, 'log.txt')
    log(f'Graph wavenet start: {str(datetime.datetime.now())}', log_file, widget)

    # optional supports: functional network first, previous weight matrix after
    supports = None
    if use_functional_network:
        supports = load_functional_network(functional_network_path,
                                           subject_number, trial_index)
    if use_previous_weight_matrix:
        if supports is None:
            supports = []
        supports.extend(
            load_previous_weight_matrix(previous_weight_matrix_path,
                                        loader_option, trial_index,
                                        window_index))
    if supports is not None:
        supports = [x.to(device) for x in supports]

    # arguments shared by both loader factories
    loader_arguments = dict(
        dots_folder_path=dots_folder_path,
        subject_number=subject_number,
        trial_index=trial_index,
        input_length=input_length,
        output_length=output_length,
        batch_size=batch_size,
        shuffle=True,
        trial_division_file_path=trial_division_file_path,
        output_path=output_path,
        initial_train_percentage=initial_train_percentage,
        increase_train_percentage=increase_train_percentage,
        include_cross=include_cross)

    loader_splits = None
    if loader_option == 'Window':
        loader_splits = create_loader_window(window_index=window_index,
                                             **loader_arguments)
    elif loader_option == 'Trial':
        loader_splits = create_loader_trial(**loader_arguments)

    train_engine = TrainEngine(number_of_nodes=NUMBER_OF_CHANNELS,
                               blocks=blocks,
                               layers=layers,
                               loader_splits=loader_splits,
                               log_file=log_file,
                               widget=widget,
                               output_directory=output_path,
                               number_of_epochs=number_of_epochs,
                               use_previous_model=False,
                               input_length=input_length,
                               output_length=output_length,
                               supports=supports,
                               device=device)

    if include_cross:
        train_engine.train()
    else:
        train_engine.full_train()

    # store the learned weight matrix next to the previous ones
    if loader_option == 'Trial':
        matrix_path = os.path.join(previous_weight_matrix_path, 'Trial',
                                   f'{trial_index}')
    else:
        matrix_path = os.path.join(previous_weight_matrix_path, 'Window',
                                   f'{trial_index}', f'{window_index}')
    os.makedirs(matrix_path, exist_ok=True)
    train_engine.save_weight_matrix(matrix_path)

    log(f'Graph wavenet end: {str(datetime.datetime.now())}', log_file, widget)


def _next_run_directory(experiment_root):
    """Return the path of the next unused 'Run <n>' folder in experiment_root."""
    run_numbers = []
    for folder in next(os.walk(experiment_root))[1]:
        try:
            run_numbers.append(int(folder.split()[1]))
        except (IndexError, ValueError):
            # not a "Run <n>"-style folder; ignore it instead of failing
            continue
    # default=0 makes the first run 'Run 1', also for an existing empty folder
    return os.path.join(experiment_root, f'Run {max(run_numbers, default=0) + 1}')
def __new__(cls, *args, **kwargs):
    """Log the creation of the model and allocate the new instance.

    ``__init__`` is intentionally not called here: Python invokes it with the
    same arguments on the instance returned by ``__new__``. Calling it
    explicitly as well made every instance run its initialisation twice.
    """
    log('created %s with params %s' % (str(cls), str(args)))
    return super(Model, cls).__new__(cls)
def reconstruct_signal_from_loader(self, split_index, loader, mean, std):
    """Evaluate the best model of a split on ``loader`` and plot the signals.

    For every batch the loss is logged to 'cross_loss_<split_index>.txt' and
    the per-node MASE is accumulated; the averaged MASE is logged to
    'cross_mase_<split_index>.txt'. When ``self.reconstruct_signal`` is set,
    the recorded real and predicted values are de-standardised with ``std``
    and ``mean`` and added to a shared plotly figure that is saved as
    'CrossSignal_<split_index>.html'.

    Raises:
        NotImplementedError: when ``self.output_length`` is not 1.
    """
    cross_mase_file = os.path.join(self.output_directory,
                                   f'cross_mase_{split_index}.txt')
    cross_loss_file = os.path.join(self.output_directory,
                                   f'cross_loss_{split_index}.txt')
    if self.output_length == 1:
        # one list of recorded values per node
        real_list = [[] for x in range(self.number_of_nodes)]
        predicted_list = [[] for x in range(self.number_of_nodes)]
        # running per-node sum of the MASE, averaged after the loop
        mase = np.array([0.0 for x in range(self.number_of_nodes)])
        count = 0
        for input, real in loader:
            input = input.float()
            real = real.float()
            input = input.to(self.device)
            real = real.to(self.device)
            # pad one zero at the beginning
            input = nn.functional.pad(input, (1, 0, 0, 0))
            # get prediction
            predicted = self.best_model[split_index](input)
            # transpose 2nd and 4th dimension (channel and time)
            predicted = predicted.transpose(1, 3)
            loss = self.loss_function(real, predicted)
            log(f'Loss: {loss}', file=cross_loss_file)
            # only index [0][0][node][0] of each batch is recorded
            # NOTE(review): presumably the first sample of the batch — confirm
            # that the loader yields one sample per batch here
            for node in range(self.number_of_nodes):
                real_list[node].append(float(real[0][0][node][0]))
                predicted_list[node].append(float(
                    predicted[0][0][node][0]))
            mase += self.mean_absolute_scaled_error(input, real, predicted)
            count += 1
        # average over the number of batches
        mase = mase / count
        for node in range(self.number_of_nodes):
            log(f'Channel {node}. MASE: {mase[node]}',
                file=cross_mase_file,
                widget=None)
        log(f'Overall MASE: {mase.mean()}',
            file=cross_mase_file,
            widget=None)
        if self.reconstruct_signal:
            real_list = np.array(real_list)
            predicted_list = np.array(predicted_list)
            # undo the standardisation
            real_list = (real_list * std) + mean
            predicted_list = (predicted_list * std) + mean
            # the figure and its x axis are created on first use and then
            # shared by later calls
            if self.cross_signal_reconstruction_figure is None:
                self.cross_signal_reconstruction_figure = go.Figure()
                self.index_list = [i for i in range(len(real_list[0]))]
            for node in range(self.number_of_nodes):
                # the real signal is only added until was_real_plotted is set
                # (at the end of the first successful call)
                if not self.was_real_plotted:
                    self.cross_signal_reconstruction_figure.add_trace(
                        go.Scatter(
                            x=self.index_list[-len(real_list[node]):],
                            y=real_list[node],
                            mode='lines',
                            name=f'Real ch. {node}_{split_index}'))
                # x uses the last len(real_list[node]) entries of the shared
                # index list
                self.cross_signal_reconstruction_figure.add_trace(
                    go.Scatter(x=self.index_list[-len(real_list[node]):],
                               y=predicted_list[node],
                               mode='lines',
                               name=f'Predict ch. {node}_{split_index}'))
            plotly.offline.plot(self.cross_signal_reconstruction_figure,
                                filename=os.path.join(
                                    self.output_directory,
                                    f'CrossSignal_{split_index}.html'),
                                auto_open=False)
    else:
        raise NotImplementedError(
            'GW with output greater than 1 not implemented.')
    self.was_real_plotted = True
import socket import sys import time import spotipy from util import util #constants HOST = "localhost" PORT = 8000 #parse arguments if len(sys.argv) == 2: username = sys.argv[1] else: util.log("Usage: python %s username" % sys.argv[0]) sys.exit() #determine necessary scope and authorize scope = util.gatherScope() sp = util.promptAuth(username, scope) while (True): #make request to conductor server sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: sock.connect((HOST, PORT)) responseArr = sock.recv(1024).strip().split("|") conductorTrackURI = responseArr[0] conductorTrackProgressMS = int(responseArr[1]) except KeyboardInterrupt:
def train(self):
    """Train one model per loader split, validating on the cross loader.

    Each split provides ``(train_loader, cross_loader, mean, std)``. After
    every epoch the cross loss is computed; the model with the lowest cross
    loss is kept and training stops early after 10 epochs without
    improvement. For every split the loss curves are plotted, the model is
    saved as 'model_<split_index>.model' and the cross signal is
    reconstructed.
    """
    log(f'Start training time: {str(datetime.datetime.now())}',
        self.log_file, self.widget)

    for split_index, loader_split in enumerate(self.loader_splits):
        log(
            f'Train {split_index + 1}/{len(self.loader_splits)}. Start time: {str(datetime.datetime.now())}',
            self.log_file, self.widget)

        train_loader = loader_split[0]
        cross_loader = loader_split[1]
        mean = loader_split[2]
        std = loader_split[3]

        # start from a fresh model, or from the best model of the previous
        # split when use_previous_model is set
        if split_index == 0 or not self.use_previous_model:
            self.create_model()
        if split_index != 0 and self.use_previous_model:
            self.model = copy.deepcopy(self.best_model[-1])
        # placeholder for this split's best model (both former branches
        # appended the same copy)
        self.best_model.append(copy.deepcopy(self.model))

        min_cross_error = 100000
        last_update = 0
        self.create_optimizer()
        losses = []
        cross_epoch_loss = []
        self.actual_epochs = 0

        for epoch in range(self.number_of_epochs):
            self.actual_epochs += 1
            self.model.train()
            log(
                f'Epoch: {epoch + 1}/{self.number_of_epochs}. Start time: {str(datetime.datetime.now())}',
                self.log_file, self.widget)

            for input, real in train_loader:
                input = input.float()
                real = real.float()
                input = input.to(self.device)
                real = real.to(self.device)
                self.optimizer.zero_grad()
                # pad one zero at the beginning
                input = nn.functional.pad(input, (1, 0, 0, 0))
                # get prediction
                predicted = self.model(input)
                # transpose 2nd and 4th dimension (channel and time)
                predicted = predicted.transpose(1, 3)
                # compute loss
                loss = self.loss_function(predicted, real)
                # compute gradient
                loss.backward()
                # clip gradient
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.clipping_gradient)
                # update model
                self.optimizer.step()
                # log loss
                log(
                    f'\tLoss: {float(loss)}; Time: {str(datetime.datetime.now())}',
                    self.log_file, self.widget)
                losses.append(float(loss))
            self.scheduler.step()

            cross_loss = 0
            count = 0
            self.model.eval()
            # Validation needs no gradients: without no_grad() every forward
            # pass built an autograd graph that was never used.
            with torch.no_grad():
                for input, real in cross_loader:
                    input = input.float()
                    real = real.float()
                    input = input.to(self.device)
                    real = real.to(self.device)
                    # pad one zero at the beginning
                    input = nn.functional.pad(input, (1, 0, 0, 0))
                    # get prediction
                    predicted = self.model(input)
                    # transpose 2nd and 4th dimension (channel and time)
                    predicted = predicted.transpose(1, 3)
                    # compute loss
                    loss = self.loss_function(real, predicted)
                    cross_loss += float(loss)
                    count += 1
            cross_epoch_loss.append(cross_loss / count)
            log(
                f'Cross Loss: {cross_epoch_loss[-1]}; Last update: {last_update}; '
                f'Time: {str(datetime.datetime.now())}', self.log_file,
                self.widget)

            if cross_epoch_loss[-1] <= min_cross_error:
                self.best_model[-1] = copy.deepcopy(self.model)
                last_update = 0
                min_cross_error = cross_epoch_loss[-1]
                log('New best model!', self.log_file, self.widget)
            else:
                last_update += 1

            # early stopping after 10 epochs without improvement
            if last_update >= 10:
                log("EARLY STOP", self.log_file, self.widget)
                break

        # NOTE(review): when training was not stopped early, the final model
        # replaces the best one found so far — confirm this is intended.
        if last_update < 10:
            self.best_model[split_index] = copy.deepcopy(self.model)
        self.best_model[split_index].eval()
        self.plot_train_loss(losses, cross_epoch_loss, split_index)
        self.best_model[split_index].save_model_to_file(
            os.path.join(self.output_directory,
                         f'model_{split_index}.model'))
        with torch.no_grad():
            self.reconstruct_signal_from_loader(split_index, cross_loader,
                                                mean, std)

    log(f'End training time: {str(datetime.datetime.now())}', self.log_file,
        self.widget)
def full_train(self):
    """Train a single model on the first loader split, without validation.

    The model is trained for ``self.number_of_epochs`` epochs on the train
    loader of the first split, becomes the only entry of ``self.best_model``,
    has its train loss plotted and is saved as 'model.model' in the output
    directory.
    """
    log(f'Train start time: {str(datetime.datetime.now())}', self.log_file,
        self.widget)

    # only the train loader of the first split is used
    all_splits = self.loader_splits
    train_loader = all_splits[0][0]

    self.create_model()
    self.create_optimizer()

    losses = []
    self.actual_epochs = 0
    for epoch_number in range(1, self.number_of_epochs + 1):
        self.actual_epochs += 1
        self.model.train()
        log(
            f'Epoch: {epoch_number}/{self.number_of_epochs}. Start time: {str(datetime.datetime.now())}',
            self.log_file, self.widget)

        for batch_input, target in train_loader:
            batch_input = batch_input.float().to(self.device)
            target = target.float().to(self.device)

            self.optimizer.zero_grad()

            # pad one zero at the beginning, then predict and swap the 2nd
            # and 4th dimension (channel and time)
            padded_input = nn.functional.pad(batch_input, (1, 0, 0, 0))
            prediction = self.model(padded_input).transpose(1, 3)

            # backward pass with gradient clipping, then the weight update
            loss = self.loss_function(prediction, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           self.clipping_gradient)
            self.optimizer.step()

            loss_value = float(loss)
            log(
                f'\tLoss: {loss_value}; Time: {str(datetime.datetime.now())}',
                self.log_file, self.widget)
            losses.append(loss_value)

        self.scheduler.step()

    # without validation the final model is the one that is kept
    self.best_model = [self.model]
    self.best_model[0].eval()
    self.plot_train_loss(losses, None, 0)
    self.best_model[0].save_model_to_file(
        os.path.join(self.output_directory, 'model.model'))

    log(f'End training time: {str(datetime.datetime.now())}', self.log_file,
        self.widget)
def _plot_channel_heatmap_grids(weights, kind, division_name, target_directory, widget):
    """Save the per-channel heatmaps of one input branch as 8 figures of 4x4 panels.

    Args:
        weights: standardized weight array indexed as ``weights[channel]``.
        kind: label inserted into the file name (``'response'`` or ``'stimulus'``).
        division_name: configuration name used as file-name prefix.
        target_directory: directory receiving the PNG files.
        widget: forwarded to ``log``.
    """
    # diverging palette (0 - white, extremities - red)
    cmap = sns.diverging_palette(10, 10, as_cmap=True)

    # shared colour limits so that every channel uses the same scale
    lower = weights[0][0][0]
    upper = weights[0][0][0]
    for channel in range(NUMBER_OF_CHANNELS):
        lower = min(lower, weights[channel].min())
        upper = max(upper, weights[channel].max())

    channel = 0
    # 8 figures x 16 panels, one panel per channel
    for count in range(8):
        f, axes = plt.subplots(4, 4)
        for row in range(4):
            for col in range(4):
                axis = axes[row][col]
                sns.heatmap(weights[channel],
                            cmap=cmap,
                            center=0.0,
                            ax=axis,
                            cbar=False,
                            vmin=lower,
                            vmax=upper)
                axis.set_ylabel('')
                axis.set_xlabel('')
                axis.set_xticks([])
                axis.set_yticks([])
                axis.set_title(f'{channel}', fontdict={'fontsize': 7}, pad=0)
                log(f'Plotted channel {channel}', file=None, widget=widget)
                channel += 1
        f.savefig(
            os.path.join(target_directory,
                         f"{division_name}_{kind}_heatmap_{count}.png"))
        plt.close(f)


def plot_heatmaps(multiple_runs_path, plot_weight_heatmaps,
                  plot_collapsed_weight_heatmaps,
                  plot_collapsed_weight_heatmaps_aligned, output_path, widget):
    """Plot heatmaps of the run-averaged input weights of every configuration.

    Each entry of ``multiple_runs_path`` is treated as one configuration
    directory; the last two underscore-separated fields of its name are parsed
    as integers and passed to ``get_example_length``. Every sub-directory of a
    configuration is treated as one run holding ``<name>_DUAL.model``. The
    ``0.0.weight`` (stimulus) and ``1.0.weight`` (response) parameters are
    averaged over the runs, standardized with their global mean/std and
    plotted into ``<output_path>/ModelAnalytics/<name>``.

    Args:
        multiple_runs_path: directory holding one sub-directory per configuration.
        plot_weight_heatmaps: if true, save the per-channel heatmap grids.
        plot_collapsed_weight_heatmaps: if true, save the collapsed heatmaps.
        plot_collapsed_weight_heatmaps_aligned: if true, save the aligned
            collapsed heatmaps.
        output_path: directory under which ``ModelAnalytics`` is created.
        widget: forwarded to ``log``.
    """
    data_directory = multiple_runs_path

    heatmap_output_directory = os.path.join(output_path, 'ModelAnalytics')
    os.makedirs(heatmap_output_directory, exist_ok=True)

    # for each configuration
    for division_name in os.listdir(data_directory):
        # The listdir entry already is the configuration name; splitting the
        # joined path on '\\' only worked on Windows.
        directory = os.path.join(data_directory, division_name)

        division_directory_heatmap = os.path.join(heatmap_output_directory,
                                                  division_name)
        os.makedirs(division_directory_heatmap, exist_ok=True)

        name_fields = division_name.split('_')
        first_name_value = int(name_fields[-2])
        second_name_value = int(name_fields[-1])

        example_length = get_example_length(DIVISION_LENGTH,
                                            first_name_value,
                                            second_name_value)

        stimulus_hidden_size = get_hidden_size(example_length,
                                               STIMULUS_OUTPUT_SIZE)
        response_hidden_size = get_hidden_size(example_length,
                                               RESPONSE_OUTPUT_SIZE)

        # dummy model into which the stored runs are loaded one by one
        model = DualEnsembleClassifierModel(
            ([example_length, stimulus_hidden_size, STIMULUS_OUTPUT_SIZE],
             [example_length, response_hidden_size, RESPONSE_OUTPUT_SIZE]),
            NUMBER_OF_CHANNELS)

        # every sub-directory (the walk root itself is dropped) is one run;
        # `directory` is already joined, so it must not be joined again
        runs_directories = [x[0] for x in os.walk(directory)][1:]
        number_of_models = len(runs_directories)
        if number_of_models == 0:
            # nothing to average; the original failed here with None / 0
            log(f'No runs found in {directory}; skipping', file=None,
                widget=widget)
            continue

        model_tensor_stimulus = None
        model_tensor_response = None

        for runs_directory in runs_directories:
            model.load_model_from_file(
                os.path.join(runs_directory, division_name + '_DUAL.model'))

            stimulus_weights = []
            response_weights = []
            for name, param in model.named_parameters():
                # input matrix for stimulus (checked first, as before)
                if '0.0.weight' in name:
                    stimulus_weights.append(param.data)
                # input matrix for response
                elif '1.0.weight' in name:
                    response_weights.append(param.data)

            # torch.stack copies, so the running sum below never aliases the
            # parameters of the model that is reloaded on the next iteration
            tensor_list_stimulus = torch.stack(stimulus_weights)
            tensor_list_response = torch.stack(response_weights)

            if model_tensor_stimulus is None:
                model_tensor_response = tensor_list_response
                model_tensor_stimulus = tensor_list_stimulus
            else:
                model_tensor_response += tensor_list_response
                model_tensor_stimulus += tensor_list_stimulus
            log(f'Finished {runs_directory}', file=None, widget=widget)

        # average over runs
        model_tensor_stimulus = model_tensor_stimulus / number_of_models
        model_tensor_response = model_tensor_response / number_of_models

        # global (population) std and mean of each averaged tensor
        std_stimulus = torch.std(model_tensor_stimulus,
                                 unbiased=False).numpy().tolist()
        std_response = torch.std(model_tensor_response,
                                 unbiased=False).numpy().tolist()
        mean_stimulus = torch.mean(model_tensor_stimulus).numpy().tolist()
        mean_response = torch.mean(model_tensor_response).numpy().tolist()

        # standardize both inputs
        response_array = (model_tensor_response.numpy() -
                          mean_response) / std_response
        stimulus_array = (model_tensor_stimulus.numpy() -
                          mean_stimulus) / std_stimulus

        if plot_weight_heatmaps:
            _plot_channel_heatmap_grids(response_array, 'response',
                                        division_name,
                                        division_directory_heatmap, widget)
            _plot_channel_heatmap_grids(stimulus_array, 'stimulus',
                                        division_name,
                                        division_directory_heatmap, widget)

        if plot_collapsed_weight_heatmaps:
            collapsed_folder = os.path.join(division_directory_heatmap,
                                            'CollapsedHeatmaps')
            os.makedirs(collapsed_folder, exist_ok=True)
            plot_collapsed_heatmaps(stimulus_array, collapsed_folder,
                                    'stimulus_input', first_name_value,
                                    second_name_value, DIVISION_LENGTH, True,
                                    True, False)
            plot_collapsed_heatmaps(response_array, collapsed_folder,
                                    'response_input', first_name_value,
                                    second_name_value, DIVISION_LENGTH, False,
                                    True, False)

        if plot_collapsed_weight_heatmaps_aligned:
            collapsed_folder = os.path.join(division_directory_heatmap,
                                            'CollapsedHeatmapsAligned')
            os.makedirs(collapsed_folder, exist_ok=True)
            plot_collapsed_heatmaps(stimulus_array, collapsed_folder,
                                    'stimulus_input', first_name_value,
                                    second_name_value, DIVISION_LENGTH, True,
                                    True, True)
            plot_collapsed_heatmaps(response_array, collapsed_folder,
                                    'response_input', first_name_value,
                                    second_name_value, DIVISION_LENGTH, False,
                                    True, True)
def run(self):
    """Log a start message, call ``self.thread_function`` and log an exit message."""
    start_message = "".join(("Starting ", self.name))
    log(start_message, None, self.widget)

    self.thread_function()

    exit_message = "".join(("Exiting ", self.name))
    log(exit_message, None, self.widget)
def graph_regions_plot_window_difference(matrices_directory, output_directory, trial_index, widget = None, normalize = True, should_filter = True):
    """Plot the change of the aggregated graph between consecutive windows of a trial.

    Window folders (integer-named sub-directories of
    ``<matrices_directory>/Window/<trial_index>``) are processed in ascending
    order with ``aggregate_channels``. For each pair of consecutive windows the
    element-wise difference of node sizes and of node edges is plotted with a
    symmetric colour range, and a similarity score
    ``1 - mean(|difference|)`` is written to ``similarity.txt``.

    Args:
        matrices_directory: root directory containing the ``Window`` folder.
        output_directory: root under which
            ``GraphWavenetAdjacency/Window/<trial_index>/Differences`` is created.
        trial_index: trial whose windows are compared.
        widget: forwarded to ``log``.
        normalize: forwarded to ``aggregate_channels``.
        should_filter: forwarded to ``aggregate_channels``.
    """
    # os.makedirs creates all intermediate directories in one call
    output_directory = os.path.join(output_directory, 'GraphWavenetAdjacency',
                                    'Window', f'{trial_index}', 'Differences')
    os.makedirs(output_directory, exist_ok=True)

    log("Started graph regions plot window difference: " + str(datetime.datetime.now()), file = None, widget = widget)

    # find input matrices
    matrices_directory = os.path.join(matrices_directory, 'Window',
                                      f'{trial_index}')

    # window folders are named by their integer index
    folder_list = sorted(int(x) for x in next(os.walk(matrices_directory))[1])

    node_sizes_list = []
    node_edges_list = []
    for window_index in folder_list:
        node_size, node_edges = aggregate_channels(matrices_directory,
                                                   trial_index, window_index,
                                                   False, normalize,
                                                   should_filter)
        node_sizes_list.append(np.array(node_size))
        node_edges_list.append(np.array(node_edges))

    # differences between each window and its predecessor
    node_sizes_differences = [
        current - previous
        for previous, current in zip(node_sizes_list, node_sizes_list[1:])
    ]
    node_edges_differences = [
        current - previous
        for previous, current in zip(node_edges_list, node_edges_list[1:])
    ]
    node_similarity = [1 - abs(diff).sum() / diff.size
                       for diff in node_sizes_differences]
    edge_similarity = [1 - abs(diff).sum() / diff.size
                       for diff in node_edges_differences]

    # start from a clean similarity file on every call
    similarity_file = os.path.join(output_directory, "similarity.txt")
    if os.path.exists(similarity_file):
        os.remove(similarity_file)

    for index in range(len(node_similarity)):
        log(f'Window {index + 1}-{index}', file = similarity_file, widget = None)
        log(f'\tNode similarity: {node_similarity[index]}', file = similarity_file, widget = None)
        log(f'\tEdge similarity: {edge_similarity[index]}', file = similarity_file, widget = None)

    if node_sizes_differences:
        maximum = max(
            max(x.max() for x in node_sizes_differences),
            max(x.max() for x in node_edges_differences)
        )
        minimum = min(
            min(x.min() for x in node_sizes_differences),
            min(x.min() for x in node_edges_differences)
        )
        # symmetric range so that zero sits in the middle of the colour map
        maximum = max(maximum, abs(minimum))
        minimum = -maximum

        for index in range(len(node_sizes_differences)):
            title = f'{trial_index} {index + 1}-{index}'
            plot_graph(node_edges_differences[index],
                       node_sizes_differences[index], title, output_directory,
                       plt.cm.bwr, minimum, maximum)
    else:
        # with fewer than two windows there is nothing to compare; the
        # original raised ValueError from max() on an empty list here
        log("No window differences to plot (fewer than two windows found)", file = None, widget = widget)

    log("Finished graph regions plot window difference: " + str(datetime.datetime.now()), file = None, widget = widget)
def _collect_metric_series(stimulus, metric, trial_dictionary):
    """Return the metric values and their ``<trial>_<window>`` labels for one stimulus.

    The first entry of ``TRIALS_FOR_STIMULUS[stimulus]`` is skipped, as in the
    original code.
    """
    values = []
    labels = []
    for trial in TRIALS_FOR_STIMULUS[stimulus][1:]:
        for window in trial_dictionary[trial]:
            values.append(float(trial_dictionary[trial][window][metric]))
            labels.append(f'{trial}_{window}')
    return values, labels


def _min_max_normalize(values):
    """Scale ``values`` to [0, 1]; a constant series maps to all zeros instead of NaN."""
    shifted = values - values.min()
    span = shifted.max()
    # span == 0 means every value is equal: dividing would produce 0/0 = NaN
    return shifted / span if span > 0 else shifted


def dynamic_time_warping(metrics, output_path, stimulus_pairs, trial_dictionary):
    """Compare stimuli pairwise with dynamic time warping, once per metric.

    For every metric and every pair in ``stimulus_pairs`` the per-window
    metric values of both stimuli are collected, min-max normalized and
    aligned with ``dtw.warping_paths``. The shorter series is the query. The
    similarity and the matched window pairs are logged to
    ``<output_path>/DynamicTimeWarping/<metric>/<title>.txt`` and the warping
    path plot is saved next to it as ``<title>.png``.

    Args:
        metrics: iterable of metric names (keys of the per-window dictionaries).
        output_path: directory under which ``DynamicTimeWarping`` is created.
        stimulus_pairs: iterable of ``(first_stimulus, second_stimulus)`` keys
            of ``TRIALS_FOR_STIMULUS``.
        trial_dictionary: mapping ``trial -> window -> metric -> value``.
    """
    output_path = os.path.join(output_path, "DynamicTimeWarping")
    os.makedirs(output_path, exist_ok=True)

    for metric in metrics:
        metric_dir = os.path.join(output_path, metric)
        os.makedirs(metric_dir, exist_ok=True)

        for first_stimulus, second_stimulus in stimulus_pairs:
            first_values, first_text = _collect_metric_series(
                first_stimulus, metric, trial_dictionary)
            second_values, second_text = _collect_metric_series(
                second_stimulus, metric, trial_dictionary)

            # the shorter series (first one on ties) is used as the query
            if len(first_values) <= len(second_values):
                query, template = first_values, second_values
                query_text, template_text = first_text, second_text
                title = first_stimulus.split(
                    ' ')[0] + "+" + second_stimulus.split(' ')[0]
            else:
                query, template = second_values, first_values
                query_text, template_text = second_text, first_text
                title = second_stimulus.split(
                    ' ')[0] + "+" + first_stimulus.split(' ')[0]

            query = np.array(query)
            template = np.array(template)

            query_normalized = _min_max_normalize(query)
            template_normalized = _min_max_normalize(template)

            _, paths = dtw.warping_paths(query_normalized,
                                         template_normalized,
                                         window=10,
                                         psi=0)
            best_path = dtw.best_path(paths)

            # paths is indexed with an offset of one relative to the series
            last_row, last_col = best_path[-1][0], best_path[-1][1]
            similarity = 1 - paths[last_row + 1][last_col + 1] / len(best_path)

            metric_file = os.path.join(metric_dir, f'{title}.txt')
            log(f'Similarity: {similarity}', file=metric_file)
            for pair in best_path:
                log(f'\tPair: {pair}. Match: {query_text[pair[0]]} {template_text[pair[1]]}',
                    file=metric_file)

            # the plot shows the raw (un-normalized) series, as before
            fig, axes = dtwvis.plot_warpingpaths(query, template, paths,
                                                 best_path)
            # replace the default annotation with the similarity score
            axes[0].texts[0].set_visible(False)
            axes[0].text(0, 0, "Similarity = {:.4f}".format(similarity))
            plt.savefig(os.path.join(metric_dir, f'{title}.png'))
            plt.close()
def fit(self, viz, viz_name, train_loader, cross_loader, html_file, number_epochs, learning_rate, widget):
    """
    Fits the model on the train loader and tracks the loss on the cross-validation loader.

    Each batch holds two tensors per channel (indices ``2 * channel`` and
    ``2 * channel + 1``) followed by the labels; ``batch[-3]`` is used as the
    first target and ``batch[-2]`` as the second. The per-channel outputs are
    summed ("voting" ensemble) before the two cross-entropy losses are
    computed and combined with ``self.dual_loss_aggregation``.

    Args:
        viz: Visdom handle; when truthy, ``self.plot_to_vizdom`` is called each epoch.
        viz_name: forwarded to ``self.plot_to_vizdom``.
        train_loader: iterable of training batches.
        cross_loader: iterable of cross-validation batches.
        html_file: forwarded to ``self.plot_elt_and_elcv``.
        number_epochs: number of passes over ``train_loader``.
        learning_rate: learning rate of the AdamW optimizer.
        widget: forwarded to ``log``.
    """

    def ensemble_forward(batch, track_input_grad):
        """Sum the two model outputs over all channels for one batch."""
        first_agg_output = None
        second_agg_output = None
        for channel in range(self.number_of_channels):
            if track_input_grad:
                # set the input tensors to require grad (training only)
                batch[channel * 2].requires_grad = True
                batch[channel * 2 + 1].requires_grad = True
            first_output, second_output = self(batch[channel * 2].float(),
                                               batch[channel * 2 + 1].float(),
                                               channel)
            # "vote" = ensemble
            if channel == 0:
                first_agg_output = first_output
                second_agg_output = second_output
            else:
                first_agg_output += first_output
                second_agg_output += second_output
        return first_agg_output, second_agg_output

    def compute_losses(first_agg_output, second_agg_output, batch):
        """Return (first_loss, second_loss, combined_loss) for one batch."""
        first_loss = 1 + first_criterion(first_agg_output, batch[-3].long())
        second_loss = 1 + second_criterion(second_agg_output, batch[-2].long())
        return first_loss, second_loss, self.dual_loss_aggregation(
            first_loss, second_loss)

    # define cost
    first_criterion = nn.CrossEntropyLoss()
    # weighted classes for the second output
    weights_class = torch.FloatTensor([1.0, 2.5, 1.5])
    second_criterion = nn.CrossEntropyLoss(weight = weights_class)

    # define optimizer
    optimizer = torch.optim.AdamW(self.parameters(), lr = learning_rate)

    epochs_number = list(range(1, number_epochs + 1))
    el_array = []
    cv_array = []
    first_array = []
    second_array = []
    first_cross_array = []
    second_cross_array = []

    # for each pass through the examples
    for epoch in range(number_epochs):
        epoch_loss = 0
        first_loss_epoch = 0
        second_loss_epoch = 0
        train_batches = 0
        log(f'Epoch {epoch + 1}/{number_epochs}', file = None, widget = widget)

        # switch to train mode (Dropout used)
        self.train()

        # adjust the model one batch at a time
        for batch in train_loader:
            first_agg_output, second_agg_output = ensemble_forward(batch, True)

            # reset the gradients
            optimizer.zero_grad()

            first_loss, second_loss, loss = compute_losses(
                first_agg_output, second_agg_output, batch)

            # backward propagate through the network and update weights
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            first_loss_epoch += first_loss.item()
            second_loss_epoch += second_loss.item()
            train_batches += 1

        # compute cross validation loss in eval mode (Dropout not used)
        self.eval()
        cv_loss = 0
        first_cross_loss = 0
        second_cross_loss = 0
        cross_batches = 0

        # no parameter update happens here, so no autograd graph is needed
        with torch.no_grad():
            for batch in cross_loader:
                first_agg_output, second_agg_output = ensemble_forward(
                    batch, False)
                first_loss, second_loss, loss = compute_losses(
                    first_agg_output, second_agg_output, batch)

                first_cross_loss += first_loss.item()
                second_cross_loss += second_loss.item()
                cv_loss += loss.item()
                cross_batches += 1

        # Training sums are averaged over the number of TRAINING batches.
        # The original reused one counter that had been reset for the
        # validation loop, so training averages were divided by the number
        # of validation batches.
        el_array.append(epoch_loss / train_batches)
        first_array.append(first_loss_epoch / train_batches)
        second_array.append(second_loss_epoch / train_batches)
        cv_array.append(cv_loss / cross_batches)
        first_cross_array.append(first_cross_loss / cross_batches)
        second_cross_array.append(second_cross_loss / cross_batches)

        # print to VISDOM if available
        if viz:
            # NOTE(review): plot_to_vizdom receives a single batch count; the
            # validation count is passed, as before. If it also normalizes
            # the training sums with it, it needs the same fix - confirm.
            self.plot_to_vizdom(cross_batches, cv_loss, epoch, epoch_loss,
                                first_cross_array, first_loss_epoch,
                                second_cross_array, second_loss_epoch, viz,
                                viz_name)

    # plot losses
    self.plot_elt_and_elcv(epochs_number, el_array, cv_array, first_array,
                           second_array, first_cross_array,
                           second_cross_array, html_file)
def trial_window_configuration(dots_folder_path, output_directory, window_size, window_offset, threshold, widget):
    """Write, per subject and trial, the trial interval(s) and their window split.

    For every subject the event timestamps and event codes are read, events
    whose code is not in ``EVENT_CODES_FILTER`` are dropped, and the remaining
    events are consumed in pairs (start, end) for ``NUMBER_OF_TRIALS`` trials.
    A trial no longer than ``threshold`` is split as a whole; a longer trial
    is reduced to its first and last ``threshold // 2`` timestamps, each split
    separately.

    Output goes to ``<output_directory>/TrialWindowConfiguration``: window
    splits in ``Window/<subject>_<trial>.txt`` (written by ``split_trial``) and
    the kept intervals in ``Trial/<subject>_<trial>.txt``.

    Args:
        dots_folder_path: directory containing one folder per subject.
        output_directory: root directory for the generated files.
        window_size: forwarded to ``split_trial``.
        window_offset: forwarded to ``split_trial``.
        threshold: maximum trial length kept as one piece.
        widget: forwarded to ``log``.
    """
    output_directory = os.path.join(output_directory, 'TrialWindowConfiguration')
    os.makedirs(output_directory, exist_ok = True)

    window_output_directory = os.path.join(output_directory, 'Window')
    os.makedirs(window_output_directory, exist_ok = True)

    trial_output_directory = os.path.join(output_directory, 'Trial')
    os.makedirs(trial_output_directory, exist_ok = True)

    log_file = os.path.join(output_directory, "log.txt")

    log("Started creating files for split configuration for each subject and trial: " + str(datetime.datetime.now()), log_file, widget)

    # NOTE(review): this counts directories at every depth of the walk, so it
    # equals the number of subjects only if subject folders have no
    # sub-directories - confirm against the data layout.
    number_of_subjects = sum(len(dirnames) for _, dirnames, _ in os.walk(dots_folder_path))

    half_threshold = threshold // 2

    for subject_number in range(1, number_of_subjects + 1):
        subject_name = SUBJECT_FILE_PREFIX + get_string_from_number(subject_number)
        subject_directory = os.path.join(dots_folder_path, subject_name)

        # full path of the current subject's event timestamp file
        event_timestamp_file_path = os.path.join(
            subject_directory,
            subject_name + SUBJECT_FILE_EVENT_TIMESTAMPS + SUBJECT_FILE_EXTENSION)
        timestamps = read_values_from_binary_file_one_by_one(event_timestamp_file_path, 'i', 4)

        # full path of the current subject's event codes file
        event_codes_file_path = os.path.join(
            subject_directory,
            subject_name + SUBJECT_FILE_EVENT_CODES + SUBJECT_FILE_EXTENSION)
        event_codes = read_values_from_binary_file_one_by_one(event_codes_file_path, 'i', 4)

        # attach to each event code its timestamp and keep only the needed
        # events; structure: [..., (timestamp, event), ...]
        event_code_timestamps = [
            (timestamp, event_code)
            for timestamp, event_code in zip(timestamps, event_codes)
            if event_code in EVENT_CODES_FILTER
        ]

        for trial_number in range(NUMBER_OF_TRIALS):
            # kept events alternate: trial start, trial end
            trial_start_timestamp = event_code_timestamps[2 * trial_number][0]
            trial_end_timestamp = event_code_timestamps[2 * trial_number + 1][0]
            trial_length = trial_end_timestamp - trial_start_timestamp + 1

            file_name = f'{subject_number}_{trial_number + 1}.txt'
            # context managers close both files even if split_trial raises
            with open(os.path.join(window_output_directory, file_name), 'w+') as window_file, \
                    open(os.path.join(trial_output_directory, file_name), 'w+') as trial_file:
                if trial_length <= threshold:
                    split_trial(window_file, trial_start_timestamp, trial_end_timestamp, window_size, window_offset)
                    print(f'{trial_start_timestamp} {trial_end_timestamp}', file = trial_file)
                else:
                    # long trial: keep only its head and its tail
                    head_end = trial_start_timestamp + half_threshold - 1
                    tail_start = trial_end_timestamp - half_threshold + 1
                    split_trial(window_file, trial_start_timestamp, head_end, window_size, window_offset)
                    split_trial(window_file, tail_start, trial_end_timestamp, window_size, window_offset)
                    print(f'{trial_start_timestamp} {head_end}', file = trial_file)
                    print(f'{tail_start} {trial_end_timestamp}', file = trial_file)

    log("Finished creating files for split configuration for each subject and trial: " + str(datetime.datetime.now()), log_file, widget)
def predict(self, test_loader, first_file_csv, first_file_txt, second_file_csv, second_file_txt, channel_file, first_class_names, second_class_names, number_of_subjects):
    """
    Evaluates the model on the test loader and writes one classification report per output.

    Each batch holds two tensors per channel (indices ``2 * channel`` and
    ``2 * channel + 1``); ``batch[-3]`` and ``batch[-2]`` hold the labels of
    the first and second output. Per-channel outputs are summed and the class
    with the highest summed score is the prediction.

    Args:
        test_loader: iterable of test batches.
        first_file_csv: destination of the first report as CSV (written without index).
        first_file_txt: file receiving the first report as text, via ``log``.
        second_file_csv: destination of the second report as CSV (written without index).
        second_file_txt: file receiving the second report as text, via ``log``.
        channel_file: not used by this method.
        first_class_names: target names of the first report.
        second_class_names: target names of the second report.
        number_of_subjects: not used by this method.
    """
    first_actual_output = []
    first_expected_output = []
    second_actual_output = []
    second_expected_output = []

    # set to eval mode (Dropout not used)
    self.eval()

    # Pure inference: no gradients are needed, so autograd is disabled and
    # the inputs are no longer forced to require grad.
    with torch.no_grad():
        for batch in test_loader:
            first_agg_output = None
            second_agg_output = None

            for channel in range(self.number_of_channels):
                first_output, second_output = self(batch[channel * 2].float(),
                                                   batch[channel * 2 + 1].float(),
                                                   channel)
                # "vote" = ensemble
                if channel == 0:
                    first_agg_output = first_output
                    second_agg_output = second_output
                else:
                    first_agg_output += first_output
                    second_agg_output += second_output

            # predictions are the classes with the maximum summed score
            _, first_predicted = torch.max(first_agg_output, 1)
            _, second_predicted = torch.max(second_agg_output, 1)

            # extend the arrays with the predicted values and corresponding labels
            first_actual_output.extend(first_predicted.tolist())
            first_expected_output.extend(batch[-3].tolist())
            second_actual_output.extend(second_predicted.tolist())
            second_expected_output.extend(batch[-2].tolist())

    first_true = np.array(first_expected_output)
    first_pred = np.array(first_actual_output)
    second_true = np.array(second_expected_output)
    second_pred = np.array(second_actual_output)

    # CSV reports
    first_report = classification_report(y_true = first_true,
                                         y_pred = first_pred,
                                         target_names = first_class_names,
                                         output_dict = True)
    first_df = pandas.DataFrame(first_report).transpose()
    first_df.to_csv(first_file_csv, index = False)

    second_report = classification_report(y_true = second_true,
                                          y_pred = second_pred,
                                          target_names = second_class_names,
                                          output_dict = True)
    second_df = pandas.DataFrame(second_report).transpose()
    second_df.to_csv(second_file_csv, index = False)

    # text reports
    log(
        classification_report(y_true = first_true,
                              y_pred = first_pred,
                              target_names = first_class_names)
        , file = first_file_txt, widget = None
    )
    log(
        classification_report(y_true = second_true,
                              y_pred = second_pred,
                              target_names = second_class_names)
        , file = second_file_txt, widget = None
    )
def _mean_and_population_std(values):
    """Return the mean and the population standard deviation of ``values``."""
    mean = sum(values) / len(values)
    std = sqrt(sum((x - mean)**2 for x in values) / len(values))
    return mean, std


def _add_distribution_traces(fig, distributions, runs_dict, kind, row):
    """Add, for every configuration, its density curve, mean line and std bar.

    Args:
        fig: subplot figure receiving the traces.
        distributions: one generated sample per configuration, in the key
            order of ``runs_dict``.
        runs_dict: per-configuration statistics holding ``mean_<kind>`` and
            ``std_<kind>``.
        kind: ``'stimulus'`` or ``'response'``.
        row: subplot row the traces are added to.
    """
    names = list(runs_dict.keys())
    distribution_plot = ff.create_distplot(distributions, names,
                                           show_hist=False)

    for count, name in enumerate(names):
        name_fields = name.split('_')
        # legend label: the last two fields of the configuration name
        short_name = name_fields[-2] + '_' + name_fields[-1]
        mean = runs_dict[name][f'mean_{kind}']
        std = runs_dict[name][f'std_{kind}']
        density = distribution_plot['data'][count]

        # plot distribution
        fig.add_trace(go.Scatter(
            density,
            name=short_name,
            line=dict(color=COLOR_LIST_DISTRIBUTION_PLOTS[count])),
                      row=row,
                      col=1)
        # plot distribution mean as a vertical line up to the density peak
        fig.add_trace(go.Scatter(
            x=[mean, mean],
            y=[0, max(density.y)],
            mode='lines+markers',
            name='Mean ' + short_name,
            line=dict(color=COLOR_LIST_DISTRIBUTION_PLOTS[count])),
                      row=row,
                      col=1)
        # plot distribution std as a horizontal bar; the height is staggered
        # per configuration so the bars do not overlap
        fig.add_trace(go.Scatter(
            x=[mean - std, mean + std],
            y=[count * 0.5, count * 0.5],
            mode='lines+markers',
            name='Std ' + short_name,
            line=dict(color=COLOR_LIST_DISTRIBUTION_PLOTS[count])),
                      row=row,
                      col=1)


def plot_distribution_for_multiple_runs(multiple_runs_directory,
                                        output_directory, widget):
    """Plot and log the f1-score distribution of every configuration over its runs.

    Each entry of ``multiple_runs_directory`` is one configuration; each of
    its sub-directories is one run holding ``<name>_DUAL_RESPONSE.csv`` and
    ``<name>_DUAL_STIMULUS.csv``. For each run the class-average f1-score is
    computed (summary rows dropped); mean and population std over the runs
    are stored per configuration, plotted to ``Distribution_plots.html`` and
    logged to ``Multiple_runs_statistics.txt.txt`` in ``output_directory``.

    Args:
        multiple_runs_directory: directory holding one sub-directory per configuration.
        output_directory: directory receiving the HTML plot and the log file.
        widget: forwarded to ``log``.
    """
    log_file = os.path.join(output_directory,
                            'Multiple_runs_statistics.txt.txt')

    multiple_runs_dict = {}

    # for each configuration
    for division_name in os.listdir(multiple_runs_directory):
        # The listdir entry already is the configuration name; splitting the
        # joined path on '\\' only worked on Windows.
        directory = os.path.join(multiple_runs_directory, division_name)

        avg_response_list = []
        avg_stimulus_list = []
        multiple_runs_dict[division_name] = {
            'avg_response_list': avg_response_list,
            'avg_stimulus_list': avg_stimulus_list,
        }

        # every sub-directory (the walk root itself is dropped) is one run;
        # `directory` is already joined, so it must not be joined again
        runs_directories = [x[0] for x in os.walk(directory)][1:]

        for runs_directory in runs_directories:
            response_csv_file = os.path.join(
                runs_directory, division_name + '_DUAL_RESPONSE.csv')
            stimulus_csv_file = os.path.join(
                runs_directory, division_name + '_DUAL_STIMULUS.csv')

            # response: keep only the f1-score column and drop rows 3-5,
            # leaving the three class rows
            response_df = pd.read_csv(response_csv_file)['f1-score']
            response_df = response_df.drop([3, 4, 5], axis=0)
            avg_response_list.append(
                (response_df[0] + response_df[1] + response_df[2]) / 3)

            # stimulus: keep only the f1-score column and drop the last three
            # rows, whatever the number of classes
            stimulus_df = pd.read_csv(stimulus_csv_file)['f1-score']
            number_of_rows = stimulus_df.shape[0]
            stimulus_df = stimulus_df.drop(
                [number_of_rows - 1, number_of_rows - 2, number_of_rows - 3],
                axis=0)
            number_of_classes = number_of_rows - 3
            avg_stimulus_list.append(
                sum(row for row in stimulus_df) / number_of_classes)

        mean_response, std_response = _mean_and_population_std(
            avg_response_list)
        mean_stimulus, std_stimulus = _mean_and_population_std(
            avg_stimulus_list)

        # save parameters
        multiple_runs_dict[division_name]['mean_response'] = mean_response
        multiple_runs_dict[division_name]['std_response'] = std_response
        multiple_runs_dict[division_name]['mean_stimulus'] = mean_stimulus
        multiple_runs_dict[division_name]['std_stimulus'] = std_stimulus

    # generate distributions to be plotted
    response_distributions = []
    stimulus_distributions = []
    for key in multiple_runs_dict:
        response_distributions.append(
            generate_distribution(multiple_runs_dict[key]['std_response'],
                                  multiple_runs_dict[key]['mean_response']))
        stimulus_distributions.append(
            generate_distribution(multiple_runs_dict[key]['std_stimulus'],
                                  multiple_runs_dict[key]['mean_stimulus']))

    # stimulus in the first row, response in the second
    fig = make_subplots(rows=2,
                        cols=1,
                        subplot_titles=('Stimulus', 'Response'))
    _add_distribution_traces(fig, stimulus_distributions, multiple_runs_dict,
                             'stimulus', 1)
    _add_distribution_traces(fig, response_distributions, multiple_runs_dict,
                             'response', 2)

    # add figure title and save
    fig.update_layout(title_text="Performance distribution plots")
    plotly.offline.plot(fig,
                        filename=os.path.join(output_directory,
                                              'Distribution_plots.html'),
                        auto_open=False)

    # log distribution parameters
    for name in multiple_runs_dict:
        log(f'{name} :', log_file, widget)
        log(f'- Mean response: {multiple_runs_dict[name]["mean_response"]}',
            log_file, widget)
        log(f'- Std response: {multiple_runs_dict[name]["std_response"]}',
            log_file, widget)
        log(f'- Mean stimulus: {multiple_runs_dict[name]["mean_stimulus"]}',
            log_file, widget)
        log(f'- Std stimulus: {multiple_runs_dict[name]["std_stimulus"]}',
            log_file, widget)
import SocketServer import sys import spotipy from util import util #constants HOST = "localhost" PORT = 8000 #parse arguments if len(sys.argv) == 2: username = sys.argv[1] else: util.log("Usage: python %s username" % sys.argv[0]) sys.exit() #determine necessary scope and authorize scope = util.gatherScope() sp = util.promptAuth(username, scope) #define request handler class Handler(SocketServer.BaseRequestHandler): def handle(self): currentTrack = sp.current_user_playing_track() currentTrackURI = util.propertyToString(currentTrack["item"]["uri"]) currentTrackProgressMS = util.propertyToString( currentTrack["progress_ms"]) util.log("Request made, responding with: %s|%s" %
def model_classification_statistics(model_path, trial_lengths_path,
                                    output_path, median_value,
                                    generate_from_train, generate_from_cross,
                                    generate_from_test, widget):
    """Run the dual classifier over the stored splits and plot statistics.

    Loads a ``DualEnsembleClassifierModel`` and the ``.dat`` arrays saved next
    to it, calls ``model.predict_for_classification_statistics`` for every
    requested split, and finally plots two aggregated histograms (stimulus and
    response) built from the results of all requested splits.

    Args:
        model_path: Directory holding the trained model and the ``.dat``
            files. Its name must end in ``_<window_size>_<window_offset>``
            because both integers are parsed from it.
        trial_lengths_path: Directory containing ``trial_lengths.dat``.
        output_path: Base output directory; everything is written below its
            ``ClassificationStatistics`` sub-directory.
        median_value: Forwarded unchanged to the prediction routine and to
            ``plot_histogram``.
        generate_from_train: Whether to process the train split.
        generate_from_cross: Whether to process the cross split.
        generate_from_test: Whether to process the test split.
        widget: Forwarded to ``log``.

    Raises:
        FileNotFoundError: If one of the expected ``.dat`` files is missing
            from the model directory.
    """
    log("Started classification statistics: " + str(datetime.datetime.now()),
        file=None,
        widget=widget)

    model_directory = model_path
    trial_lengths_directory = trial_lengths_path
    output_directory = os.path.join(output_path, "ClassificationStatistics")
    os.makedirs(output_directory, exist_ok=True)

    # The window configuration is encoded in the model directory name.
    name_parts = model_directory.split('_')
    window_size = int(name_parts[-2])
    window_offset = int(name_parts[-1])

    example_length = get_example_length(DIVISION_LENGTH, window_size,
                                        window_offset)
    stimulus_hidden_size = get_hidden_size(example_length,
                                           STIMULUS_OUTPUT_SIZE)
    response_hidden_size = get_hidden_size(example_length,
                                           RESPONSE_OUTPUT_SIZE)

    model = DualEnsembleClassifierModel(
        ([example_length, stimulus_hidden_size, STIMULUS_OUTPUT_SIZE],
         [example_length, response_hidden_size, RESPONSE_OUTPUT_SIZE]),
        NUMBER_OF_CHANNELS)
    model.load_model_from_file(
        os.path.join(
            model_directory,
            f"Training_with_{window_size}_{window_offset}_DUAL.model"))

    splits = ("train", "cross", "test")
    kinds = ("stimulus", "response")

    def load_int_array(file_name):
        # Integer metadata arrays are stored as raw binary next to the model.
        return np.fromfile(os.path.join(model_directory, file_name),
                           dtype=int)

    response_labels = {
        split: load_int_array(f"response_labels_{split}.dat")
        for split in splits
    }
    stimulus_labels = {
        split: load_int_array(f"stimulus_labels_{split}.dat")
        for split in splits
    }
    subjects = {
        split: load_int_array(f"subjects_{split}.dat")
        for split in splits
    }
    trial_index = {
        split: load_int_array(f"trial_index_{split}.dat")
        for split in splits
    }

    # The number of examples per channel file is only available from the file
    # name (second-to-last '_'-separated field), so scan the directory tree.
    # When several files match the same prefix, the last one visited wins.
    example_counts = {
        f"channel_{kind}_{split}": None
        for kind in kinds for split in splits
    }
    for _dirpath, _dirnames, filenames in os.walk(model_directory):
        for file_name in filenames:
            for prefix in example_counts:
                if prefix in file_name:
                    example_counts[prefix] = int(file_name.split('_')[-2])

    def load_channels(prefix):
        # Read one channel file and give it the
        # (channels, examples, example_length) layout.
        count = example_counts[prefix]
        if count is None:
            raise FileNotFoundError(
                f"No '{prefix}_*.dat' file found in {model_directory}")
        data = np.fromfile(
            os.path.join(
                model_directory,
                f"{prefix}_{NUMBER_OF_CHANNELS}_{count}_{example_length}.dat"
            ))
        return np.reshape(data, (NUMBER_OF_CHANNELS, count, example_length))

    # Every split, including the test response data, is read from the file
    # carrying its own split name.
    channels = {prefix: load_channels(prefix) for prefix in example_counts}

    trial_lengths = np.fromfile(os.path.join(trial_lengths_directory,
                                             "trial_lengths.dat"),
                                dtype=int)
    # NOTE(review): hard-coded shape, presumably (subjects, trials) - confirm.
    trial_lengths = np.reshape(trial_lengths, (11, 180))

    # dual dataset creation
    loaders = {}
    for split in splits:
        dataset = DatasetForClassificationStatistics(
            channels[f"channel_stimulus_{split}"],
            channels[f"channel_response_{split}"], stimulus_labels[split],
            response_labels[split], subjects[split], trial_index[split])
        loaders[split] = DataLoader(dataset, batch_size=1, shuffle=True)

    stimulus_classified = []
    stimulus_misclassified = []
    response_classified = []
    response_misclassified = []

    requested_splits = (
        ("train", "Train", generate_from_train),
        ("cross", "Cross", generate_from_cross),
        ("test", "Test", generate_from_test),
    )
    for split, folder_name, enabled in requested_splits:
        if not enabled:
            continue

        split_output_directory = os.path.join(output_directory, folder_name)
        os.makedirs(split_output_directory, exist_ok=True)

        (split_stimulus_classified, split_stimulus_misclassified,
         split_response_classified, split_response_misclassified
         ) = model.predict_for_classification_statistics(
             loaders[split], trial_lengths, STIMULUS_OUTPUT_SIZE,
             RESPONSE_OUTPUT_SIZE, split_output_directory, split,
             median_value)

        stimulus_classified.extend(split_stimulus_classified)
        stimulus_misclassified.extend(split_stimulus_misclassified)
        response_classified.extend(split_response_classified)
        response_misclassified.extend(split_response_misclassified)

    aggregated_output_directory = os.path.join(output_directory, 'Aggregated')
    os.makedirs(aggregated_output_directory, exist_ok=True)

    plot_histogram(stimulus_classified, stimulus_misclassified,
                   'Stimulus Correctly Classified',
                   'Stimulus Incorrectly Classified',
                   aggregated_output_directory, "stimulus_classified",
                   median_value)
    plot_histogram(response_classified, response_misclassified,
                   'Response Correctly Classified',
                   'Response Incorrectly Classified',
                   aggregated_output_directory, "response_classified",
                   median_value)

    log("Finished classification statistics: " +
        str(datetime.datetime.now()),
        file=None,
        widget=widget)
def graph_metrics(matrices_directory, output_directory, trial_index,
                  is_trial = False, widget = None, histogram = False,
                  percentage = 0.05):
    """Build graphs from adjacency matrices and run the graph analyses.

    Reads one matrix (trial mode) or one matrix per numbered sub-folder
    (window mode), turns each into three directed graphs and passes them to
    the clique, strongly-connected-components, minimum-spanning-arborescence,
    shortest-path, clustering and centrality routines. Results accumulate in
    a properties dictionary that is written to ``property_dict.json`` in the
    output directory.

    Args:
        matrices_directory: Base directory of the input matrices; the
            ``Trial``/``Window`` and ``<trial_index>`` sub-folders are
            appended to it.
        output_directory: Base output directory; results go to
            ``GraphWavenetAdjacency/<Trial|Window>/<trial_index>`` below it.
        trial_index: Index used as the last path component for both input
            and output.
        is_trial: Selects trial mode (single matrix) over window mode.
        widget: Forwarded to ``log`` and to the analysis routines.
        histogram: When true, ``compute_histogram`` is called on the loaded
            matrices.
        percentage: Fraction of the largest matrix values that pass the edge
            filter. Values outside ``[0, 1)`` are clamped to the nearest
            valid cut-off instead of raising ``IndexError``.
    """
    mode_folder = 'Trial' if is_trial else 'Window'

    # create output directory (intermediate levels are created as needed)
    output_directory = os.path.join(output_directory, 'GraphWavenetAdjacency',
                                    mode_folder, f'{trial_index}')
    os.makedirs(output_directory, exist_ok = True)

    # find input matrix
    matrices_directory = os.path.join(matrices_directory, mode_folder,
                                      f'{trial_index}')

    log_file = os.path.join(output_directory, 'log.txt')
    log("Started graph analysis: " + str(datetime.datetime.now()),
        file = log_file, widget = widget)

    # Resume from a previously saved properties dictionary when there is one.
    # Keys are converted back to int after loading; the old file is removed
    # and a fresh one is written at the end of this function.
    properties_dict_file_path = os.path.join(output_directory,
                                             'property_dict.json')
    if os.path.exists(properties_dict_file_path):
        properties_dict = load_dictionary_from_file(properties_dict_file_path)
        properties_dict = { int(k): v for k, v in properties_dict.items() }
        os.remove(properties_dict_file_path)
        init_properties_dict = False
    else:
        properties_dict = { }
        init_properties_dict = True

    input_matrix = []
    if is_trial:
        input_matrix.append(read_and_normalize_matrix(matrices_directory))
    else:
        # Window mode: every immediate sub-folder is named by an integer;
        # process them in numeric order.
        folder_list = sorted(
            int(x) for x in next(os.walk(matrices_directory))[1])
        for folder in folder_list:
            matrix_directory = os.path.join(matrices_directory, f'{folder}')
            input_matrix.append(read_and_normalize_matrix(matrix_directory))
            if init_properties_dict:
                properties_dict[folder] = { }

    if histogram:
        compute_histogram(input_matrix, widget, output_directory, is_trial)

    inversed_filtered_graphs = []
    filtered_graphs = []
    inversed_unfiltered_graphs = []
    adjacency_matrices = []

    for matrix in input_matrix:
        inversed_unfiltered = nx.DiGraph()
        inversed_filtered = nx.DiGraph()
        filtered = nx.DiGraph()
        inversed_unfiltered_graphs.append(inversed_unfiltered)
        inversed_filtered_graphs.append(inversed_filtered)
        filtered_graphs.append(filtered)

        # NOTE(review): assumes `matrix` is a flat array of
        # NUMBER_OF_CHANNELS ** 2 values at this point - confirm against
        # read_and_normalize_matrix.
        values = sorted(matrix, reverse = True)
        # Clamp so that percentage >= 1.0 selects the smallest value (keep
        # every edge) and a negative percentage selects the largest one.
        cut_index = int(percentage * len(values))
        cut_index = max(0, min(cut_index, len(values) - 1))
        threshold = values[cut_index]

        matrix = matrix.reshape(NUMBER_OF_CHANNELS, NUMBER_OF_CHANNELS)
        adjacency_matrices.append(matrix)

        for start in range(NUMBER_OF_CHANNELS):
            for end in range(NUMBER_OF_CHANNELS):
                weight = matrix[start][end]
                source = CHANNELS_DICT[start]
                target = CHANNELS_DICT[end]
                if weight >= threshold:
                    inversed_filtered.add_edge(source, target,
                                               weight = 1.0 - weight)
                    filtered.add_edge(source, target, weight = weight)
                # The unfiltered graph receives every edge.
                inversed_unfiltered.add_edge(source, target,
                                             weight = 1.0 - weight)

    log("Started graph clique: " + str(datetime.datetime.now()),
        file = log_file, widget = widget)
    graph_clique(output_directory, is_trial, inversed_filtered_graphs, widget,
                 properties_dict)

    log("Started graph strongly connected components: "
        + str(datetime.datetime.now()), file = log_file, widget = widget)
    graph_strongly_connected_components(output_directory, is_trial,
                                        inversed_filtered_graphs, widget,
                                        properties_dict)

    log("Started graph MSA: " + str(datetime.datetime.now()),
        file = log_file, widget = widget)
    graph_minimum_spanning_arborescence(output_directory, is_trial,
                                        inversed_unfiltered_graphs,
                                        adjacency_matrices, widget,
                                        properties_dict)

    log("Started graph shortest path: " + str(datetime.datetime.now()),
        file = log_file, widget = widget)
    graph_shortest_path(output_directory, is_trial, inversed_filtered_graphs,
                        widget, properties_dict)

    log("Started graph clustering: " + str(datetime.datetime.now()),
        file = log_file, widget = widget)
    clustering(output_directory, is_trial, filtered_graphs, widget,
               properties_dict)

    log("Started graph centrality: " + str(datetime.datetime.now()),
        file = log_file, widget = widget)
    centrality(output_directory, is_trial, inversed_filtered_graphs, widget,
               properties_dict)

    save_dictionary_to_file(properties_dict,
                            os.path.join(output_directory,
                                         'property_dict.json'))

    log("Finished graph analysis: " + str(datetime.datetime.now()),
        file = log_file, widget = widget)