def save_text(self, data_tuple, labels, protocol="a"):
    """
    Write one text file per cluster label.

    For every distinct label the matching sentences are written one per
    line, followed by a separator line and then the matching file names.
    The output file is named ``<label>.txt`` and is placed under
    ``Path.cluster_directory + self.directory``.

    :param data_tuple: indexable by InformationType.SENTENCE.value and
        InformationType.FILE_NAME.value; each entry is then indexed with
        the mask ``labels == label`` (presumably numpy arrays -- TODO confirm)
    :param labels: cluster label of every row (presumably a numpy array)
    :param protocol: String --> "a", "wb", "w", "a+", "w+"
    :return: None
    """
    for label in set(labels):
        mask = labels == label

        text = list(data_tuple[InformationType.SENTENCE.value][mask])
        text.append("\n------------------------------------------\n")
        text.extend(data_tuple[InformationType.FILE_NAME.value][mask])

        target_file_name = str(label) + ".txt"
        file_path = os.path.join(Path.cluster_directory + self.directory + "/", target_file_name)
        Log.write("saving" + target_file_name + " to: " + file_path)

        # The context manager closes the handle even when a write fails.
        with open(file_path, protocol) as file:
            for lines in text:
                # writelines is kept on purpose: an entry may itself be a
                # list (case of the precedent file names).
                file.writelines(lines)
                file.writelines("\n")

        Log.write(target_file_name + " saved to: " + file_path)
def dump_tours(num_hidden, tour_lengths, history, cd_steps, iter, epoch):
    """
    Append the tour lengths to a text file and render sampled tours as images.

    Each tour in ``history`` contributes three visible states (first,
    second and last). Tours are bucketed by length: exactly 2 states is
    "short", exactly ``cd_steps + 1`` states is "long", anything else is
    "inter". The three buckets are handed to
    ``ImageService.get_tours_as_images``.
    """
    lengths_path = OUTPUT_FOLDER + 'tour_lengths_h=%s' % num_hidden
    with open(lengths_path, 'a') as out:
        out.write(" ".join(str(int(length)) for length in tour_lengths))
        out.write(" ")

    idxs = [0, 1, -1]
    short_tour, inter_tour, long_tour = [], [], []
    for tour in history:
        visibles = [state.v for state in tour]
        Log.var(len_v_history=len(visibles))

        if len(visibles) == 2:
            bucket = short_tour
        elif len(visibles) == cd_steps + 1:
            bucket = long_tour
        else:
            bucket = inter_tour
        bucket.extend([visibles[i] for i in idxs])

    tours = Util.dictize(short_tour=np.array(short_tour),
                         inter_tour=np.array(inter_tour),
                         long_tour=np.array(long_tour))
    ImageService.get_tours_as_images(num_hidden, iter, epoch, len(idxs), tours)
def main():
    """
    Record the channel named by the single command-line argument.

    Ctrl-C (SIGINT) asks the recorder to stop.
    """
    argument_count = len(sys.argv)
    if argument_count != 2:
        # NOTE(review): presumably Log.fatal terminates the process -- confirm
        Log.fatal('Needs channel to record!')

    channel_name = sys.argv[1]
    recorder = Recorder()

    def stop_on_interrupt(sig, frame):
        return recorder.stop()

    signal.signal(signal.SIGINT, stop_on_interrupt)
    recorder.record(channel_name)
class POP3Telnet:
    """Thin wrapper around a ``telnetlib.Telnet`` connection with logging."""

    def __init__(self, host, port):
        """Open the telnet connection to ``host``:``port`` and log it."""
        self.tel = telnetlib.Telnet(host, port)
        self.log = Log()
        self.log.info("Connected to {host} on port {port}".format(host=host, port=port), tag="TELNET")

    def close(self):
        """Close the underlying telnet connection."""
        self.tel.close()

    def read_line(self):
        """Read up to and including the next newline; returns bytes."""
        # telnetlib works on bytes, so the expected marker must be bytes too
        # (every other call in this class already passes bytes).
        return self.tel.read_until(b"\n")

    def write(self, msg):
        """Send ``msg`` followed by CRLF."""
        self.tel.write("{msg}\r\n".format(msg=msg).encode())

    def tell(self, whom, what):
        """Send a ``tell <whom> <what>`` command (no line terminator is appended)."""
        self.tel.write("tell {whom} {what}".format(whom=whom, what=what).encode())

    def login(self, user, password):
        """
        Wait for the login prompt, then send the user name and the password.

        NOTE(review): this body was corrupted in the source file; it is
        reconstructed as prompt -> user -> password. Whether the original
        also waited for a password prompt cannot be told from here -- confirm.
        """
        self.tel.read_until(b"login: ")
        self.tel.write(user.encode('ascii') + b"\n")
        self.tel.write(password.encode('ascii') + b"\n")
        self.log.info("Logged in as {user}".format(user=user), tag="TELNET")
class Command:
    """
    A named command bound to a controller class and one of its actions.

    ``controller`` has the form ``"<module>.<action>"``: ``<module>`` is
    imported from the ``controllers`` package, the class named
    ``<module>.capitalize()`` is instantiated, and ``<action>`` is passed
    to that instance's ``execute`` when the command runs.
    """

    def __init__(self, key, controller, req_args=False, description="No description provided.", restriction="none"):
        """
        :param key: identifier of the command
        :param controller: ``"<module>.<action>"`` string
        :param req_args: True when the command must be given an argument
        :param description: human readable description
        :param restriction: key into the ``lists`` mapping passed to
            ``execute``; any value containing "none" disables the check
        """
        self.key = key
        self.log = Log()
        self.req_args = req_args
        self.controller = controller
        self.description = description
        self.restriction = restriction

        _controller_dir = self.controller.split(".")
        _module_name = _controller_dir[0]
        self.controller_name = _controller_dir[1]
        _module = importlib.import_module("controllers.{0}".format(_module_name))
        self.controller_class = getattr(_module, _module_name.capitalize())()

    def execute(self, user, telnet, lists, arg=None):
        """
        Run the command for ``user`` after permission and argument checks.

        :param user: name of the calling user
        :param telnet: connection object forwarded to the controller
        :param lists: mapping from restriction name to an object exposing
            ``get_users(telnet)``
        :param arg: optional command argument
        :return: None; a refused call is logged as a warning and dropped
        """
        if "none" not in self.restriction:
            # The role lists come in as an argument; they are not stored on
            # the instance.
            allowed_users = lists[self.restriction].get_users(telnet)
            if user not in allowed_users:
                self.log.warn("{user} called {cmd} without permission (restricted by role)".format(user=user, cmd=self.controller_name), tag="command")
                return

        if self.req_args and not arg:
            self.log.warn("{user} called {cmd} which requires arguments".format(user=user, cmd=self.controller_name), tag="command")
            return

        self.controller_class.execute(self.controller_name, arg, user, telnet)
def save_model(rbm: RBM, filename):
    """
    Pickle ``rbm`` into MODEL_FOLDER and return the path that was written.

    The file is named ``model_<filename>_<current time.time()>.model``.
    """
    stamped_name = 'model_%s_%s' % (filename, time.time())
    target_path = MODEL_FOLDER + "%s.model" % stamped_name

    with open(target_path, 'wb') as fp:
        pickle.dump(rbm, fp)

    Log.info("Model dumped to %s" % target_path)
    return target_path
def test_write(self):
    """Writing a log message must leave a ``server.log`` two levels above this file."""
    filename = "server.log"
    Log.write("testing")

    root_directory = os.path.abspath(__file__ + r"/../../")
    expected_log = os.path.join(root_directory, filename)
    self.assertTrue(os.path.isfile(expected_log))
def __init__(self, server, sender, password, receiver, title, message=None, path=None):
    """Initialise the Email.

    :param title: mail subject, required.
    :param message: mail body, optional.
    :param path: attachment path; either a list (several attachments) or a
        str (single attachment), optional.
    :param server: SMTP server, required.
    :param sender: sender, required.
    :param password: sender's password, required.
    :param receiver: recipient(s); separate several recipients with ";",
        required.
    """
    # Connection / account settings.
    self.server = server
    self.sender = sender
    self.password = password
    self.receiver = receiver

    # Message content.
    self.title = title
    self.message = message
    self.files = path

    self.msg = MIMEMultipart('related')
    self.log = Log()
def __dictionary_to_list():
    """
    Flatten the binarized ``precedent_vectors.bin`` mapping into a list.

    Stored format:
        {filename: {name: 'AZ-XXXXXXX.txt', demands_vector: [...],
                    facts_vector: [...], outcomes_vector: [...]}}

    :return: list of the inner dictionaries, in the mapping's iteration
        order; an empty list when the binary could not be loaded
    """
    precedent_vector = Load.load_binary("precedent_vectors.bin")
    if precedent_vector is None:
        return []

    Log.write("Formatting data")
    return [precedent_vector[file_name] for file_name in precedent_vector]
def weights_to_csv(self):
    """
    Write the weights of every estimator to ``weights.csv``.

    1) get the facts (they form the header row)
    2) for every outcome write one row: its label followed by the weights

    The model and labels are loaded from their binaries when no model is
    held yet; if that fails nothing is written. Estimators that expose no
    ``coef_`` attribute are skipped.

    :return: None
    """
    try:
        if self.model is None:
            self.model = Load.load_binary('multi_class_svm_model.bin')
            self.classifier_labels = Load.load_binary('classifier_labels.bin')
    except Exception:
        # Best effort: without a model there is nothing to export.
        # (Exception rather than BaseException so Ctrl-C still works.)
        return None

    index = TagPrecedents().get_intent_index()
    fact_header = [" "] + [header[1] for header in index['facts_vector']]

    # newline='' is what the csv module expects; without it some platforms
    # emit an extra blank line after every row.
    with open('weights.csv', 'w', newline='') as outcsv:
        writer = csv.writer(outcsv)
        writer.writerow(fact_header)

        for i, estimator in enumerate(self.model.estimators_):
            outcome_list = [self.classifier_labels[i]]
            try:
                outcome_list.extend(estimator.coef_[0])
                writer.writerow(outcome_list)
            except AttributeError:
                # Estimator without coefficients: no row for this outcome.
                pass

    Log.write('Weights saved to .csv')
def run_ensemble():
    """
    Train the trees of an ensemble one after another.

    Every tree is trained through ``run_tree`` with its own
    ``tree_<n>`` log directory. From the second tree on, the ensemble
    built so far is analysed after each new tree.
    """
    all_args = get_args()

    # Create a logger
    log = Log(all_args.log_dir)
    print("Log dir: ", all_args.log_dir, flush=True)
    # Log the run arguments
    save_args(all_args, log.metadata_dir)

    if all_args.disable_cuda or not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        device = torch.device('cuda')

    files_dir_missing = not os.path.isdir(os.path.join(all_args.log_dir, "files"))
    if files_dir_missing:
        os.mkdir(os.path.join(all_args.log_dir, "files"))

    # Obtain the data loaders
    trainloader, projectloader, test_loader, classes, num_channels = get_dataloaders(all_args)

    log_dir_orig = all_args.log_dir

    # Per-tree results, in training order.
    trained_orig_trees, trained_pruned_trees, trained_pruned_projected_trees = [], [], []
    orig_test_accuracies, pruned_test_accuracies, pruned_projected_test_accuracies = [], [], []
    project_infos, infos_sample_max, infos_greedy, infos_fidelity = [], [], [], []

    # Train trees in ensemble one by one and save corresponding trees and accuracies
    for pt in range(1, all_args.nr_trees_ensemble + 1):
        torch.cuda.empty_cache()
        print("\nTraining tree ", pt, "/", all_args.nr_trees_ensemble, flush=True)
        log.log_message('Training tree %s...' % str(pt))

        args = deepcopy(all_args)
        args.log_dir = os.path.join(log_dir_orig, 'tree_' + str(pt))

        (trained_tree, pruned_tree, pruned_projected_tree,
         original_test_acc, pruned_test_acc, pruned_projected_test_acc,
         project_info, eval_info_samplemax, eval_info_greedy,
         info_fidelity) = run_tree(args)

        trained_orig_trees.append(trained_tree)
        trained_pruned_trees.append(pruned_tree)
        trained_pruned_projected_trees.append(pruned_projected_tree)

        orig_test_accuracies.append(original_test_acc)
        pruned_test_accuracies.append(pruned_test_acc)
        pruned_projected_test_accuracies.append(pruned_projected_test_acc)

        project_infos.append(project_info)
        infos_sample_max.append(eval_info_samplemax)
        infos_greedy.append(eval_info_greedy)
        infos_fidelity.append(info_fidelity)

        if pt > 1:
            # analyse ensemble with > 1 trees:
            analyse_ensemble(log, all_args, test_loader, device,
                             trained_orig_trees, trained_pruned_trees,
                             trained_pruned_projected_trees,
                             orig_test_accuracies, pruned_test_accuracies,
                             pruned_projected_test_accuracies,
                             project_infos, infos_sample_max, infos_greedy,
                             infos_fidelity)
def initializing_log(self):
    """
    Create the run log and record the model configuration in it.

    The log is titled with the algorithm name, the fold info and the
    current local time; every configuration entry is added as ``key=value``.
    """
    timestamp = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    log_title = self.algorName + self.foldInfo + ' ' + timestamp
    self.log = Log(self.algorName, log_title)

    # save configuration
    self.log.add('### model configuration ###')
    for key in self.config.config:
        self.log.add(key + '=' + self.config[key])
def eval_fidelity(tree: ProtoTree, test_loader: DataLoader, device, log: Log = None, progress_prefix: str = 'Fidelity') -> dict:
    """
    Measure how often deterministic routing agrees with distributed routing.

    Every batch is classified three times ('distributed', 'sample_max' and
    'greedy' routing). Fidelity is the fraction of test samples whose
    predicted class under a deterministic strategy equals the prediction
    under distributed routing.

    :param tree: model to evaluate; moved to ``device`` and put in eval mode
    :param test_loader: loader over the test set
    :param device: device the model and the batches are moved to
    :param log: optional logger; when None nothing is logged
    :param progress_prefix: description shown on the progress bar
    :return: dict with 'distr_samplemax_fidelity' and 'distr_greedy_fidelity'
    """
    tree = tree.to(device)

    # Keep an info dict about the procedure
    info = dict()

    # Make sure the model is in evaluation mode
    tree.eval()

    # Show progress on progress bar
    test_iter = tqdm(enumerate(test_loader),
                     total=len(test_loader),
                     desc=progress_prefix,
                     ncols=0)

    distr_samplemax_fidelity = 0
    distr_greedy_fidelity = 0

    # Iterate through the test set
    for i, (xs, ys) in test_iter:
        xs, ys = xs.to(device), ys.to(device)

        # Use the model to classify this batch of input data, with 3 types of routing
        out_distr, _ = tree.forward(xs, 'distributed')
        ys_pred_distr = torch.argmax(out_distr, dim=1)

        out_samplemax, _ = tree.forward(xs, 'sample_max')
        ys_pred_samplemax = torch.argmax(out_samplemax, dim=1)

        out_greedy, _ = tree.forward(xs, 'greedy')
        ys_pred_greedy = torch.argmax(out_greedy, dim=1)

        # Calculate fidelity
        distr_samplemax_fidelity += torch.sum(torch.eq(ys_pred_samplemax, ys_pred_distr)).item()
        distr_greedy_fidelity += torch.sum(torch.eq(ys_pred_greedy, ys_pred_distr)).item()

        # Update the progress bar
        test_iter.set_postfix_str(f'Batch [{i + 1}/{len(test_iter)}]')

        del out_distr
        del out_samplemax
        del out_greedy

    # Turn the agreement counts into fractions of the whole test set.
    dataset_size = float(len(test_loader.dataset))
    distr_samplemax_fidelity = distr_samplemax_fidelity / dataset_size
    distr_greedy_fidelity = distr_greedy_fidelity / dataset_size

    info['distr_samplemax_fidelity'] = distr_samplemax_fidelity
    info['distr_greedy_fidelity'] = distr_greedy_fidelity

    # `log` is optional (defaults to None), so only report when one was given.
    if log is not None:
        log.log_message("Fidelity between standard distributed routing and sample_max routing: " + str(distr_samplemax_fidelity))
        log.log_message("Fidelity between standard distributed routing and greedy routing: " + str(distr_greedy_fidelity))

    return info
def run(command_list, dataset):
    """
    Train the multi output regression on ``dataset``.

    :param command_list: remaining command line arguments; none are
        accepted, so any argument is reported as unrecognized
    :param dataset: data handed to MultiOutputRegression
    :return: False when an argument was given, True after training
    """
    has_arguments = len(command_list) > 0
    if has_arguments:
        Log.write('Command not recognized')
        return False

    Log.write('Training multi output regression')
    regression = MultiOutputRegression(dataset)
    regression.train()
    return True
def main():
    """
    Download the requested time range of a vod into a ``.ts`` file.

    The range and the vod id come from the command line. Ctrl-C (SIGINT)
    asks the downloader to stop.
    """
    start_time, end_time, vod_id = CommandLineParser().parse_command_line()

    m3u8_playlist = PlaylistFetcher().fetch_for_vod(vod_id)
    if m3u8_playlist is None:
        # NOTE(review): presumably Log.fatal terminates the process -- confirm
        Log.fatal("Seems like vod {} doesn't exist".format(vod_id))

    playlist = Chunks.get(m3u8_playlist.segments, start_time, end_time)
    file_name = FileMaker.make_avoiding_overwrite(Vod.title(vod_id) + '.ts')
    downloader = PlaylistDownloader(playlist)

    def stop_on_interrupt(sig, frame):
        return downloader.stop()

    signal.signal(signal.SIGINT, stop_on_interrupt)
    downloader.download_to(file_name)
def eval(tree: ProtoTree, test_loader: DataLoader, epoch, device, log: Log = None, sampling_strategy: str = 'distributed', log_prefix: str = 'log_eval_epochs', progress_prefix: str = 'Eval Epoch') -> dict:
    """
    Evaluate ``tree`` on the test set and build its confusion matrix.

    :param tree: model to evaluate; moved to ``device`` and put in eval mode
    :param test_loader: loader over the test set
    :param epoch: epoch number, used for the progress bar and the log line
    :param device: device the model and the batches are moved to
    :param log: optional logger; when None nothing is logged
    :param sampling_strategy: routing strategy passed to ``tree.forward``
    :param log_prefix: not used inside this function
    :param progress_prefix: description shown on the progress bar
    :return: dict with 'confusion_matrix' and 'test_accuracy'; for any
        strategy other than 'distributed' also 'out_leaf_ix', the leaf
        indices accumulated from every batch
    """
    tree = tree.to(device)

    # Keep an info dict about the procedure
    info = dict()
    if sampling_strategy != 'distributed':
        info['out_leaf_ix'] = []

    # Build a confusion matrix
    cm = np.zeros((tree._num_classes, tree._num_classes), dtype=int)

    # Make sure the model is in evaluation mode
    tree.eval()

    # Show progress on progress bar
    test_iter = tqdm(enumerate(test_loader),
                     total=len(test_loader),
                     desc=progress_prefix + ' %s' % epoch,
                     ncols=0)

    # Iterate through the test set
    for i, (xs, ys) in test_iter:
        xs, ys = xs.to(device), ys.to(device)

        # Use the model to classify this batch of input data
        out, test_info = tree.forward(xs, sampling_strategy)
        ys_pred = torch.argmax(out, dim=1)

        # Update the confusion matrix (overall and for this batch only)
        cm_batch = np.zeros((tree._num_classes, tree._num_classes), dtype=int)
        for y_pred, y_true in zip(ys_pred, ys):
            cm[y_true][y_pred] += 1
            cm_batch[y_true][y_pred] += 1
        acc = acc_from_cm(cm_batch)
        test_iter.set_postfix_str(f'Batch [{i + 1}/{len(test_iter)}], Acc: {acc:.3f}')

        # keep list of leaf indices where test sample ends up when deterministic routing is used.
        if sampling_strategy != 'distributed':
            info['out_leaf_ix'] += test_info['out_leaf_ix']

        del out
        del ys_pred
        del test_info

    info['confusion_matrix'] = cm
    info['test_accuracy'] = acc_from_cm(cm)

    # `log` is optional (defaults to None), so only report when one was given.
    if log is not None:
        log.log_message("\nEpoch %s - Test accuracy with %s routing: " % (epoch, sampling_strategy) + str(info['test_accuracy']))

    return info
def analyse_leaf_distributions(tree: ProtoTree, log: Log):
    """
    Log the largest value of every leaf distribution, in ascending order.

    Leaves flagged with ``_log_probabilities`` are exponentiated first.
    Printed for experimental purposes.
    """
    peaks = []
    for leaf in tree.leaves:
        if leaf._log_probabilities:
            distribution = torch.exp(leaf.distribution())
        else:
            distribution = leaf.distribution()
        peaks.append(torch.max(distribution).item())

    max_values = sorted(peaks)
    log.log_message("Max values in softmax leaf distributions: \n" + str(max_values))
def __init__(self, src_w2i, src_i2w, tgt_w2i, tgt_i2w, embedding_dim, encoder_hidden_dim, decoder_hidden_dim, encoder_n_layers = 1, decoder_n_layers = 1, encoder_drop_prob=0.5, decoder_drop_prob=0.5, latent_size = 64, lr = 0.01, teacher_forcing_ratio=0.5, gradient_clip = 5, model_store_path = None, vae_kld_anneal_k = 0.0025, vae_kld_anneal_x0 = 2500, vae_kld_anneal_function="linear", decoder_word_input_drop = 0.5):
    """
    Build the encoder, VAE and decoder layers plus optimizer, loss and log.

    :param src_w2i, src_i2w: source vocabulary lookups; only ``len(src_w2i)``
        is used here (the vocabulary size)
    :param tgt_w2i, tgt_i2w: target vocabulary lookups; only ``len(tgt_w2i)``
        is used here
    :param embedding_dim: embedding size passed to encoder and decoder
    :param encoder_hidden_dim, decoder_hidden_dim: hidden sizes
    :param encoder_n_layers, decoder_n_layers: number of layers
    :param encoder_drop_prob, decoder_drop_prob: dropout values passed to
        the layers
    :param latent_size: size of the VAE latent vector
    :param lr: learning rate of the Adam optimizer
    :param teacher_forcing_ratio, gradient_clip: stored for later use
    :param model_store_path: directory for the model and its log; defaults
        to the directory of this source file; created when missing
    :param vae_kld_anneal_k, vae_kld_anneal_x0, vae_kld_anneal_function:
        stored for later use
    :param decoder_word_input_drop: passed to the decoder layer
    """
    super(LSTMVAE, self).__init__()

    self.encoder_hidden_dim = encoder_hidden_dim
    self.decoder_hidden_dim = decoder_hidden_dim
    self.decoder_n_layers = decoder_n_layers
    self.latent_size = latent_size
    self.teacher_forcing_ratio = teacher_forcing_ratio
    self.gradient_clip = gradient_clip
    self.vae_kld_anneal_k = vae_kld_anneal_k
    self.vae_kld_anneal_x0 = vae_kld_anneal_x0
    self.vae_kld_anneal_function = vae_kld_anneal_function
    self.decoder_word_input_drop = decoder_word_input_drop

    self.encoder = SimpleLSTMEncoderLayer(len(src_w2i), embedding_dim, encoder_hidden_dim, encoder_n_layers, encoder_drop_prob)
    self.decoder = DroppedLSTMDecoderLayer(len(tgt_w2i), embedding_dim, self.latent_size, decoder_hidden_dim, decoder_n_layers, decoder_drop_prob, decoder_word_input_drop)
    # *2 because encoder is bidirectional and thus hidden is double
    self.vae = VAE(encoder_hidden_dim*2, self.latent_size)

    self.optimizer = torch.optim.Adam(list(self.encoder.parameters())+list(self.decoder.parameters())+list(self.vae.parameters()), lr=lr)
    self.criterion = nn.CrossEntropyLoss(ignore_index = 0)

    self.src_w2i = src_w2i
    self.src_i2w = src_i2w
    self.tgt_w2i = tgt_w2i
    self.tgt_i2w = tgt_i2w
    self.epoch = 0
    self.lr = lr
    self.src_vocab_size = len(src_w2i)
    self.tgt_vocab_size = len(tgt_w2i)
    print("Source vocab size: {}".format(self.src_vocab_size))
    print("Target vocab size: {}".format(self.tgt_vocab_size))

    self.train_on_gpu=torch.cuda.is_available()
    if(self.train_on_gpu):
        print('Training on GPU.')
    else:
        print('No GPU available, training on CPU.')
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if model_store_path is None:
        self.model_store_path = os.path.dirname(os.path.realpath(__file__))
    else:
        self.model_store_path = model_store_path
    # Check the resolved path: the raw argument may be None, and
    # os.path.exists(None) raises TypeError.
    if not os.path.exists(self.model_store_path):
        os.makedirs(self.model_store_path)

    self.log_path = os.path.join(self.model_store_path,"log")
    self.log = Log(self.log_path, clear=True)
def get_similarity_maps(tree: ProtoTree, project_info: dict, log: Log = None):
    """
    Compute one similarity map per entry of ``project_info``.

    For every key ``j`` the stored 'nearest_input' is run through
    ``tree.forward_partial``; the similarity map is ``exp(-distance)`` of
    the distances at index ``[0, j]``, returned as a numpy array.

    The 'nearest_input' entry is deleted from each ``project_info[j]``,
    so the caller's dict is modified in place.

    :param tree: model providing ``forward_partial``
    :param project_info: dict of dicts, each holding 'nearest_input'
    :param log: optional logger; when None nothing is logged
    :return: (sim_maps, project_info)
    """
    # `log` is optional (defaults to None), so only report when one was given.
    if log is not None:
        log.log_message("\nCalculating similarity maps (after projection)...")

    sim_maps = dict()
    for j in project_info.keys():
        nearest_x = project_info[j]['nearest_input']
        with torch.no_grad():
            _, distances_batch, _ = tree.forward_partial(nearest_x)
            sim_maps[j] = torch.exp(-distances_batch[0, j, :, :]).cpu().numpy()
        del nearest_x
        # Only the inner dict changes size, so iterating the outer keys is safe.
        del project_info[j]['nearest_input']
    return sim_maps, project_info
def save_binary(self, filename, content):
    """
    Dump ``content`` to ``filename`` inside the binary directory.

    Uses joblib implementation over pickle for performance and memory.

    :param filename: String
    :param content: Object
    :return: None
    """
    destination = os.path.join(Path.binary_directory, filename)
    Log.write("saving" + filename + " to: " + destination)
    joblib.dump(content, destination)
    Log.write(filename + " saved to: " + destination)
def load(self):
    """
    Loads the regressors different components

    The regressor is read with ``load_model`` from
    ``<regressor_name>_regressor.bin``, the scaler from
    ``<regressor_name>_scaler.bin``; both are combined into ``self.model``.
    ``self.mean_facts_vector`` is taken from ``model_metrics.bin``.
    """
    regressor_name = self.regressor_name
    regressor_file = '{}_regressor.bin'.format(regressor_name)

    Log.write("Loading " + regressor_file)
    file_path = os.path.join(Path.binary_directory, regressor_file)
    regressor = load_model(file_path)
    # Report success only once the model has really been loaded.
    Log.write(regressor_file + " is successfully loaded")

    scaler = Load.load_binary('{}_scaler.bin'.format(regressor_name))
    self.model = AbstractRegressor._create_pipeline(scaler, regressor)
    self.mean_facts_vector = Load.load_binary('model_metrics.bin')['regressor'][regressor_name]['mean_facts_vector']
def __init__(self, v):
    """
    Initialise the state from vehicle ``v``.

    Pose, speeds, turning radius and sensor are read from ``v``. The
    desired speed starts at the current speed, the desired heading at the
    pose orientation, and the acceleration at half the maximum speed.
    """
    from dto.vehicledto import VehicleDTO

    # Pose first: getPosition() is called right after it is set.
    self.pose = v.getPose()
    self.initialPosition = self.getPosition()
    self.desiredHeading = self.pose.getOrientation()

    current_speed = v.getCurrentSpeed()
    self.currentSpeed = current_speed
    self.desiredSpeed = current_speed

    max_speed = v.getMaxSpeed()
    self.maxSpeed = max_speed
    self.turningRadius = v.getTurningRadius()
    self.acceleration = max_speed / 2.0

    self.sensor = v.getSensor()
    self.log = Log([self])
def save(self):
    """
    Saves the scaler and regressor. Does not use joblib for the regressor
    as it is not supported

    The regressor (second pipeline step) is saved through its own
    ``model.save``; the scaler (first pipeline step) and the data metrics
    go through ``Save().save_binary``. The dataset reference is dropped
    afterwards.
    """
    regressor_name = self.regressor_name
    regressor_file = '{}_regressor.bin'.format(regressor_name)
    file_path = os.path.join(Path.binary_directory, regressor_file)

    Log.write("saving" + regressor_file + " to: " + file_path)
    self.model.steps[1][1].model.save(file_path)
    # Report success only once the regressor has really been written.
    Log.write(regressor_file + " saved to: " + file_path)

    Save().save_binary('{}_scaler.bin'.format(regressor_name), self.model.steps[0][1])
    Save().save_binary('model_metrics.bin', self.data_metrics())
    self.dataset = None
def __init__(self, key, controller, req_args=False, description="No description provided.", restriction="none"):
    """
    Bind the command to its controller.

    ``controller`` has the form ``"<module>.<action>"``: the module is
    imported from the ``controllers`` package, the class named after the
    capitalized module name is instantiated, and the action name is kept
    in ``controller_name``.
    """
    self.key = key
    self.log = Log()
    self.req_args = req_args
    self.controller = controller
    self.description = description
    self.restriction = restriction

    path_parts = self.controller.split(".")
    module_name = path_parts[0]
    self.controller_name = path_parts[1]

    controller_module = importlib.import_module("controllers.{0}".format(module_name))
    controller_type = getattr(controller_module, module_name.capitalize())
    self.controller_class = controller_type()
def run(command_list):
    """
    1) Converts the dictionary of precedent vectors to a list of dictionaries
    2) Trains the support vector machine model
    3) Trains the similarity finder model

    The last argument, when it parses as an integer, limits how many
    precedents are used.

    :param command_list: List of command line arguments
    :return: boolean
    """
    # ------------------- COMMAND LINE SYNTAX --------------------------
    first_argument = command_list[0]
    if first_argument[:2] == '--' and first_argument not in CommandEnum.command_list:
        Log.write(command_list[0] + " not recognized")
        return False

    precedent_vector = __dictionary_to_list()
    if len(precedent_vector) == 0:
        return False

    try:
        data_size = command_list[-1]
        precedent_vector = precedent_vector[:int(data_size)]
    except (IndexError, ValueError):
        # No usable size argument: keep the whole dataset.
        pass
    except TypeError:
        Log.write(
            "create the precedent vector model first.\nCommand: python main.py -post"
        )
        return False

    # ------------------- TRAINING --------------------------
    remaining_arguments = command_list[1:]

    if CommandEnum.ALL in command_list:
        classifier_driver.run(remaining_arguments, precedent_vector)
        regression_driver.run(remaining_arguments, precedent_vector)
        SimilarFinder(train=True, dataset=precedent_vector)
        return True

    if CommandEnum.SVM in command_list:
        classifier_driver.run(command_list[1:], precedent_vector)

    if CommandEnum.SVR in command_list:
        regression_driver.run(command_list[1:], precedent_vector)

    if CommandEnum.SIMILARITY_FINDER in command_list:
        SimilarFinder(train=True, dataset=precedent_vector)

    return True
def train(self):
    """
    Train a classifier using a linear-kernel SVC, one-vs-rest.

    1) reshape data in a format that sklearn understands
    2) binarize the labels for multi output
    3) split training data (80/20, fixed seed)
    4) train (fit)
    5) test model

    :return: None
    """
    # 1
    features, outcomes = self.reshape_dataset()

    # 2
    self.mlb = MultiLabelBinarizer()
    outcomes = self.mlb.fit_transform(outcomes)

    # 3
    x_train, x_test, y_train, y_test = train_test_split(
        features, outcomes, test_size=0.20, random_state=42)

    for caption, size in (("Sample size: {}", len(features)),
                          ("Train size: {}", len(x_train)),
                          ("Test size: {}", len(x_test))):
        Log.write(caption.format(size))

    # 4
    Log.write("Training Classifier Using Multi Class SVM")
    base_estimator = SVC(kernel='linear', random_state=42, probability=True)
    classifier = OneVsRestClassifier(base_estimator)
    classifier.fit(x_train, y_train)
    self.model = classifier

    # 5
    self.__test(x_test, y_test)
def __init__(self, src_w2i, src_i2w, tgt_w2i, tgt_i2w, embedding_dim, encoder_hidden_dim, decoder_hidden_dim, encoder_n_layers = 1, decoder_n_layers = 1, encoder_drop_prob=0.5, decoder_drop_prob=0.5, latent_size = 64, lr = 0.01, teacher_forcing_ratio=0.5, gradient_clip = 5, model_store_path = None, decoder_word_input_drop = 0.5):
    """
    Build the input, encoder, decoder and attention layers plus optimizer,
    loss and log.

    :param src_w2i, src_i2w: source vocabulary lookups; only ``len(src_w2i)``
        is used here (the vocabulary size)
    :param tgt_w2i, tgt_i2w: target vocabulary lookups; only ``len(tgt_w2i)``
        is used here
    :param embedding_dim: embedding size passed to the layers
    :param encoder_hidden_dim, decoder_hidden_dim: hidden sizes
    :param encoder_n_layers: accepted but not used in this constructor
    :param decoder_n_layers: number of decoder layers
    :param encoder_drop_prob, decoder_drop_prob: dropout values passed to
        the layers
    :param latent_size: passed to the decoder layer
    :param lr: learning rate of the Adam optimizer
    :param teacher_forcing_ratio, gradient_clip: stored for later use
    :param model_store_path: directory for the model and its log; defaults
        to the directory of this source file; created when missing
    :param decoder_word_input_drop: passed to the decoder layer
    """
    super(LSTMTransformer, self).__init__()

    self.encoder_hidden_dim = encoder_hidden_dim
    self.decoder_hidden_dim = decoder_hidden_dim
    self.decoder_n_layers = decoder_n_layers
    self.latent_size = latent_size
    self.teacher_forcing_ratio = teacher_forcing_ratio
    self.gradient_clip = gradient_clip
    self.decoder_word_input_drop = decoder_word_input_drop

    self.input = InputLayerWithAbsolutePosition(vocab_size=len(src_w2i), embedding_dim=embedding_dim, max_seq_len=512)
    self.encoder = SelfAttentionLSTMEncoderStack(2, embedding_dim, encoder_hidden_dim, max_seq_len=512, rnn_layers=1, drop_prob=encoder_drop_prob, attention_probs_dropout_prob=0.2, num_attention_heads = 8)
    self.decoder = DroppedLSTMDecoderLayer(len(tgt_w2i), embedding_dim, self.latent_size, decoder_hidden_dim, decoder_n_layers, decoder_drop_prob, decoder_word_input_drop)
    # *2 because encoder is bidirectional and thus hidden is double
    self.attention = AdditiveAttention(encoder_hidden_dim*2, decoder_hidden_dim)

    # NOTE(review): the parameters of self.input are not handed to the
    # optimizer -- confirm this is intended.
    self.optimizer = torch.optim.Adam(list(self.encoder.parameters())+list(self.decoder.parameters())+list(self.attention.parameters()), lr=lr)
    self.criterion = nn.CrossEntropyLoss(ignore_index = 0)

    self.src_w2i = src_w2i
    self.src_i2w = src_i2w
    self.tgt_w2i = tgt_w2i
    self.tgt_i2w = tgt_i2w
    self.epoch = 0
    self.lr = lr
    self.src_vocab_size = len(src_w2i)
    self.tgt_vocab_size = len(tgt_w2i)
    print("Source vocab size: {}".format(self.src_vocab_size))
    print("Target vocab size: {}".format(self.tgt_vocab_size))

    self.train_on_gpu=torch.cuda.is_available()
    if(self.train_on_gpu):
        print('Training on GPU.')
    else:
        print('No GPU available, training on CPU.')
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if model_store_path is None:
        self.model_store_path = os.path.dirname(os.path.realpath(__file__))
    else:
        self.model_store_path = model_store_path
    # Check the resolved path: the raw argument may be None, and
    # os.path.exists(None) raises TypeError.
    if not os.path.exists(self.model_store_path):
        os.makedirs(self.model_store_path)

    self.log_path = os.path.join(self.model_store_path,"log")
    self.log = Log(self.log_path, clear=True)
def download_to(self, file_name):
    """
    Write every segment of the playlist into ``file_name``.

    Segments are fetched chunk by chunk; empty chunks are skipped. The
    progress bar advances by one per segment. The loop ends early once
    ``self.stopped`` is set. An IOError is reported through ``Log.fatal``.
    """
    segments = self.playlist.segments
    progress_bar = ProgressBar(file_name, len(segments))

    with open(file_name, 'wb+') as output:
        for segment in segments:
            if self.stopped:
                print('')
                break

            try:
                for chunk in Contents.chunked(segment):
                    if not chunk:
                        continue
                    output.write(chunk)
            except IOError as e:
                Log.fatal(str(e))

            progress_bar.update_by(1)
def run(command_list):
    """
    Driver for feature extraction subsystem

    Dispatches on the first argument to the pre- or post-processing
    driver, handing it the remaining arguments.

    :param command_list: Command line arguments
    :return: True when the command was dispatched, False when it was not
        recognized
    """
    command = command_list[0]
    remaining_arguments = command_list[1:]

    if command == CommandEnum.PRE_PROCESSING:
        pre_processing_driver.run(remaining_arguments)
        return True

    if command == CommandEnum.POST_PROCESSING:
        post_processing_driver.run(remaining_arguments)
        return True

    Log.write("Command not recognized: " + command_list[0])
    return False
def train(self):
    """
    Trains the pipeline (scaler followed by a Keras regressor), then
    runs ``self.test()``.

    Inputs are the selected facts of every precedent, targets the
    selected outcome.
    """
    Log.write("Size of dataset: %d" % (len(self.dataset)))

    fact_rows = []
    outcome_rows = []
    for precedent in self.dataset:
        fact_rows.append(precedent['facts_vector'][self.important_facts_index])
    for precedent in self.dataset:
        outcome_rows.append(precedent['outcomes_vector'][self.outcome_index])
    X = np.array(fact_rows)
    Y = np.array(outcome_rows)

    self.input_dimensions = len(X[0])

    regressor = KerasRegressor(build_fn=self._nn_architecture,
                               epochs=1000,
                               batch_size=1024,
                               verbose=0)
    scaler = StandardScaler()

    self.model = AbstractRegressor._create_pipeline(scaler, regressor)
    self.model.fit(X, Y)
    self.test()
def __init__(self, test_file, timeout):
    """
    Derive the source file from ``test_file`` and run the tests at once.

    The source path is the test path with its last ``len("_test.ml")``
    characters replaced by ``.ml``. The outcome of ``self.run()`` is kept
    in ``self.failures``.
    """
    self.log = Log()
    self.timeout = timeout

    suffix_length = len("_test.ml")
    self.test_file = test_file
    self.src_file = "%s.ml" % test_file[:-suffix_length]

    # Bare file names (last path component) for messages.
    self.test_name = self.test_file.split("/")[-1]
    self.src_name = self.src_file.split("/")[-1]

    self.failures = self.run()
    print("")  # Separator
def unsubscribe_to_stream(self):
    """Run the parent's unsubscribe step, then log the RTSP unsubscription."""
    parent = super(RtspStreamHandler, self)
    parent.unsubscribe_to_stream()
    Log.info("Unsubscribing to RTSP stream")
class RunTest:
    """
    Execute all test cases found inside an OCaml test module.

    Specifically:
    - Accept the path to a test file as input (like "my_test.ml" or
      "worker_test.ml")
    - Compile the test case and the corresponding .ml file
      ("file.ml" + "file_test.ml")
    - Extract the inferred interface from the test file
    - Extract all test cases from the interface ("unit -> unit" functions
      beginning with "test_")
    - Execute each test case in a separate ocaml toplevel, record output
    """

    # In order of dependence
    LIBS = [
        "serializer.cma",
        "assertions.cma",
    ]

    def __init__(self, test_file, timeout):
        """
        Derive the source file from ``test_file`` and run the tests at once.

        :param test_file: path ending in "_test.ml"
        :param timeout: passed to every test process
        """
        self.log = Log()
        self.subprocess = SubprocessWrapper()
        self.timeout = timeout
        self.test_file = test_file
        self.src_file = "%s.ml" % test_file[:-(len("_test.ml"))]
        self.test_name = self.test_file.split("/")[-1]
        self.src_name = self.src_file.split("/")[-1]
        self.failures = self.run()
        print("")  # Separator

    def compile(self):
        """
        Compile the source file + test file and return the test interface.

        In detail:
        - Build the ocamlc command compiling source + test in unison,
          pulling in all necessary external libraries
        - Build the ocamlc command inferring the interface of the test,
          using the .cmo file generated from the source as a library

        :return: the inferred interface, split into lines
        """
        self.log.info("Compiling %s and %s" % (self.src_name, self.test_name))
        # Base command includes standard testing library
        base_command = " ".join(["ocamlc -c"] + self.LIBS)
        # Full compilation uses '-g' option to generate debug information
        compile_all = "%s -g %s %s" % (base_command, self.src_file, self.test_file)
        # Name of the .cmo file generated after compiling the source
        src_cmo = "%s.cmo" % self.src_file[:-(len(".ml"))]
        # Use '-i' option to just generate the interface for the function
        infer_interface = "%s -i %s %s" % (base_command, src_cmo, self.test_file)
        # Compile both files, then infer and return the interface.
        # The .cmo must exist before the interface can be inferred.
        self.subprocess.execute(compile_all, on_failure=self.compile_error)
        interface = self.subprocess.execute(infer_interface)
        return interface.split("\n")

    def compile_error(self, cpe):
        """
        Failure callback of the compilation: log the error and abort.

        :param cpe: error object whose ``output`` holds the compiler message
        :raises NoCompileException: always
        """
        err_msg = cpe.output.strip()
        # Retrieve failing line from the file
        sourceError = self._source_of_exception(err_msg)
        # Put the OCaml exception + line from source into one string.
        # Replace vanilla newlines with indented newlines.
        nocompile_msg = ("%s\n%s" % (err_msg, sourceError)).replace("\n", "\n ")
        self.log.nocompile(nocompile_msg)
        raise NoCompileException(1)

    def generate_scripts(self, test_interface):
        """
        Given the interface of a test file, generate a toplevel script for
        each test case.

        For instance, if the test file had an interface like:
            val test_one : unit -> unit
            val helper : int -> string
            val test_two : unit -> unit
            val test_three : int -> unit
        then scripts are generated for `test_one` and `test_two`, because
        they are `unit -> unit` functions that start with the magic prefix
        "test_"; `test_three` is skipped with a warning.

        :return: generator of (test case name, script) pairs, or None when
            there is no test case
        """
        test_cases = []
        for defined_name in test_interface:
            if not defined_name.startswith("val test_"):
                continue
            val_name, val_type = defined_name[4:].split(" : ", 1)
            if val_type != "unit -> unit":
                self.log.warn("skipping test case %s with type %s" % (val_name, val_type))
            else:
                test_cases.append(val_name)
        if test_cases == []:
            return None
        # Change "my_test.ml" to the module "My_test"
        test_name = self.test_name[:-(len(".ml"))].capitalize()
        return ((case, self._toplevel_input(test_name, case))
                for case in test_cases)

    def run(self):
        """
        Compile the test, generate one script per test case and run them.

        :return: list of (test case name, error message) pairs for the
            failed cases; None when the test file holds no test case
        """
        self._check_paths()
        # Get the directory containing the test file, move to it
        if "/" in self.test_file:
            testcase_dir = self.test_file.rsplit("/", 1)[0]
            os.chdir(testcase_dir)
        # Compile the test + source files
        self.log.header("Testing %s" % self.src_name)
        test_interface = self.compile()
        # Generate the test scripts
        self.log.info("Compilation succeeded! Generating test scripts...")
        test_scripts = self.generate_scripts(test_interface)
        if test_scripts is None:
            self.log.warn("No test cases in %s" % self.test_name)
        else:
            # Execute tests
            return self.run_tests(test_scripts)

    def run_test(self, script):
        """
        Execute a single test script in a toplevel environment.

        Start a toplevel with the module and test case object files
        loaded, pipe in the test script. Because of the pipe the process
        does not fail when the test fails, so the output is inspected
        instead.

        :return: the error message of the test; None when it passed,
            "TIMEOUT" when it exceeded ``self.timeout``
        """
        run_test = " ".join([
            "echo \"%s\" |" % script,
            "ocaml",
        ] + self.LIBS + [
            "%s.cmo" % self.src_file[:-(len(".ml"))],
            "%s.cmo" % self.test_file[:-(len(".ml"))]
        ])
        with Timer() as t:
            try:
                output, err = TimedProcess(run_test).run(self.timeout)
                err_msg = self._error_of_output(output)  # Maybe None
            except TimeoutException:
                err_msg = "TIMEOUT"
        if not err_msg:
            self.log.success("PASS in %0.3f seconds" % t.duration)
        else:
            self.log.failure("FAIL with '%s' in %0.3f seconds" % (err_msg, t.duration))
        return err_msg

    def run_tests(self, test_scripts):
        """
        Given an association list of ("test_case_name", "toplevel script"),
        execute each test in an ocaml toplevel and record the output.

        :return: list of (test case name, error message) for failed cases
        """
        errors = []
        for (fn_name, script) in test_scripts:
            self.log.run("Running %s..." % fn_name)
            err_msg = self.run_test(script)
            if err_msg:
                errors.append((fn_name, err_msg))
        return errors

    def _check_paths(self):
        """
        Make sure the source and test files (still) exist.

        :raises InvalidTestException: when either file is missing
        """
        if not os.path.exists(self.src_file):
            self.log.warn("Source file '%s' not found. Skipping %s..." % (self.src_name, self.test_name))
            raise InvalidTestException(0)
        if not os.path.exists(self.test_file):
            self.log.warn("Test file '%s' not found. Exiting..." % self.test_name)
            raise InvalidTestException(0)

    def _error_of_output(self, toplevel_output):
        """
        Extract the exception message, if any, from the toplevel output.

        Toplevel output is always echoed, regardless of whether the tests
        passed, so the text is searched for an "Exception:" between two
        octothorps. This is not very rigorous: it assumes there is an
        octothorp at the end of the output, and it ignores input errors
        (a syntax error in the generated script is not detected).

        Anything printed before the exception / return value is echoed to
        stdout as debug output.

        :return: the exception text, or None when no exception was found
        """
        match = re.search(r"#.*?(Exception:[\s].*)\n#", toplevel_output, re.DOTALL)
        if match is not None:
            # Debug output will be octothorp to exception.
            debug_match = re.search(r"# (.*?)Exception:", toplevel_output, re.DOTALL)
            message = match.group(1).strip()
        else:
            # Debug output will be octothorp to return value
            debug_match = re.search(r"# (.*?)\n- :", toplevel_output, re.DOTALL)
            message = None
        # Print the debug output, if any
        if debug_match is not None and debug_match.group(1):
            print(debug_match.group(1).rstrip())
        return message

    def _source_of_exception(self, errorMessage):
        """
        Get the line number and source file that spawned `errorMessage`,
        extract that line of code (and the one before it) from the file.

        :return: "" when the message names no location; otherwise the
            preceding line plus the failing line marked with "--->". When
            the file is shorter than the reported line number, the marker
            points at "<unexpected end of file>".
        """
        match = re.search(r"File \"(.*?)\", line ([0-9]+),", errorMessage)
        if match is None:
            return ""
        fname = match.group(1)
        line_num = int(match.group(2))
        with open(fname, "r") as f:
            message = ""
            try:
                # Skip to the failing line, remembering the line before it.
                # The skipping is inside the `try` so that a file that is
                # too short ends in the fallback below instead of letting
                # StopIteration escape.
                for _ in range(line_num - 1):
                    message = next(f)
                failing_line = next(f).rstrip()
            except StopIteration:
                # File ended unexpectedly. Add an empty line and point to it
                return (" %s %s---> %s <unexpected end of file>"
                        % (line_num-1, message, line_num))
        if message:
            return (" %s %s---> %s %s"
                    % (line_num-1, message, line_num, failing_line))
        return ("---> %s %s" % (line_num, failing_line))

    def _toplevel_input(self, module_name, test_case):
        """
        Write a script for the toplevel: call the right function from the
        right module.
        """
        return "%s.%s ();;" % (module_name.capitalize(), test_case)
def subscribe_to_stream(self):
    """
    Run the parent's subscribe step, log it, and return the stream url.

    :return: the fixed string "RtspStreamHandler#subscribe_to_stream"
    """
    parent = super(RtspStreamHandler, self)
    parent.subscribe_to_stream()
    Log.info("Subscribing to RTSP stream")
    return "RtspStreamHandler#subscribe_to_stream"
def _stop_streaming(self):
    """Run the parent's stop step, then log that the RTSP stream stops."""
    parent = super(RtspStreamHandler, self)
    parent._stop_streaming()
    Log.info("Stopping RTSP stream")