def autotune(input_train, input_validation, output_model, output_parameters,
             metric, k, duration, model_size):
    """Autotune a supervised fastText model and persist the result.

    The training and validation inputs and the two outputs are resolved
    through ``get_input_path`` / ``get_output_path``. ``metric``,
    ``duration`` and ``model_size`` are forwarded to
    ``fasttext.train_supervised`` as ``autotuneMetric``,
    ``autotuneDuration`` and ``autotuneModelSize``.

    After training, the model is evaluated on the validation file with
    ``k`` and the metrics are printed as a single JSON object. The values
    returned by ``get_model_parameters`` are dumped as JSON to the
    parameters output, and the model itself is saved to the model output.
    """
    train_file = get_input_path(input_train)
    validation_file = get_input_path(input_validation)
    model_file = get_output_path(output_model)
    parameters_file = get_output_path(output_parameters)

    # Let fastText search for hyper-parameters against the validation file.
    best_model = fasttext.train_supervised(
        input=train_file,
        autotuneValidationFile=validation_file,
        autotuneMetric=metric,
        autotuneDuration=duration,
        autotuneModelSize=model_size,
        verbose=VERBOSE,
    )

    # Report how the selected model performs on the validation file.
    sample_count, precision, recall = best_model.test(validation_file, k=k)
    metrics = {
        'n': sample_count,
        'precision': precision,
        'recall': recall,
        'k': k,
    }
    print(json.dumps(metrics))

    # Persist the parameters first, then the model.
    with open(parameters_file, 'w') as parameters_out:
        json.dump(get_model_parameters(best_model), parameters_out)

    best_model.save_model(model_file)
def split(input_data, output_train, output_validation, output_test,
          train_ratio, validation_ratio, test_ratio, shuffle):
    """Split a line-oriented file into train, validation and test files.

    The input is read in full, stripped of surrounding whitespace and
    split on ``'\\n'``. When ``shuffle`` is truthy the lines are shuffled
    after seeding ``random`` with ``RANDOM_SEED``. Slice boundaries are the
    rounded cumulative ratios multiplied by the number of lines; lines
    beyond the last boundary are not written anywhere. Each output file
    receives its lines joined by ``'\\n'`` with no trailing newline.
    """
    source_path = get_input_path(input_data)
    train_path = get_output_path(output_train)
    validation_path = get_output_path(output_validation)
    test_path = get_output_path(output_test)

    with open(source_path, 'r') as source:
        lines = source.read().strip().split('\n')

    if shuffle:
        print('Shuffling data')
        random.seed(RANDOM_SEED)
        random.shuffle(lines)

    # Cumulative boundaries of the three consecutive slices.
    total = len(lines)
    train_end = round(total * train_ratio)
    validation_end = round(total * (train_ratio + validation_ratio))
    test_end = round(
        total * (train_ratio + validation_ratio + test_ratio))

    partitions = (
        (train_path, lines[:train_end]),
        (validation_path, lines[train_end:validation_end]),
        (test_path, lines[validation_end:test_end]),
    )
    for destination, chunk in partitions:
        with open(destination, 'w') as sink:
            sink.write('\n'.join(chunk))
def test(input_test, input_model, output_predictions, k):
    """Evaluate a saved fastText model and write per-row predictions.

    Prints ``model.test`` metrics for the test file as one JSON object,
    then builds a DataFrame from ``split_text`` applied to every line of
    the test file (columns ``TEXT_COLUMN`` and ``LABEL_COLUMN``), joins the
    frame produced by ``get_predictions_df`` and adds a boolean ``error``
    column comparing the label column with the ``'<LABEL_COLUMN>@1'``
    column (presumably the top-1 prediction added by
    ``get_predictions_df`` -- confirm against that helper). The result is
    written as CSV without the index.
    """
    test_file = get_input_path(input_test)
    model_file = get_input_path(input_model)
    predictions_file = get_output_path(output_predictions)

    classifier = fasttext.load_model(model_file)

    # Aggregate metrics over the whole test file.
    sample_count, precision, recall = classifier.test(test_file, k=k)
    print(json.dumps(
        {'n': sample_count, 'precision': precision, 'recall': recall, 'k': k}
    ))

    # One row per input line, split into text and label.
    with open(test_file) as handle:
        rows = (split_text(line) for line in handle)
        frame = pd.DataFrame(rows, columns=[TEXT_COLUMN, LABEL_COLUMN])

    texts = list(frame[TEXT_COLUMN])
    all_labels, all_probs = classifier.predict(texts, k=k)

    frame = frame.join(get_predictions_df(all_labels, all_probs, k))

    expected = frame[f'{LABEL_COLUMN}']
    predicted = frame[f'{LABEL_COLUMN}@1']
    frame['error'] = (expected != predicted)

    frame.to_csv(predictions_file, index=False)
def bag_to_json(options):
    """Stream the output of ``json_generator(options)`` to a ``.json`` file.

    The destination is ``utils.get_output_path(options)`` with ``".json"``
    appended. The generator is wrapped in ``StreamArray`` and encoded
    incrementally with ``json.JSONEncoder().iterencode``, each encoded
    chunk being written as it is produced.
    """
    records = StreamArray(json_generator(options))
    destination = utils.get_output_path(options) + ".json"
    with open(destination, 'w') as sink:
        sink.writelines(json.JSONEncoder().iterencode(records))
def save_gallery(title, data):
    """Write ``data`` to the output path derived from ``title``.

    Progress is reported through ``log`` before the write and through
    ``log_replace_line`` once the file has been written.
    """
    log("Saving gallery {0}...".format(title))
    destination = get_output_path(title)
    with open(destination, "w") as gallery_file:
        gallery_file.write(data)
    log_replace_line("Saving gallery {0}...Done".format(title))
def preprocess(input_data, output_data):
    """Convert a CSV file into one ``<text> <separator><label>`` line per row.

    The CSV is read with pandas' ``python`` engine. For every row the
    ``TEXT_COLUMN`` value is passed through ``process_text`` and written,
    followed by a space, ``LABEL_SEPARATOR`` and the ``LABEL_COLUMN``
    value.
    """
    # TODO: make it work also with prediction data without label
    source_path = get_input_path(input_data)
    target_path = get_output_path(output_data)

    frame = pd.read_csv(source_path, engine='python')

    with open(target_path, 'w') as sink:
        pairs = zip(frame[TEXT_COLUMN], frame[LABEL_COLUMN])
        sink.writelines(
            f'{process_text(text)} {LABEL_SEPARATOR}{label}\n'
            for text, label in pairs
        )
def main():
    """Download a stream using options obtained from ``utils``.

    Three paths are supported:

    * audio only -- list streams, pick one itag, download it;
    * video with quality ``1`` -- same flow using progressive streams;
    * any other video quality -- list the video and audio streams
      separately (``stream_lists[0]`` / ``stream_lists[1]``), pick a pair
      of itags and download-and-mux them.

    Each path prints a message and returns early when no matching stream
    is available.
    """
    url = utils.get_url()
    output_path = utils.get_output_path()
    audio_only = utils.get_audio_only()
    # The stream quality is only requested for video downloads.
    video_quality = None if audio_only else utils.get_stream_quality()

    if audio_only or video_quality == 1:
        # Both single-stream paths share the same download flow; they only
        # differ in how the candidate streams are listed.
        if audio_only:
            stream_list = utils.list_streams(url, audio_only)
        else:
            stream_list = utils.list_streams(url, audio_only, True)

        if not stream_list:
            print(
                'There are no streams available for the options that you specified.'
            )
            return

        itag = utils.get_itag()
        utils.download_stream(url, itag, audio_only, output_path)
        utils.print_video_statistics(url)
        return

    stream_lists = utils.list_streams(url, audio_only, False)

    # This check must run before indexing: previously it came last, so it
    # was unreachable and an empty/None result raised on stream_lists[0].
    if not stream_lists or not (stream_lists[0] or stream_lists[1]):
        print(
            'There are no video or audio streams available for the options that you specified.'
        )
        return
    if not stream_lists[0]:
        print(
            'There are no video streams available for the options that you specified.'
        )
        return
    if not stream_lists[1]:
        print(
            'There are no audio streams available for the options that you specified.'
        )
        return

    itags = utils.get_adaptive_itags()
    utils.download_and_mux(url, itags[0], itags[1], output_path)
def train(input_data, input_parameters, output_model):
    """Train a supervised fastText model with parameters loaded from JSON.

    The JSON document at the parameters input is loaded and expanded as
    keyword arguments to ``fasttext.train_supervised``; the trained model
    is saved to the model output path.
    """
    data_file = get_input_path(input_data)
    parameters_file = get_input_path(input_parameters)
    model_file = get_output_path(output_model)

    with open(parameters_file) as handle:
        hyperparameters = json.load(handle)

    trained = fasttext.train_supervised(input=data_file, **hyperparameters)
    trained.save_model(model_file)
def preprocess(input_data, output_data, text_column, label_column, engine):
    """Convert a CSV file into one ``<text> <separator><label>`` line per row.

    The CSV is read with the given pandas ``engine`` and missing values
    are replaced by empty strings. ``text_column`` may name several
    columns separated by commas; their values are then joined with a
    single space into a new column named after the full ``text_column``
    string. Rows for which ``not_empty_str`` rejects the text or the label
    are skipped.
    """
    # TODO: make it work also with prediction data without label
    source_path = get_input_path(input_data)
    target_path = get_output_path(output_data)

    frame = pd.read_csv(source_path, engine=engine).fillna('')

    # More than one part means several text columns were requested.
    text_parts = text_column.split(',')
    if len(text_parts) > 1:
        frame[text_column] = frame[text_parts].agg(' '.join, axis=1)

    with open(target_path, 'w') as sink:
        for text, label in zip(frame[text_column], frame[label_column]):
            if not (not_empty_str(text) and not_empty_str(label)):
                continue
            sink.write(f'{process_text(text)} {LABEL_SEPARATOR}{label}\n')
def predict(input_data, input_model, output_predictions, k):
    """Predict labels for every line of a text file and save them as CSV.

    Each line of the input file is one text to classify. The saved
    fastText model is asked for the top ``k`` predictions per text; the
    frame returned by ``get_predictions_df`` is joined to the text column
    and the result is written as CSV without the index.
    """
    input_data_path = get_input_path(input_data)
    input_model_path = get_input_path(input_model)
    output_predictions_path = get_output_path(output_predictions)

    model = fasttext.load_model(input_model_path)

    # Drop the line terminator: fastText's predict() rejects any text that
    # contains '\n', and the newline would otherwise leak into the CSV.
    with open(input_data_path) as f:
        texts = [line.rstrip('\n') for line in f]
    df = pd.DataFrame(texts, columns=[TEXT_COLUMN])

    # Get predictions
    all_labels, all_probs = model.predict(texts, k=k)

    # Add formatted predictions
    predictions_df = get_predictions_df(all_labels, all_probs, k)
    df = df.join(predictions_df)

    # Save predictions
    df.to_csv(output_predictions_path, index=False)
def open_csv(options, topic_name):
    """Open the ``.csv`` output file for ``topic_name`` in write mode.

    The path is ``utils.get_output_path(options, topic_name)`` with
    ``".csv"`` appended. The open file object is returned; closing it is
    left to the caller.
    """
    csv_path = utils.get_output_path(options, topic_name) + ".csv"
    return open(csv_path, 'w')
RHO = 0.2  # forwarded to World as ``rho``

# mode parameters
ACO_MODE = 'elite_system'
RANK_FRAC = 0.1

# plotting parameters
SAVE_OUTPUT = True
N_PLOTS = 10

##### -------------------------------------------- #####

indx_list, dist_list = [], []

# N_PLOTS evenly spaced integer steps between 1 and N_ITERATIONS - 1.
plot_steps = np.linspace(1, N_ITERATIONS - 1, N_PLOTS, dtype=int)

# An output path is only resolved when output is going to be saved.
if SAVE_OUTPUT:
    plot_path = get_output_path()
else:
    plot_path = None

# initializing world
world = World(
    N_NODES,
    N_ANTS,
    20,
    alpha=ALPHA,
    beta=BETA,
    gamma=GAMMA,
    rho=RHO,
    aco_mode=ACO_MODE,
    rank_frac=RANK_FRAC,
)

# Step 0: Populating world with nodes and ants
world.populate_world()
jin.write(data) log_replace_line("Saving gallery {0}...Done".format(title)) if __name__ == "__main__": start_time = timeit.default_timer() cfg = get_gallery_config() gallery_path = cfg.get("setup", "location") title = cfg.get("setup", "title") row_height = cfg.get("setup", "row_height", fallback=150) recursive = cfg.getboolean("setup", "recursive", fallback=False) colours = get_section(cfg, 'colours', DEFAULT_COLOURS) output_path = get_output_path(title) if os.path.exists(output_path): has_confirmed = ask_user_to_confirm(title) if not has_confirmed: log("Exiting...") os._exit(0) log_dict("Generating gallery with config:", cfg._sections) groups = read_files(gallery_path, recursive) template = get_gallery_template() output = template.render(title=title, groups=groups,
model_name = opt.model_type model = get_resnet_model(resnet_type=opt.model_type, n_classes=opt.n_classes) else: model_name = get_ViT_name(model_type=opt.model_type, patch_size=opt.patch_size, hybrid=opt.hybrid) model = get_ViT_model(type=opt.model_type, image_size=opt.image_size, patch_size=opt.patch_size, n_classes=opt.n_classes, n_channels=opt.n_channels, dropout=opt.dropout, hybrid=opt.hybrid) output_graph_path, dump_file = get_output_path( model_name=model_name, root_path=opt.output_root_path, dataset_name=opt.dataset_name) csv_result_path = os.path.join(opt.output_root_path, "models_results.csv") val_ratio = opt.val_ratio if opt.eval_type == "test": val_ratio = 0 train_loader, validation_loader, test_loader = get_loader_from_dataset( dataset_name=opt.dataset_name, root_path=opt.dataset_path, batch_size_train=opt.batch_size_train, batch_size_test=opt.batch_size_test, image_size=opt.image_size, augmentation=opt.data_augmentation, val_ratio=val_ratio, n_cpu=opt.n_cpu) device = device("cuda:0" if opt.cuda else "cpu")