def _create_csvs():
    """Build the no_cluster train/test/small/local CSV splits and item metadata.

    Reads the full dataframe and the configured train length, writes the
    full train/test CSVs plus their index arrays, then derives the 'small'
    and 'local' splits, and finally rewrites the accomodations metadata.
    """
    print('creating CSV...')

    # create no_cluster/full
    base = 'dataset/preprocessed/no_cluster'
    full = data.full_df()
    n_train = data.read_config()[data.TRAIN_LEN_KEY]
    # first n_train rows are train, the remainder is test
    train = full.iloc[:n_train]
    test = full.iloc[n_train:]
    target_indices = get_target_indices(test)

    check_folder('dataset/preprocessed/no_cluster/full')
    train.to_csv(os.path.join(base, 'full/train.csv'))
    test.to_csv(os.path.join(base, 'full/test.csv'))
    np.save(os.path.join(base, 'full/train_indices'), train.index)
    np.save(os.path.join(base, 'full/test_indices'), test.index)
    np.save(os.path.join(base, 'full/target_indices'), target_indices)

    # the 'small' split size is chosen interactively by the user
    rows_wanted = int(input('How many rows do you want in small.csv? '))
    small_train = get_small_dataset(train, maximum_rows=rows_wanted)
    check_folder('dataset/preprocessed/no_cluster/small')
    split(small_train, os.path.join(base, 'small'))

    # 'local' split re-splits the whole training set
    check_folder('dataset/preprocessed/no_cluster/local')
    split(train, os.path.join(base, 'local'))

    # create item_metadata in preprocess folder
    data.accomodations_original_df().to_csv(data.ITEMS_PATH)
    # append missing accomodations to item metadata
    append_missing_accomodations('full')
def build(self, configuration_file):
    """Load the configuration and prepare the simulation for a run.

    Stores the parsed config, derives the total simulation time, creates
    the employees, sets up the event queue, and writes the log header.
    """
    cfg = read_config(configuration_file)
    self.config = cfg
    # first 'TTS' entry under 'T' holds the total time to simulate
    self.simulation_time = float(cfg['T']['TTS'][0])
    self.create_employees()
    self.initialize_event_queue()
    self.log.header(self)
# NOTE(review): this chunk starts mid-statement — the next line is the tail of a
# parser.add_argument(...) call whose opening is outside this view.
help='load model state dict')
args = parser.parse_args()
# Unpack the CLI flags into locals.
pretrain = args.pretrain
train = args.train
restart = args.restart
config_path = args.config_path  # 10/29/2020
model_path = args.model_path
max_patience = args.max_patience
training_lr = args.training_lr  # 11/22/2020
model_state_num = args.model_state_num
# Read config file
config = read_config(config_path)
# Seed torch and numpy from the config so runs are reproducible.
torch.manual_seed(config.seed)
np.random.seed(config.seed)
if pretrain:
    # Generate datasets
    train_dataset, valid_dataset, test_dataset = get_ASR_datasets(config)
    # Initialize base model
    pretrained_model = PretrainedModel(config=config)
    # Train the base model
    trainer = Trainer(model=pretrained_model, config=config)
    if restart:
        # resume from a previously saved checkpoint
        trainer.load_checkpoint()
    for epoch in range(config.pretraining_num_epochs):
        # NOTE(review): chunk truncated here — the epoch loop body continues
        # outside this view.
'''
Script to generate the data (train/dev/test splits)
'''
import sys
import os
import argparse

from data import read_config

# Parse the required --config argument and load the JSON config it points to.
parser = argparse.ArgumentParser()
parser.add_argument("--config", help="path to json config", required=True)
args = parser.parse_args()
config_file_path = args.config
config = read_config(config_file_path)

import random
# Seed the RNG from the config so the generated splits are reproducible.
random.seed(config['data']['seed'])


def check_context_size(slideshow, cur_idx, title_checker, content_checker, config):
    ''' checks if the current slide has expected number of context slides '''
    # When strict context sizing is disabled, every slide passes the check.
    if config['data']['strict_context_size'] == False:
        return True
    # NOTE(review): `xrange` is Python 2 only — this raises NameError on
    # Python 3; confirm the intended interpreter (or switch to `range`).
    for i in xrange(config['data']['context_size']):
        if config['data']['use_left_context'] == True:
            # index of the i-th slide to the left of the current one
            left_idx = cur_idx - i - 1
            if left_idx < 0:
                # NOTE(review): chunk truncated here — the body of this branch
                # continues outside this view.
# Logging verbosity follows the --verbose flag.
logger.setLevel(logging.DEBUG if opt.verbose else logging.INFO)
logging.info(opt)

# A non-zero seed makes python/numpy/torch (incl. CUDA) deterministic.
if opt.random_seed != 0:
    seed = opt.random_seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

if opt.whattodo == 'train':
    config = data.read_config(opt.config)
    logging.info(config)
    makedir_and_clear(opt.save)

    logging.info("load data ...")
    # train/dev are loaded with the training flag set, test without it
    train_data = data.loadData(opt.train_file, True, opt.types, opt.type_filter)
    dev_data = data.loadData(opt.dev_file, True, opt.types, opt.type_filter)
    test_data = (data.loadData(opt.test_file, False, opt.types, opt.type_filter)
                 if opt.test_file else None)
UNSURE = 31  # fallback category id for intents missing from the mapping table


def predict(wav):
    """Decode the intent label of a single .wav file with the global model."""
    signal, _ = sf.read(wav)
    signal = torch.tensor(signal, device=device).float().unsqueeze(0)
    label = model.decode_intents(signal)
    return label


def set_label(category, intents):
    """Map a decoded intent string to its numeric category, or UNSURE if absent."""
    category = intents.loc[intents.intent == category]
    return UNSURE if category.empty else category.category.item()


# make output directory
# BUG FIX: the original guard `if not os.path.isfile(OUTPUT): os.makedirs(OUTPUT)`
# is True even when OUTPUT already exists as a *directory* (isfile is False for
# dirs), so makedirs raised FileExistsError on re-runs. exist_ok=True makes
# directory creation idempotent.
os.makedirs(OUTPUT, exist_ok=True)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
config = data.read_config('../input/myinput/no_unfreezing/no_unfreezing.cfg')
_, _, _ = data.get_SLU_datasets(config)
model = models.Model(config).eval()
model.load_state_dict(torch.load('../input/myinput/no_unfreezing/model_state.pth', map_location=device))  # load trained model

# predict label of each .wav file and store it as a pickle
test = pd.read_csv(TEST)
if not os.path.isfile(PRED):
    df, paths = list(), list()
    # expected .wav filenames, derived from the test table's .png entries
    files = set(test['file'].apply(lambda f: f.replace('.png', '.wav')))
    for i, speaker in enumerate(os.listdir(SPEAKERS)):
        speaker = os.path.join(SPEAKERS, speaker)
        for wav in os.listdir(speaker):
            if wav not in files:
                continue
            wav = os.path.join(speaker, wav)
            paths.append(wav)
            # NOTE(review): chunk appears truncated here — the loop body likely
            # continues (df is built but never appended in this view).
"""Train an HMM on the text dataset and report per-epoch train/valid losses."""
import torch
from models import HMM
from data import get_datasets, read_config
from training import Trainer

# Generate datasets from text file
path = "data"
N = 128
config = read_config(N, path)
train_dataset, valid_dataset = get_datasets(config)
checkpoint_path = "."

# Initialize model
model = HMM(config=config)

# Train the model, resuming from the checkpoint directory if one exists.
num_epochs = 10
trainer = Trainer(model, config, lr=0.003)
trainer.load_checkpoint(checkpoint_path)
for epoch in range(num_epochs):
    progress = (epoch + 1, num_epochs)
    print("========= Epoch %d of %d =========" % progress)
    train_loss = trainer.train(train_dataset)
    valid_loss = trainer.test(valid_dataset)
    trainer.save_checkpoint(epoch, checkpoint_path)
    print("========= Results: epoch %d of %d =========" % progress)
    print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss))
import time

from model import MainTextIdea
from data import read_config
from utils import get_score

if __name__ == '__main__':
    # Score the MainTextIdea solver against every task in the test config.
    DATA_PATH = './data'
    data = read_config(f'{DATA_PATH}/test.json')
    solver = MainTextIdea()

    scores = 0
    max_scores = len(data)  # one point available per task
    for i, task in enumerate(data):
        task_index = i + 1
        print("Predicting task {}...".format(task_index))
        y_true = task["solution"]
        try:
            prediction = solver.predict_from_model(task)
        # BUG FIX: was `except BaseException`, which also swallows
        # KeyboardInterrupt and SystemExit; catch only ordinary errors
        # so the run can still be interrupted cleanly.
        except Exception as e:
            print(e)
            print("Solver {} failed to solve task №{}".format('1', task_index))
            prediction = ""  # an empty prediction scores zero
        score = get_score(y_true, prediction)
        scores += score
        print("Score: {}\nCorrect: {}\nPrediction: {}\n".format(
            score, y_true, prediction))
    print(f'max_scores={max_scores}, scores={scores}')