def main():
    transformations = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = CifarDataset(TRAIN_CSV_PATH, TRAIN_IMG_PATH, transformations)
    train_loader = CifarDataloader(train_dataset, batch_size=BATCH_SIZE,
                                   shuffle=True, num_workers=2)
    test_dataset = CifarDataset(TEST_CSV_PATH, TEST_IMG_PATH, transformations)
    test_loader = CifarDataloader(test_dataset, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=2)

    model = resnet50(pretrained=True, num_classes=10)
    criterion = nn.CrossEntropyLoss()
    if USE_GPU:
        model = model.cuda()
        criterion = criterion.cuda()

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    # load_checkpoint(os.path.join('checkpoint', 'last_checkpoint.pth.tar'), model, optimizer)

    for epoch in range(EPOCHS):
        train(train_loader, model, criterion, optimizer, epoch + 1, USE_GPU)
        test(test_loader, model, USE_GPU)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, os.path.join('checkpoint'))
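# The loop above relies on `save_checkpoint` and `load_checkpoint` helpers that are not shown
# in this snippet. A minimal sketch of what they might look like, built on torch.save/torch.load;
# the 'last_checkpoint.pth.tar' filename is only an assumption taken from the commented-out
# load_checkpoint call.
import os
import torch


def save_checkpoint(state, checkpoint_dir, filename='last_checkpoint.pth.tar'):
    """Persist model/optimizer state so training can be resumed later."""
    os.makedirs(checkpoint_dir, exist_ok=True)
    torch.save(state, os.path.join(checkpoint_dir, filename))


def load_checkpoint(checkpoint_path, model, optimizer):
    """Restore model/optimizer state in place and return the epoch to resume from."""
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint.get('epoch', 0)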
def new_training_data():
    try:
        training_data = request.get_json()
        if training_data is None:
            raise BadRequest(description=NO_JSON)
        train(training_data)
    except BadRequest as e:
        return json_error_message('Failed to parse JSON data', error_data=e.description)
    return json_success('Training data added successfully')
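# The handler above depends on `json_success` and `json_error_message` helpers that are not
# defined in this snippet. A minimal sketch of what they could look like using Flask's jsonify;
# the exact response shape is an assumption.
from flask import jsonify


def json_success(message):
    """Wrap a success message in a JSON envelope with HTTP 200."""
    return jsonify({'status': 'success', 'message': message}), 200


def json_error_message(message, error_data=None):
    """Wrap an error message (and optional details) in a JSON envelope with HTTP 400."""
    return jsonify({'status': 'error', 'message': message, 'error': error_data}), 400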
train_batch_size = 50
test_batch_size = 50
lr = 1e-4

if __name__ == '__main__':
    # Use pre-trained embeddings if available; these tensors are uninitialized placeholders.
    word_embed = th.Tensor(vocab_size, embed_dim)
    label_embed = th.Tensor(label_num, embed_dim)

    net = HyperIM(word_num, word_embed, label_embed, hidden_size=embed_dim, if_gru=if_gru)
    net.to(cuda_device)

    loss = nn.BCEWithLogitsLoss()
    optim = gt.optim.RiemannianAdam(net.parameters(), lr=lr)

    train_data_loader, test_data_loader = data.load_data(
        data_path, train_batch_size, test_batch_size, word_num)

    train.train(epoch, net, loss, optim, if_neg_samp=False,
                train_data_loader=train_data_loader)
    evalu.evaluate(net, if_log=if_log, test_data_loader=test_data_loader)
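# The "use pre-trained embeddings if available" comment above is not acted on: the tensors are
# only allocated, never filled. One way this could be done, assuming the embeddings were saved
# earlier with torch.save as tensors of shape (vocab_size, embed_dim) and (label_num, embed_dim);
# the file names are hypothetical, and this would replace the placeholder tensors near the top
# of the block.
import os

word_embed_path = os.path.join(data_path, 'word_embed.pt')
label_embed_path = os.path.join(data_path, 'label_embed.pt')

if os.path.exists(word_embed_path):
    word_embed = th.load(word_embed_path)         # pre-trained word embeddings
else:
    word_embed = th.randn(vocab_size, embed_dim)  # random fallback

if os.path.exists(label_embed_path):
    label_embed = th.load(label_embed_path)       # pre-trained label embeddings
else:
    label_embed = th.randn(label_num, embed_dim)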
if __name__ == '__main__':
    tf.enable_eager_execution()

    parser = argparse.ArgumentParser(
        description='Train using a specified config')
    parser.add_argument('--config', help='config to run')
    parser.add_argument('--experiment', help='experiment to run')
    parser.add_argument('--experiment_name', help='name of the experiment for logging')
    parser.add_argument('--experiment_index', help='index of the experiment to run', type=int)
    parser.add_argument(
        '--split',
        help='cross validation split number to use as validation data',
        default=None, type=int)
    args = parser.parse_args()

    # Either expand an experiment into a list of configs and pick one by index,
    # or look up a single named config.
    if args.experiment:
        experiments = generate_configs_from_experiment(args.experiment)
        conf = copy.deepcopy(experiments[args.experiment_index])
    else:
        conf = copy.deepcopy(configs[args.config])

    if args.split is not None:
        conf['dataset']['cross_validation_split'] = args.split

    train(conf, args.experiment_name)
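# Example invocations of the script above (the script name and config/experiment identifiers are
# placeholders; they depend on the entries in `configs` and on what
# `generate_configs_from_experiment` expects):
#
#   python train_config.py --config baseline --experiment_name baseline-run --split 0
#   python train_config.py --experiment lr_sweep --experiment_index 2 --experiment_name lr-sweep-2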
import tensorflow as tf
import boto3
import json
import os
import sys
import time

from util.train import train

conn_sqs = boto3.resource('sqs')

if __name__ == '__main__':
    tf.enable_eager_execution()

    queue = conn_sqs.get_queue_by_name(QueueName='experiment-configs')
    messages = queue.receive_messages(
        MaxNumberOfMessages=1,
        MessageAttributeNames=['experiment_name'],
        WaitTimeSeconds=0)

    for message in messages:
        conf = json.loads(message.body)
        experiment_name = message.message_attributes.get(
            'experiment_name').get('StringValue')
        message.delete()
        train(conf, experiment_name)
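# The worker above consumes configs from the 'experiment-configs' queue. A minimal sketch of the
# producer side, assuming the same queue name and message-attribute layout; the example config
# payload is made up.
import json
import boto3


def enqueue_experiment(conf, experiment_name):
    """Send one experiment config to the SQS queue the training worker polls."""
    queue = boto3.resource('sqs').get_queue_by_name(QueueName='experiment-configs')
    queue.send_message(
        MessageBody=json.dumps(conf),
        MessageAttributes={
            'experiment_name': {
                'DataType': 'String',
                'StringValue': experiment_name,
            }
        })


# Example usage with a made-up config:
# enqueue_experiment({'dataset': {'cross_validation_split': 0}}, 'baseline-run')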
)
scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.1, patience=0)

with open(log_file, "a") as f:
    f.write(f"Run: {run + 1}\n")

for epoch in tqdm(range(_epoch_num), desc="[Epoch]"):
    start_time = time.time()
    train_loss, train_acc, dev_loss, dev_acc = train(
        train_data_loader,
        probe,
        bert,
        loss_fct,
        optimizer,
        dev_data_loader=dev_data_loader,
        scheduler=scheduler,
    )
    secs = int(time.time() - start_time)
    mins = secs // 60  # whole minutes elapsed; keep the remainder as seconds
    secs = secs % 60

    # Run the final test evaluation once the learning rate has decayed below the
    # stopping threshold, or on the last epoch.
    if optimizer.param_groups[0][
            "lr"] < _stop_lr or epoch == _epoch_num - 1:
        test_loss, test_acc = evaluate(test_data_loader, probe, bert, loss_fct)
        with open(log_file, "a") as f:
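# The `train` call above is handed the ReduceLROnPlateau scheduler, but this excerpt does not
# show how it is stepped. A typical pattern (the real `train` here may differ) is to step it on
# the validation loss once per epoch:
#
#   dev_loss, dev_acc = evaluate(dev_data_loader, probe, bert, loss_fct)
#   scheduler.step(dev_loss)  # with patience=0, the lr is multiplied by `factor` as soon as dev_loss stops improving
#
# The loop above then stops and runs the final test evaluation once the lr drops below `_stop_lr`.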
def main():
    # Data preprocessing: transform to tensors and normalize with the ImageNet statistics.
    transformations = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Get training data and test data.
    train_dataset = CifarDataset(TRAIN_CSV_PATH, TRAIN_IMG_PATH, transformations)
    train_loader = CifarDataloader(train_dataset, batch_size=BATCH_SIZE,
                                   shuffle=True, num_workers=2)
    test_dataset = CifarDataset(TEST_CSV_PATH, TEST_IMG_PATH, transformations)
    test_loader = CifarDataloader(test_dataset, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=2)

    # Choose the model: ResNet-50.
    model = resnet50(pretrained=True, num_classes=10)
    criterion = nn.CrossEntropyLoss()
    if USE_GPU:
        model = model.cuda()
        criterion = criterion.cuda()

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    # load_checkpoint(os.path.join('checkpoint', 'last_checkpoint.pth.tar'), model, optimizer)

    # Broadcast initial parameters from rank 0 and wrap the optimizer with Horovod's
    # DistributedOptimizer so gradients are averaged across workers.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    optimizer = hvd.DistributedOptimizer(
        optimizer, named_parameters=model.named_parameters())

    for epoch in range(EPOCHS):
        train(train_loader, model, criterion, optimizer, epoch + 1, USE_GPU, writer=writer)
        test(test_loader, model, USE_GPU)
        # Only rank 0 writes the checkpoint so workers do not clobber each other.
        if hvd.rank() == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, os.path.join('checkpoint'))
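# The Horovod variant above assumes the usual Horovod setup happened earlier in the script
# (not shown here). A minimal sketch of that setup; the learning-rate scaling line is a common
# convention, not something taken from this snippet.
import torch
import horovod.torch as hvd

hvd.init()                                   # start Horovod and discover worker ranks
if torch.cuda.is_available():
    torch.cuda.set_device(hvd.local_rank())  # pin each worker process to its own GPU

# Linear LR scaling with the number of workers (optional; constant names are hypothetical):
# LEARNING_RATE = BASE_LEARNING_RATE * hvd.size()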