def main(args):

    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # The lines below make CUDA (almost) deterministic; enabling them can slow down training
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    TEXT = Field(lower=True, tokenize="spacy", tokenizer_language="en", include_lengths=False,
                 batch_first=True, fix_length=args.fix_length, init_token="[cls]")

    output_dimensions = get_num_classes_dataset(args.data_path, args.target_name)

    # Use name of dataset to get the arguments needed
    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_name)
    print("--- Finished with reading in the dataset ---")

    dloader = CustomDataLoader(dataset, TEXT, args.target_name)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d", vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size, device=torch.device("cpu"))

    model = TransformerModel(max_seq_len=args.fix_length,
                             num_outputs=output_dimensions,
                             word_embedding_matrix=TEXT.vocab.vectors,
                             feed_fwd_dim=args.fwd_dim,
                             num_transformer_layers=args.num_transformer_layers,
                             num_transformer_heads=args.num_transformer_heads,
                             pos_encoding_dropout=args.pos_encoding_dropout,
                             classification_dropout=args.fc_layer_dropout,
                             batch_first=True,
                             pad_index=TEXT.vocab.stoi[TEXT.pad_token])  # index of the Field's pad token ("<pad>" by default)

    if args.class_weighting:
        weights = single_task_class_weighting(data_iterators[0])
        criterion = nn.CrossEntropyLoss(weight=weights.to(args.device))
    else:
        criterion = nn.CrossEntropyLoss()

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.90, 0.98), eps=10e-9)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(model, criterion, optimizer, scheduler, data_iterators[0], device=args.device,
          include_lengths=False, save_path=args.logdir, save_name="csv_dataset",
          tensorboard_dir=args.logdir+"/runs", n_epochs=args.n_epochs, checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    model.load_state_dict(torch.load(args.logdir+"/csv_dataset_epoch_%d.pt" % (args.n_epochs-1)))
    evaluation(model, data_iterators[-1], criterion, device=args.device, include_lengths=False)
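
# The example above passes class weights from single_task_class_weighting into
# nn.CrossEntropyLoss. The helper below is only an illustrative sketch of the usual
# inverse-frequency scheme such a helper might use; it is a hypothetical stand-in,
# not this repo's actual implementation, and it assumes the iterator yields batches
# whose targets are 1-D LongTensors of class indices.
def _example_inverse_frequency_weights(train_iterator, num_classes):
    counts = torch.zeros(num_classes)
    for batch in train_iterator:
        targets = batch.label.view(-1)  # assumed target attribute name on the batch
        counts += torch.bincount(targets, minlength=num_classes).float()
    # Classes seen less often get proportionally larger weights
    weights = counts.sum() / (num_classes * counts.clamp(min=1.0))
    return weights  # shape: (num_classes,), usable as nn.CrossEntropyLoss(weight=...)
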
def main(args):

    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    output_dimensions = get_num_classes_dataset(args.data_path, args.target_name)

    # Use name of dataset to get the arguments needed
    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(text_field=TEXT, path_to_datadir=args.data_path).load(targets=args.target_name)
    print("--- Finished with reading in the dataset ---")

    dloader = CustomDataLoader(dataset, TEXT, args.target_name)
    data_iterators = dloader.construct_iterators(vectors="glove.6B.300d", vector_cache="../.vector_cache",
                                                 batch_size=args.batch_size, device=torch.device("cpu"))

    model = SimpleLSTM(vocab=TEXT.vocab.vectors, hidden_dim=args.hidden_dim, output_dim=output_dimensions,
                       device=args.device, use_lengths=args.use_lengths, dropout=args.dropout)

    if args.class_weighting:
        weights = single_task_class_weighting(data_iterators[0])
        criterion = nn.CrossEntropyLoss(weight=weights.to(args.device))
    else:
        criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer, step_size=args.scheduler_stepsize, gamma=args.scheduler_gamma)

    train(model, criterion, optimizer, scheduler, data_iterators[0], device=args.device,
          include_lengths=args.use_lengths, save_path=args.logdir, save_name="csv_dataset",
          tensorboard_dir=args.logdir+"/runs", n_epochs=args.n_epochs, checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    model.load_state_dict(torch.load(args.logdir+"/csv_dataset_epoch_%d.pt" % (args.n_epochs-1)))
    evaluation(model, data_iterators[-1], criterion, device=args.device, include_lengths=args.use_lengths)
def main(args):

    # TODO: clip gradients
    # For multitask learning, build a dictionary mapping "task" -> data
    # Set the random seed for the experiments (check whether this is needed in the other files as well)
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)
    # Load datasets

    output_dimensions = get_num_classes_dataset(args.data_path,
                                                args.target_names)

    # Use name of dataset to get the arguments needed
    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(
        text_field=TEXT,
        path_to_datadir=args.data_path).load(targets=args.target_names)
    print("--- Finished with reading in the dataset ---")
    # Load the dataset and split it into train and test portions

    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(
        vectors="glove.6B.300d",
        vector_cache="../.vector_cache",
        batch_size=args.batch_size,
        device=torch.device("cpu"))

    towers = {
        MLP(
            len(args.filter_list) * args.num_filters, args.linear_layers,
            output_dim): name
        for output_dim, name in zip(output_dimensions, args.target_names)
    }

    model = MultitaskConvNet(1,
                             args.filter_list,
                             TEXT.vocab.vectors,
                             args.num_filters,
                             dropbout_probs=args.dropout)

    multitask_model = MultiTaskModel(
        shared_layer=model,
        towers=towers,
        batch_size=args.batch_size,
        input_dimension=TEXT.vocab.vectors.shape[1],
        device=args.device,
        include_lens=args.use_lengths)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0],
                                                 args.target_names)
        losses = {
            name:
            nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
            for name in args.target_names
        }
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(multitask_model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

    train(multitask_model,
          losses,
          optimizer,
          scheduler,
          data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    multitask_model.load_state_dict(
        torch.load(
            "%s/%s_datasets_epoch_%d.pt" %
            (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(multitask_model, data_iterators[-1], losses, device=args.device)
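
# The multitask examples hand train() a dict of per-task criteria ("losses") together
# with a model whose towers are keyed by task name. How train() combines them is not
# shown in these snippets; the function below is only a minimal sketch of the common
# pattern (sum the per-task cross-entropy terms into one scalar for a single backward
# pass), under the assumption that predictions and targets are also dicts keyed by task name.
def _example_multitask_loss(predictions, targets, losses):
    # predictions / targets: {task_name: tensor}, losses: {task_name: nn.CrossEntropyLoss}
    return sum(losses[name](predictions[name], targets[name]) for name in losses)
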
def main(args):

    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    output_dimensions = get_num_classes_dataset(args.data_path,
                                                args.target_names)

    # Use name of dataset to get the arguments needed
    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(
        text_field=TEXT,
        path_to_datadir=args.data_path).load(targets=args.target_names)
    print("--- Finished with reading in the dataset ---")

    towers = {
        MLP(args.hidden_dim, args.linear_layers, output_dim): name
        for output_dim, name in zip(output_dimensions, args.target_names)
    }
    # Load the dataset and split it into train and test portions

    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(
        vectors="glove.6B.300d",
        vector_cache="../.vector_cache",
        batch_size=args.batch_size,
        device=torch.device("cpu"))

    model = MultiTaskLSTM(vocab=TEXT.vocab.vectors,
                          hidden_dim=args.hidden_dim,
                          device=args.device,
                          use_lengths=args.use_lengths)

    multitask_model = MultiTaskModel(shared_layer=model,
                                     towers=towers,
                                     batch_size=args.batch_size,
                                     input_dimension=args.embedding_dim,
                                     device=args.device,
                                     include_lens=args.use_lengths)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0],
                                                 args.target_names)
        losses = {
            name:
            nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
            for name in args.target_names
        }
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(multitask_model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

    train(multitask_model,
          losses,
          optimizer,
          scheduler,
          data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")
    multitask_model.load_state_dict(
        torch.load(
            "%s/%s_datasets_epoch_%d.pt" %
            (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(multitask_model, data_iterators[-1], losses, device=args.device)
def main(args):

    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    # The lines below make CUDA (almost) deterministic; enabling them can slow down training
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    output_dimensions = get_num_classes_dataset(args.data_path,
                                                args.target_name)

    # Use name of dataset to get the arguments needed
    print("--- Starting with reading in the dataset ---")
    dataset = CSVDataset(
        text_field=TEXT,
        path_to_datadir=args.data_path).load(targets=args.target_name)
    print("--- Finished with reading in the dataset ---")

    # Load the dataset and split it into train and test portions
    dloader = CustomDataLoader(dataset, TEXT, args.target_name)
    data_iterators = dloader.construct_iterators(
        vectors="glove.6B.300d",
        vector_cache="../.vector_cache",
        batch_size=args.batch_size,
        device=torch.device("cpu"))

    model = ConvNet(input_channels=1,
                    output_dim=output_dimensions,
                    filter_list=args.kernel_sizes,
                    embed_matrix=TEXT.vocab.vectors,
                    num_filters=args.num_filters,
                    dropbout_probs=args.dropout)

    if args.class_weighting:
        weights = single_task_class_weighting(data_iterators[0])
        criterion = nn.CrossEntropyLoss(weight=weights.to(args.device))
    else:
        criterion = nn.CrossEntropyLoss()

    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=1e-5)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

    train(model,
          criterion,
          optimizer,
          scheduler,
          data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="csv_dataset",
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip)

    print("Evaluating model")

    model.load_state_dict(
        torch.load(args.logdir + "/csv_dataset_epoch_%d.pt" %
                   (args.n_epochs - 1)))
    evaluation(model,
               data_iterators[-1],
               criterion,
               device=args.device,
               include_lengths=args.use_lengths)
def main(args):
    torch.cuda.empty_cache()
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    TEXT = Field(lower=True,
                 tokenize="spacy",
                 tokenizer_language="en",
                 include_lengths=args.use_lengths,
                 batch_first=True,
                 fix_length=args.fix_length)

    output_dimensions = get_num_classes_dataset(args.data_path,
                                                args.target_names)

    towers = {
        MLP(args.num_filters_experts * len(args.filter_list_experts),
            args.linear_layers, output_dim): name
        for output_dim, name in zip(output_dimensions, args.target_names)
    }

    # Use name of dataset to get the arguments needed
    dataset = CSVDataset(
        text_field=TEXT,
        path_to_datadir=args.data_path).load(targets=args.target_names)
    # Load the dataset and split it into train and test portions

    dloader = CustomDataLoader(dataset, TEXT, args.target_names)
    data_iterators = dloader.construct_iterators(
        vectors="glove.6B.300d",
        vector_cache="../.vector_cache",
        batch_size=args.batch_size,
        device=torch.device("cpu"))

    # initialize the multiple CNNs and gating functions
    if args.gating_nets_type == "CNN":
        gating_networks = [
            ConvNet(input_channels=1,
                    filter_list=args.filter_list_g,
                    embed_matrix=TEXT.vocab.vectors,
                    num_filters=args.num_filters_g,
                    output_dim=args.n_experts)
            for _ in range(len(args.target_names))
        ]
    elif args.gating_nets_type == "LSTM":
        gating_networks = [
            SimpleLSTM(TEXT.vocab.vectors,
                       args.hidden_dim_g,
                       args.n_experts,
                       device=args.device,
                       use_lengths=args.use_lengths)
            for _ in range(len(args.target_names))
        ]

    elif args.gating_nets_type == "MLP":
        gating_networks = [
            MLPGate(args.fix_length, args.n_experts, TEXT.vocab.vectors)
            for _ in range(len(args.target_names))
        ]

    elif args.gating_nets_type == "TRANSFORMER":
        gating_networks = [
            TransformerModel(
                max_seq_len=args.fix_length,
                num_outputs=args.n_experts,
                word_embedding_matrix=TEXT.vocab.vectors,
                feed_fwd_dim=args.transformer_fwd_dim,
                num_transformer_layers=args.num_transformer_layers,
                num_transformer_heads=args.num_transformer_heads,
                pos_encoding_dropout=0.2,
                classification_dropout=0.3,
                batch_first=True,
                pad_index=TEXT.vocab.stoi[TEXT.pad_token])  # index of the Field's pad token ("<pad>" by default)
            for _ in range(len(args.target_names))
        ]
    else:
        # Fail fast on unsupported gating network types; otherwise gating_networks
        # would be undefined when the mixture-of-experts model is assembled below.
        raise ValueError("Unknown gating_nets_type: %s" % args.gating_nets_type)

    shared_layers = [
        MultitaskConvNet(input_channels=1,
                         filter_list=args.filter_list_experts,
                         embed_matrix=TEXT.vocab.vectors,
                         num_filters=args.num_filters_experts)
        for _ in range(args.n_experts)
    ]

    model = MultiGateMixtureofExperts(
        shared_layers=shared_layers,
        gating_networks=gating_networks,
        towers=towers,
        device=args.device,
        include_lens=args.use_lengths,
        batch_size=args.batch_size,
        gating_drop=args.gate_dropout,
        mean_diff=args.mean_diff,
        weight_adjust_mode=args.balancing_strategy)

    if args.class_weighting:
        task_weights = multitask_class_weighting(data_iterators[0],
                                                 args.target_names)
        losses = {
            name:
            nn.CrossEntropyLoss(weight=task_weights[name].to(args.device))
            for name in args.target_names
        }
    else:
        losses = {name: nn.CrossEntropyLoss() for name in args.target_names}

    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
    scheduler = StepLR(optimizer,
                       step_size=args.scheduler_stepsize,
                       gamma=args.scheduler_gamma)

    train(model,
          losses,
          optimizer,
          scheduler,
          data_iterators[0],
          device=args.device,
          include_lengths=args.use_lengths,
          save_path=args.logdir,
          save_name="%s_datasets" % "_".join(args.target_names),
          tensorboard_dir=args.logdir + "/runs",
          n_epochs=args.n_epochs,
          checkpoint_interval=args.save_interval,
          clip_val=args.gradient_clip,
          balancing_epoch_num=args.balance_epoch_cnt,
          balancing_mode=args.balancing_strategy)

    print("Evaluating model")
    model.load_state_dict(
        torch.load(
            "%s/%s_datasets_epoch_%d.pt" %
            (args.logdir, "_".join(args.target_names), args.n_epochs - 1)))
    evaluation(model, data_iterators[-1], losses, device=args.device)
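
# None of the snippets above show how `args` is built. The block below is a minimal,
# assumed command-line entry point for the first (Transformer) example only: every
# argument name is taken from the args.* attributes used there, but the defaults and
# the argparse setup are illustrative guesses, not the repo's actual configuration.
# It also assumes the Transformer example's main() is the one defined in this module.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train a text classifier on a CSV dataset")
    parser.add_argument("--data_path", type=str, required=True)
    parser.add_argument("--target_name", type=str, required=True)
    parser.add_argument("--logdir", type=str, default="./logs")
    parser.add_argument("--random_seed", type=int, default=42)
    parser.add_argument("--fix_length", type=int, default=128)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--fwd_dim", type=int, default=512)
    parser.add_argument("--num_transformer_layers", type=int, default=2)
    parser.add_argument("--num_transformer_heads", type=int, default=4)
    parser.add_argument("--pos_encoding_dropout", type=float, default=0.1)
    parser.add_argument("--fc_layer_dropout", type=float, default=0.3)
    parser.add_argument("--class_weighting", action="store_true")
    parser.add_argument("--learning_rate", type=float, default=1e-4)
    parser.add_argument("--scheduler_stepsize", type=int, default=10)
    parser.add_argument("--scheduler_gamma", type=float, default=0.9)
    parser.add_argument("--n_epochs", type=int, default=10)
    parser.add_argument("--save_interval", type=int, default=1)
    parser.add_argument("--gradient_clip", type=float, default=1.0)
    parser.add_argument("--device", type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu")
    main(parser.parse_args())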