Exemplo n.º 1
0
    except FileExistsError:
        pass
    create_log(os.path.join(output_path, 'train.log'), params_log + '\n')
    # Use the GPU when CUDA is available; otherwise fall back to the CPU
    isCuda = torch.cuda.is_available()
    if isCuda:
        device = 'cuda'
    else:
        device = 'cpu'

    # Set the random seed for reproducible experiments
    torch.manual_seed(42)
    if isCuda:
        torch.cuda.manual_seed(42)

    text_field, label_field, train_dataset, valid_dataset, train_iterator, valid_iterator = loadDataset(
        dataset, batch_size=batch_size, device=device)

    # Build Vocabulary
    # vec = vocab.Vectors(embedding_path)
    text_field.build_vocab(train_dataset,
                           valid_dataset,
                           max_size=35000,
                           min_freq=2)
    label_field.build_vocab(train_dataset, valid_dataset)
    vocab_size = len(text_field.vocab)
    label_size = len(label_field.vocab) - 1

    with open(os.path.join(output_path, "text_field.field"), "wb") as f:
        dill.dump(text_field, f)

    with open(os.path.join(output_path, "label_field.field"), "wb") as f:
Exemplo n.º 2
0
    isCuda = torch.cuda.is_available()
    if isCuda:
        device = 'cuda'
    else:
        device = 'cpu'

    # Seed the `random` module for reproducible experiments
    random.seed(random_seed)
    # Seed PyTorch (and CUDA, when available) for reproducible experiments
    torch.manual_seed(random_seed)
    # Seed the CUDA RNG too when a GPU is in use.
    # NOTE(review): as written, the `else` below pairs with `if isCuda`, so a
    # CPU-only run raises a bare Exception right after seeding. This excerpt
    # may have lost an intervening `if` that the `else` originally belonged
    # to — confirm against the full source before relying on this branch.
    if isCuda:
        torch.cuda.manual_seed(random_seed)

    else:
        raise Exception
    text_field1, label_field1, train_dataset1, valid_dataset1, train_iterator1, valid_iterator1 = loadDataset(
        dataset1, batch_size, device)
    text_field2, label_field2, train_dataset2, valid_dataset2, train_iterator2, valid_iterator2 = loadDataset(
        dataset2, batch_size, device)

    # Build Vocabulary
    vec = vocab.Vectors(embedding_path)

    text_field1.build_vocab(train_dataset1, valid_dataset1, train_dataset2, valid_dataset2,
                            max_size=35000, min_freq=2, vectors=vec)
    text_field2.build_vocab(train_dataset1, valid_dataset1, train_dataset2, valid_dataset2,
                            max_size=35000, min_freq=2, vectors=vec)
    label_field1.build_vocab(train_dataset1, valid_dataset1)
    label_field2.build_vocab(train_dataset2, valid_dataset2)

    vocab_size1 = len(text_field1.vocab)
    label_size1 = len(label_field1.vocab) - 1