Example #1
0
    # --- CLI hyperparameter flags (the parser object is created above, outside this excerpt) ---
    parser.add_argument('--dim_hidden', type=int, default=300)
    parser.add_argument('--dim_gram', type=int, default=1)
    parser.add_argument('--dataset', type=str, default='data')
    parser.add_argument('--fast', type=int, choices=[0, 1], default=0)
    parser.add_argument('--screen', type=int, choices=[0, 1], default=0)
    parser.add_argument('--optimizer', type=str, default='ADAGRAD')
    parser.add_argument('--grained', type=int, default=3)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lr_word_vector', type=float, default=0.1)
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--batch', type=int, default=25)
    # parse_known_args tolerates unrecognized flags instead of erroring out;
    # the list of leftovers is deliberately discarded.
    args, _ = parser.parse_known_args(argv)

    # NOTE(review): args.seed is read here but no '--seed' option is added in
    # this excerpt -- presumably registered on the parser earlier; confirm.
    torch.random.manual_seed(args.seed)
    datamanager = DataManager(args.dataset, train=True)
    wordlist = datamanager.gen_word()
    train_data, val_data, test_data = datamanager.gen_data()
    # NOTE(review): the raw `argv` list (not the parsed `args`) is passed to
    # Model -- verify Model really expects the unparsed argument list.
    model = Model(wordlist, argv, len(datamanager.dict_target))
    # Number of batches; floor((n-1)/batch)+1 so a partial last batch counts.
    batch_n = (len(train_data) - 1) // args.batch + 1

    # Per-epoch accuracy history (populated further down, past this excerpt).
    details = {'acc_train': [], 'acc_dev': [], 'acc_test': []}

    for epoch in range(args.epoch):
        np.random.shuffle(train_data)  # fresh training order each epoch
        now = {}  # metrics collected for the current epoch
        now['loss'], now['acc_train'] = train(model,
                                              train_data,
                                              batch_size=args.batch,
                                              batch_n=batch_n)
        now['sum_loss'] = torch.sum(now['loss'])
        _, now['acc_dev'] = test(model, val_data)
    # --- CLI hyperparameter flags (parser created above, outside this excerpt) ---
    parser.add_argument('--dataset', type=str, default='data')
    parser.add_argument('--fast', type=int, choices=[0, 1], default=0)
    parser.add_argument('--screen', type=int, choices=[0, 1], default=0)
    parser.add_argument('--optimizer', type=str, default='ADAGRAD')
    parser.add_argument('--grained', type=int, default=3)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lr_word_vector', type=float, default=0.1)
    parser.add_argument('--epoch', type=int, default=15)
    parser.add_argument('--batch', type=int, default=25)
    # NOTE(review): argparse `type=bool` is a classic pitfall -- bool('False')
    # is True, so ANY non-empty command-line value enables training. An int
    # {0,1} flag (like '--fast' above) would behave as intended. Left as-is.
    parser.add_argument('--is_train', type=bool, default=False)
    args, _ = parser.parse_known_args(argv)
    is_train = args.is_train
    # NOTE(review): no '--seed' option is added in this excerpt -- presumably
    # registered on the parser earlier; confirm.
    random.seed(args.seed)
    if is_train:   
		# NOTE(review): this body is tab-indented while the lines above use
		# spaces; Python 3 raises TabError on such mixing -- this snippet is
		# presumably Python 2 code. Indentation preserved verbatim.
		data = DataManager(args.dataset)
		wordlist = data.gen_word()
		train_data, test_data, classify_data  = data.gen_data(args.grained)
		# NOTE(review): raw `argv` (not the parsed `args`) is passed to Model
		# -- verify Model expects the unparsed argument list.
		model = Model(wordlist, argv, len(data.dict_target))
		# NOTE(review): under Python 3 this is true division, making batch_n a
		# float; Python 2 floor division (ceil-style batch count) is presumably
		# intended -- confirm target interpreter.
		batch_n = (len(train_data)-1) / args.batch + 1
		optimizer = OptimizerList[args.optimizer](model.params, args.lr, args.lr_word_vector)
		# Per-epoch loss/accuracy history.
		details = {'loss': [], 'loss_train':[], 'loss_dev':[], 'loss_test':[], \
            'acc_train':[], 'acc_dev':[], 'acc_test':[], 'loss_l2':[]}
		index_to_word=[]
		index_to_word=data.index_to_word
		# Dump the vocabulary, one word per line.
		# NOTE(review): the file is opened in binary mode but str objects are
		# written (Python 2 only), and the handle is never closed or flushed
		# in this excerpt -- confirm downstream readers tolerate this.
		f=open('word.txt','wb')
		for word in index_to_word:
			f.write(word)
			f.write('\n')
		for e in range(args.epoch):
   
			print('train %d epoch:' %e)
Example #3
0
                        # (continuation of a parser.add_argument call that
                        # begins above this excerpt)
                        default=25)  # Size of data batches when doing training
    #############################################
    ## END SETTING UP DEFAULT HYPERPARAMETERS  ##
    #############################################

    args, _ = parser.parse_known_args(
        argv
    )  # Overwrite default hyperparameters if specified in command-line call

    # NOTE(review): args.seed is read but no '--seed' option is visible in this
    # excerpt -- presumably registered on the parser earlier; confirm.
    random.seed(args.seed)  # Will be used to shuffle training data
    print("### MAIN.PY: Initializing data set ...")
    data = DataManager(
        args.dataset
    )  # New instance of DataManager object, defined in DataManager.py
    print("### MAIN.PY: Preparing list of words from dictionary...")
    wordlist = data.gen_word()  # Get comprehensive list of words from data
    print("### MAIN.PY: Formatting data...")
    train_data, dev_data, test_data = data.gen_data(
        args.grained)  # Store formatted data retrieved from *.cor files
    print("### MAIN.PY: Initializing model...")
    # NOTE(review): the raw `argv` list (not the parsed `args`) is handed to
    # Model -- verify Model expects the unparsed argument list.
    model = Model(
        wordlist, argv, len(data.dict_target)
    )  # Initialize new model (specified in lstm_att_con.py in this case)
    batch_n = int(
        (len(train_data) - 1) / args.batch +
        1)  # Determine the number of batches to split training data into
    print("### MAIN.PY: Initializing optimizer...")
    optimizer = OptimizerList[args.optimizer](
        model.params, args.lr, args.lr_word_vector
    )  # Initialize instance of OptimizerList (defined as ADAGRAD object in Optimizer.py)
    # Per-epoch metric history (this statement continues past the end of this excerpt).
    details = {'loss': [], 'loss_train':[], 'loss_dev':[], 'loss_test':[], \
Example #4
0
    # --- CLI hyperparameter flags (the parser object is created above, outside this excerpt) ---
    parser.add_argument('--dim_hidden', type=int, default=300)
    parser.add_argument('--dim_gram', type=int, default=1)
    parser.add_argument('--dataset', type=str, default='data')
    parser.add_argument('--fast', type=int, choices=[0, 1], default=0)
    parser.add_argument('--screen', type=int, choices=[0, 1], default=0)
    parser.add_argument('--optimizer', type=str, default='ADAGRAD')
    parser.add_argument('--grained', type=int, default=3)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lr_word_vector', type=float, default=0.1)
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--batch', type=int, default=25)
    # parse_known_args tolerates unrecognized flags; leftovers are discarded.
    args, _ = parser.parse_known_args(argv)

    # NOTE(review): args.seed is read but no '--seed' option is added in this
    # excerpt -- presumably registered on the parser earlier; confirm.
    random.seed(args.seed)
    data = DataManager(args.dataset)
    wordlist = data.gen_word()
    train_data, dev_data, test_data = data.gen_data(args.grained)
    # NOTE(review): the raw `argv` list (not the parsed `args`) is passed to
    # Model -- verify Model really expects the unparsed argument list.
    model = Model(wordlist, argv, len(data.dict_target))
    #batch_n = (len(train_data)-1) / args.batch + 1
    # Number of batches; the int() wrapper makes the old Python-2-style
    # expression above (kept commented out) work under Python 3's true division.
    batch_n = int((len(train_data) - 1) / args.batch + 1)
    optimizer = OptimizerList[args.optimizer](model.params, args.lr, args.lr_word_vector)
    # Per-epoch loss/accuracy history.
    details = {'loss': [], 'loss_train':[], 'loss_dev':[], 'loss_test':[], \
            'acc_train':[], 'acc_dev':[], 'acc_test':[], 'loss_l2':[]}

    for e in range(args.epoch):
        random.shuffle(train_data)  # fresh training order each epoch
        now = {}  # metrics collected for the current epoch
        now['loss'], now['loss_l2'] = train(model, train_data, optimizer, e, args.batch, batch_n)
        now['loss_train'], now['acc_train'] = test(model, train_data, args.grained)
        now['loss_dev'], now['acc_dev'] = test(model, dev_data, args.grained)
        now['loss_test'], now['acc_test'] = test(model, test_data, args.grained)