# --- Model / data hyperparameters ---
hidden_dim = 50     # LSTM hidden state size
sentence_len = 32   # fixed sequence length fed to the classifier
nlabel = 8          # number of output classes

train_file = os.path.join(DATA_DIR, TRAIN_FILE)
test_file = os.path.join(DATA_DIR, TEST_FILE)

# Read the per-split file lists; each line of the index file names one
# document. `with` guarantees the handles are closed even if the list
# comprehension raises (the original used bare open()/close() pairs,
# which leak the descriptor on error).
with open(train_file, 'r') as fp_train:
    train_filenames = [os.path.join(TRAIN_DIR, line.strip())
                       for line in fp_train]
with open(test_file, 'r') as fp_test:
    test_filenames = [os.path.join(TEST_DIR, line.strip())
                      for line in fp_test]

# Combined list used to build the vocabulary over both splits.
# Plain concatenation replaces the original copy.deepcopy(), which is
# unnecessary for a flat list of immutable strings and yields the same
# independent list object.
filenames = train_filenames + test_filenames

corpus = DP.Corpus(DATA_DIR, filenames)

### create model
model = LSTMC.LSTMClassifier(embedding_dim=embedding_dim,
                             hidden_dim=hidden_dim,
                             vocab_size=len(corpus.dictionary),
                             label_size=nlabel,
                             batch_size=batch_size,
                             use_gpu=use_gpu)
if use_gpu:
    model = model.cuda()

### data processing
dtrain_set = DP.TxtDatasetProcessing(DATA_DIR, TRAIN_DIR, TRAIN_FILE,
                                     TRAIN_LABEL, sentence_len, corpus)
print('[!] available gpus:', torch.cuda.device_count()) # DONOTCHANGE: Reserved for nsml use bind_model(model, config) # DONOTCHANGE: They are reserved for nsml if config.pause: nsml.paused(scope=locals()) ### Training mode # 학습 모드일 때 사용합니다. (기본값) if config.mode == 'train': # 데이터를 로드합니다. if not HAS_DATASET and not IS_ON_NSML: # It is not running on nsml DATASET_PATH = '../sample_data/movie_review/' corpus = DP.Corpus(DATASET_PATH, total_train) print('[*]', 'Load corpus') # Load training data train_dataset = MovieReviewDataset(DATASET_PATH, config.strmaxlen, True, corpus) print('[*]', 'Load train dataset') train_loader = DataLoader(dataset=train_dataset, batch_size=config.batch, shuffle=True, collate_fn=collate_fn, num_workers=1) total_train = len(train_loader) # Load validation data test_dataset = MovieReviewDataset(DATASET_PATH,