    #sequence_groups = np.real(data.transform.ifft(sequence_groups))
    #
    return sequence_groups


#### Load data
# sequence_groups[i] contains data for class i
# 4-dimensional data structure: (class, sequence_num, timestep, channel_num)

# Manually selecting different training and validation datasets
#training_sequence_groups, validation_sequence_groups = data.split(
#    data.digits_session_7_dataset(channels=range(4, 8)), 1./6)

training_sequence_groups = transform_data(
    data.combine([
        data.digits_session_5_dataset(channels=range(4, 8)),
        data.digits_session_6_dataset(channels=range(4, 8)),
#        data.digits_session_7_dataset(channels=range(4, 8)),
#        training_sequence_groups,
    ]))

validation_sequence_groups = transform_data(
    data.combine([
#        data.digits_session_5_dataset(channels=range(4, 8)),
#        data.digits_session_6_dataset(channels=range(4, 8)),
        data.digits_session_7_dataset(channels=range(4, 8)),
#        validation_sequence_groups,
    ]))

#sequence_groups = transform_data(
#    data.combine([
#        data.digits_session_5_dataset(channels=range(4, 8)),
#        data.digits_session_6_dataset(channels=range(4, 8)),
#        data.digits_session_7_dataset(channels=range(4, 8)),
#    ]))
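# Illustrative sanity check of the layout described above (an added sketch, not
# part of the original pipeline; assumes numpy is already imported as np and
# that each group is a non-empty list of (timestep, channel_num) arrays):
for class_id, group in enumerate(training_sequence_groups):
    print('class %d: %d sequences, first sequence shape %s'
          % (class_id, len(group), np.shape(group[0])))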
#
#print map(len, sequence_groups)
#lens = map(len, data.get_inputs(sequence_groups)[0])
#print min(lens), np.mean(lens), max(lens)

# Split sequence_groups into training and validation data
#training_sequence_groups, validation_sequence_groups = data.split(sequence_groups, 1./6)

# Manually selecting different training and validation datasets
training_sequence_groups, validation_sequence_groups = data.split(
    data.digits_session_dependence_3_dataset(channels=range(1, 8)), 1. / 6)

training_sequence_groups = transform_data(
    data.combine([
#        map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
#        map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
#        map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
        training_sequence_groups,
    ]))

validation_sequence_groups = transform_data(
    data.combine([
#        map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
#        map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
#        map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
        validation_sequence_groups,
    ]))

# Pads or truncates each sequence to length
length = 2000 # 300,600
training_sequence_groups = data.transform.pad_truncate(
    training_sequence_groups, length)
validation_sequence_groups = data.transform.pad_truncate(
    validation_sequence_groups, length)
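# A minimal per-sequence sketch of the pad/truncate step above (illustrative
# only: the pipeline uses the library call data.transform.pad_truncate, and
# this sketch assumes short sequences are zero-padded at the end):
import numpy as np

def pad_truncate_sequence(seq, length):
    seq = np.asarray(seq)                    # (timestep, channel_num)
    if len(seq) >= length:
        return seq[:length]                  # truncate long sequences
    padding = np.zeros((length - len(seq),) + seq.shape[1:])
    return np.concatenate([seq, padding])    # zero-pad short sequences

# e.g. pad_truncate_sequence(np.ones((1500, 8)), 2000).shape -> (2000, 8)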
# Set up logging
logging.basicConfig(level=logging.INFO)

batch_size = 32

device_name = "cuda" if torch.cuda.is_available() else "cpu"
# device_name = "cpu"
device = torch.device(device_name)
print("Device: %s" % device)

# Load data and make batches
train_dataset = LyricsDataset("data/preprocessed_lyrics.csv", limit=10000, device=device)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=(device_name == "cpu"),
    collate_fn=lambda batch: combine(batch, device, "char_id_length"))

# CE loss (applies LogSoftmax + NLL internally)
criterion = nn.CrossEntropyLoss().to(device)

# Init model
model: LyricsGenerator = LyricsGenerator(device)
model = model.to(device)
model.train()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)


def prepare_target(target_ids, lengths, enforce_sorted=True):
    lengths = lengths.squeeze(dim=1).squeeze(dim=1)
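# Note on the criterion defined above (added, illustrative): nn.CrossEntropyLoss expects
# raw, unnormalized logits and applies LogSoftmax followed by NLLLoss internally, so
#     nn.CrossEntropyLoss()(logits, targets)
# computes the same value as
#     nn.NLLLoss()(torch.log_softmax(logits, dim=1), targets)
# which is why the model's output layer should not apply a softmax before the loss.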