def test_iterate_scheme():
    """Smoke-test Fuel request iteration against an ``IndexableDataset``.

    Builds a tiny random (features, targets) dataset, walks it once with a
    ``ShuffledScheme`` (batch size 3), and prints each batch's array shapes.

    Changes vs. original: removed the unused ``schemes`` list and the
    commented-out debug loop; wrapped iteration in try/finally so the
    dataset state is closed even if ``get_data`` raises.
    """
    from fuel.datasets import IndexableDataset
    from fuel.schemes import ShuffledScheme

    # Fixed seed so the printed shapes/content are reproducible.
    seed = 1234
    rng = numpy.random.RandomState(seed)
    features = rng.randint(256, size=(8, 2, 2))
    targets = rng.randint(4, size=(8, 1))
    dataset = IndexableDataset(
        indexables=OrderedDict([('features', features),
                                ('targets', targets)]),
        axis_labels=OrderedDict([('features', ('batch', 'height', 'width')),
                                 ('targets', ('batch', 'index'))]))

    state = dataset.open()
    try:
        # 8 examples / batch_size 3 -> batches of 3, 3, 2 (shuffled order).
        scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=3)
        for request in scheme.get_request_iterator():
            data = dataset.get_data(state=state, request=request)
            print(data[0].shape, data[1].shape)
    finally:
        # Always release the dataset state, even on error.
        dataset.close(state)
def test_indexabel_dataset():
    """Exercise the basic ``IndexableDataset`` open/get_data/close cycle.

    Builds a small random (features, targets) dataset, prints its opened
    state, fetches examples 1 and 0 by index, and closes the state.

    NOTE(review): the name keeps the original's 'indexabel' spelling so any
    external test collector / caller referencing it keeps working.
    """
    from fuel.datasets import IndexableDataset

    # Deterministic toy data: 8 examples of 2x2 "images" plus scalar targets.
    rng = numpy.random.RandomState(1234)
    dataset = IndexableDataset(
        indexables=OrderedDict([
            ('features', rng.randint(256, size=(8, 2, 2))),
            ('targets', rng.randint(4, size=(8, 1))),
        ]),
        axis_labels=OrderedDict([
            ('features', ('batch', 'height', 'width')),
            ('targets', ('batch', 'index')),
        ]))

    state = dataset.open()
    print('State is {}.'.format(state))
    # Request two specific examples (indices 1 then 0) in one call.
    print(dataset.get_data(state=state, request=[1, 0]))
    dataset.close(state=state)
# --- Training driver (fragment: the `while` body continues past this chunk;
# --- the early-stopping bookkeeping below is presumably consumed there).
val_prev = 1000  # previous/best validation loss seen so far (initialised high)
patience=0#patience counter
val_counter=0  # validation checks without improvement — TODO confirm (rest of loop not visible)
epoch=0
num_epochs=600  # not referenced in the visible part of the loop — presumably an epoch cap used below; verify
print("Starting training...")
# We iterate over epochs:
while 'true':  # non-empty string is always truthy, so this is `while True`;
               # an early-stopping `break` presumably appears in the unseen body
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    # NOTE(review): datasets are re-opened every epoch and no matching
    # close() is visible in this fragment — confirm h1/h2 are closed later.
    h1=train_set.open()
    h2=valid_set.open()
    # Fresh iteration schemes per epoch: shuffled minibatches of 64 for
    # training, sequential minibatches of 128 for validation.
    scheme = ShuffledScheme(examples=train_set.num_examples, batch_size=64)
    scheme1 = SequentialScheme(examples=valid_set.num_examples, batch_size=128)
    train_stream = DataStream(dataset=train_set, iteration_scheme=scheme)
    valid_stream = DataStream(dataset=valid_set, iteration_scheme=scheme1)
    start_time = time.time()
    for data in train_stream.get_epoch_iterator():
        # NOTE(review): unpack order here is (data, labels, mask), while the
        # other training loops in this file unpack (data, mask, labels).
        # Fuel yields sources in the dataset's declared order — verify this
        # against how train_set's indexables were declared (not visible here).
        t_data,t_labs,t_mask = data
        terr,tacc = train_func(t_data,t_mask, t_labs)
        train_err += terr
# Validation split wrapped as a Fuel IndexableDataset; the OrderedDict fixes
# the source order to (features, mask, targets), which determines the unpack
# order of each batch in the loop below.
valid_set = IndexableDataset(
    indexables = OrderedDict([('features', val_Data), ('mask', val_Msk), ('targets', val_tars)]),
    axis_labels={'features':('batch','maxlen','feat_dim'),'mask':('batch','maxlen'), 'targets':('batch','index')})
trainerr=[]  # per-epoch training-error history — TODO confirm it is appended to later (not visible in this fragment)
print("Starting training...")
# We iterate over epochs:
# (fragment: the epoch body continues past the visible end of this chunk)
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    tr_acc = 0
    train_batches = 0
    # NOTE(review): states are re-opened every epoch with no close() visible
    # in this fragment — confirm t_state/v_state are closed later.
    t_state = train_set.open()
    v_state = valid_set.open()
    # Both splits are visited in shuffled minibatches of 32 each epoch.
    scheme = ShuffledScheme(examples=train_set.num_examples, batch_size=32)
    scheme1 = ShuffledScheme(examples=valid_set.num_examples, batch_size=32)
    train_stream = DataStream(dataset=train_set, iteration_scheme=scheme)
    valid_stream = DataStream(dataset=valid_set, iteration_scheme=scheme1)
    start_time = time.time()
    for data in train_stream.get_epoch_iterator():
        # Batch arrives in the dataset's source order: features, mask, targets.
        t_data, t_mask, t_labs = data
        terr, tacc = train_func(t_data, t_mask, t_labs)
        train_err += terr
# --- Training driver (fragment: the `while` body continues past this chunk).
num_epochs = 5  # not used by the `while` loop below — presumably a cap consumed in the unseen body; verify
epoch = 0
print("Starting training...")
# We iterate over epochs:
# Early-stopping trackers: best-so-far validation loss and accuracy.
val_prev = np.inf
a_prev = -np.inf
while 'true':  # truthy string literal == infinite loop (same as `while True`);
               # a `break` presumably follows in the unseen body
    #for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    tr_acc = 0
    train_batches = 0
    # NOTE(review): re-opened every epoch; no close() visible in this
    # fragment — confirm the states are released later.
    t_state = train_set.open()
    v_state = valid_set.open()
    # Shuffled minibatches of 128 for training, sequential 128 for validation.
    scheme = ShuffledScheme(examples=train_set.num_examples, batch_size=128)
    scheme1 = SequentialScheme(examples=valid_set.num_examples, batch_size=128)
    train_stream = DataStream(dataset=train_set, iteration_scheme=scheme)
    valid_stream = DataStream(dataset=valid_set, iteration_scheme=scheme1)
    start_time = time.time()
    for data in train_stream.get_epoch_iterator():
        # Unpack assumes source order (features, mask, targets) — TODO confirm
        # against the dataset's indexables declaration (not visible here).
        t_data, t_mask, t_labs = data
        terr, tacc = train_func(t_data, t_mask, t_labs)
        train_err += terr
        tr_acc += tacc
        # NOTE(review): this chunk begins mid-way through load_dataset() —
        # the function header, the training-features loop header, and the
        # definitions of Data/Mask/maxlen/labelz/val_* are not visible here;
        # the indentation below is reconstructed accordingly.
        data = fi.getall()[:,:20]  # keep only the first 20 HTK feature dims — TODO confirm intended cut
        Mask[ind,:data.shape[0]] = 1.0  # flag real (non-padded) frames for this utterance
        pad = maxlen - data.shape[0]
        # Zero-pad every utterance to maxlen frames so all stack into one array.
        data = np.vstack((data, np.zeros((pad,20), dtype='float32')))
        Data[ind,:,:] = data
    # Same read / mask / pad pipeline for the validation features.
    for ind,f in enumerate(val_features):
        fname = os.path.join(dpth,f+'.fea')
        fi = htkmfc.HTKFeat_read(fname)
        data = fi.getall()[:,:20]
        val_Mask[ind,:data.shape[0]] = 1.0
        pad = maxlen - data.shape[0]
        data = np.vstack((data, np.zeros((pad,20), dtype='float32')))
        val_Data[ind,:,:] = data
    # Labels cast to int32 for downstream use (presumably Theano/Lasagne — verify).
    return Data, Mask, np.asarray(labelz, dtype='int32'), val_Data, val_Mask, np.asarray(val_labelz, dtype='int32')

# --- Module-level smoke test: wrap the training split in a Fuel dataset and
# --- iterate one shuffled epoch, printing batch shapes.
Data, mask, labelz, val_data, val_mask, val_labels = load_dataset()
train_set = IndexableDataset(
    indexables = OrderedDict([('features', Data), ('mask', mask), ('targets', labelz)]),
    axis_labels={'features':('batch','maxlen','feat_dim'),'mask':('batch','maxlen'), 'targets':('batch','index')})
state = train_set.open()  # NOTE(review): never closed in this fragment — confirm intentional
scheme = ShuffledScheme(examples=train_set.num_examples, batch_size=32)
data_stream = DataStream(dataset=train_set, iteration_scheme=scheme)
for feats, mask,labs in data_stream.get_epoch_iterator():
    # Batches come in source order (features, mask, targets); note the loop
    # variable `mask` shadows the module-level `mask` array above.
    print(feats.shape, mask.shape, labs.shape)