Example #1
def test_load_directory_data():
    invalid_directory_path = 'invalid_directory_path'
    valid_dummy_directory = './resources/dummy_data_directory'
    empty_dummy_directory = './resources/dummy_empty_data_directory'
    channels = 1

    # should raise error when it receives an invalid directory path
    with pytest.raises(NotADirectoryError):
        DataLoader(from_csv=False, datapath=invalid_directory_path)

    # should raise error when trying to load an empty directory
    data_loader = DataLoader(from_csv=False, datapath=empty_dummy_directory)
    with pytest.raises(AssertionError):
        data_loader.get_data()

    # should assign an image's parent directory name as its label
    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory)
    images, labels, label_index_map = data_loader.get_data()
    label_count = len(label_index_map.keys())
    label = [0] * label_count
    label[label_index_map['happiness']] = 1
    assert label == labels[0]

    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory)
    images, labels, label_index_map = data_loader.get_data()
    # should return non-empty image and label arrays when given valid arguments
    assert len(images) > 0 and len(labels) > 0
    # should return same number of labels and images when given valid arguments
    assert len(images) == len(labels)
    # should reshape images to contain a channel axis in channels_last format
    assert images.shape[-1] == channels
Example #2
def __init__(self, config, predict_flag=False):
    # hyperparams
    self.max_len = config['max_len']
    self.max_features = config['max_features']
    self.embedding_size = config['embedding_size']
    self.lstm_hidden_size = config['lstm_hidden_size']
    self.dense_hidden_sizes = config['dense_hidden_sizes']
    self.dropout_rate = config['dropout_rate']
    # create model
    self.model = self.create_model()
    # train setup
    self.learning_rate = config['learning_rate']
    self.batch_size = config['batch_size']
    self.epochs = config['epochs']
    self.train_data_path = config['train_data_path']
    self.dev_data_path = config['dev_data_path']
    self.test_data_path = config['test_data_path']
    # save/restore model
    self.model_path = config['model_path']
    # dataloader
    self.dataloader = DataLoader()
    # predict step, load model weights
    if predict_flag:
        self.model.load_weights(self.model_path)
        print('Model weights loaded.')
Example #3
def main():
    args = Args().parse()
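    # build train and test loaders over the real and flow data directories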
    loader = DataLoader(args.train_real_data_dir,
                        args.train_flow_data_dir,
                        video_length=args.video_length,
                        batch_size=args.batch_size)
    test_loader = DataLoader(args.test_real_data_dir,
                             args.test_flow_data_dir,
                             video_length=args.video_length,
                             batch_size=args.batch_size)
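    # TF1-style session config; allow_growth keeps TensorFlow from reserving all GPU memory up front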
    graph = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(graph=graph, config=config) as sess:
        gan = SEENET(args, sess, graph)
        if args.test:
            gan.test_all(test_loader)
        else:
            gan.train(loader, test_loader)
Example #4
def test_load_csv_data():
    invalid_csv_file_path = 'invalid_csv_file_path'
    channels = 1
    invalid_image_dimensions = (50, 77)
    invalid_target_labels = [8, 9, 10]

    # should raise error when not given a csv column index for images
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col)

    # should raise error when given invalid csv file path
    with pytest.raises(FileNotFoundError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=invalid_csv_file_path,
                   image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error when given invalid csv column indices
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col, csv_image_col=10)

    # should raise error when given empty target_labels list
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, datapath=valid_csv_file_path, image_dimensions=valid_image_dimensions,
                   csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error when not given image dimensions
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error when given invalid image dimensions
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   image_dimensions=invalid_image_dimensions, csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error if no image samples found in csv file
    with pytest.raises(AssertionError):
        data_loader = DataLoader(from_csv=True, target_labels=invalid_target_labels, datapath=valid_csv_file_path,
                                 image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col,
                                 csv_image_col=csv_image_col)
        data_loader.get_data()

    data_loader = DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                             image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col,
                             csv_image_col=csv_image_col)
    images, labels = data_loader.get_data()
    # should return non-empty image and label arrays when given valid arguments
    assert len(images) > 0 and len(labels) > 0
    # should return same number of labels and images when given valid arguments
    assert len(images) == len(labels)
    # should reshape the images to given valid image_dimensions
    assert list(images.shape[1:]) == list(valid_image_dimensions) + [channels]
Example #5
def test_should_generate_images_based_on_out_channels_parameter():
    with pytest.raises(ValueError) as e:
        DataLoader(out_channels=0)
    assert "Out put channel should be either 3(RGB) or 1(Grey) but got 0" == str(e.value)

    # Should generate images with a single channel
    channels = 1
    data_loader = DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                             image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col,
                             csv_image_col=csv_image_col, out_channels=channels)
    images, labels = data_loader.get_data()
    assert list(images.shape[1:]) == list(valid_image_dimensions) + [channels]

    # Should generate images with 3 channels
    channels = 3
    data_loader = DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                             image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col,
                             csv_image_col=csv_image_col, out_channels=channels)
    images, labels = data_loader.get_data()
    assert list(images.shape[1:]) == list(valid_image_dimensions) + [channels]
Example #6
async def map_doc(index_name="documents"):
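    # stream the collection in chunks of CHUNKSIZE rows and yield Elasticsearch bulk "index" actions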
    CHUNKSIZE = 50000
    document_loader = DataLoader(COLLECTION_PATH,
                                 chunksize=CHUNKSIZE,
                                 names=["pid", "passage"])
    for _, collection in tqdm(enumerate(document_loader.reader),
                              desc="collection"):
        for i, row in collection.iterrows():
            yield {
                '_op_type': 'index',
                '_index': index_name,
                'pid': row.pid,
                'passage': row.passage,
            }
Example #7
def test_load_time_series_directory_data():
    invalid_directory_path = 'invalid_directory_path'
    valid_dummy_directory = './resources/dummy_time_series_data_directory'
    empty_dummy_directory = './resources/dummy_empty_data_directory'
    valid_time_steps = 4
    channels = 1

    # should raise error when it receives an invalid directory path
    with pytest.raises(NotADirectoryError):
        DataLoader(from_csv=False, datapath=invalid_directory_path, time_steps=4)

    # should raise error when trying to load an empty directory
    data_loader = DataLoader(from_csv=False, datapath=empty_dummy_directory, time_steps=4)
    with pytest.raises(AssertionError):
        data_loader.get_data()

    # should raise error when given a time_steps argument that is less than 1
    with pytest.raises(ValueError):
        DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=-4)

    # should raise error when given a time_steps argument that is not an integer
    with pytest.raises(ValueError):
        DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=4.7)

    # should raise error when trying to load a time series sample
    # containing fewer images than the time_steps argument
    with pytest.raises(ValueError):
        data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=10)
        data_loader.get_data()

    # should assign an image's parent directory name as its label
    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=valid_time_steps)
    samples, labels, label_index_map = data_loader.get_data()
    label_count = len(label_index_map.keys())
    label = [0] * label_count
    label[label_index_map['happiness']] = 1
    assert label == labels[0]

    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=valid_time_steps)
    samples, labels, label_index_map = data_loader.get_data()
    # should return non-empty sample and label arrays when given valid arguments
    assert len(samples) > 0 and len(labels) > 0
    # should return same number of labels and samples when given valid arguments
    assert len(samples) == len(labels)
    # should group images into time series samples of length time_steps
    assert samples.shape[1] == valid_time_steps
    # should reshape samples to contain a channel axis in channels_last format
    assert samples.shape[-1] == channels
Example #8
                           comment=cmd)

    is_inception = backbone == "inception"
    try:
        if is_inception:
            writer.add_graph(model, torch.rand(1, 3, 299, 299).to(device))
        else:
            writer.add_graph(model, torch.rand(1, 3, 224, 224).to(device))
    except Exception:
        # graph tracing is best-effort; ignore failures
        pass

    if opt.mix_precision:
        model, optimizer_ft = amp.initialize(model, optimizer_ft, opt_level="O1")

    criterion = nn.CrossEntropyLoss()
    data_loader = DataLoader(data_dir, batch_size)
    print(data_dir)
    try:
        train_model(model,
                    data_loader.dataloaders_dict,
                    criterion,
                    optimizer_ft,
                    cmd,
                    writer,
                    is_inception=is_inception,
                    model_save_path=model_save_path)
    except Exception:
        # record the failed experiment ID, overwriting any previous error log
        with open('error.txt', 'w') as f:
            f.write(opt.expID)
Example #9
from src.data_generator import DataGenerator
from src.dataloader import DataLoader
from src.neuralnets import ConvolutionalLstmNN
from sklearn.model_selection import train_test_split

time_delay = 2
raw_dimensions = (48, 48)
target_dimensions = (64, 64)
channels = 1
verbose = True
using_feature_extraction = True

print('--------------- Convolutional LSTM Model -------------------')
print('Loading data...')
directory_path = "image_data/sample_image_series_directory"
data_loader = DataLoader(from_csv=False,
                         datapath=directory_path,
                         time_steps=time_delay)
image_data, labels, emotion_map = data_loader.get_data()

if verbose:
    print('raw image data shape: ' + str(image_data.shape))
label_count = len(labels[0])

print('Training net...')
validation_split = 0.15
X_train, X_test, y_train, y_test = train_test_split(image_data,
                                                    labels,
                                                    test_size=validation_split,
                                                    random_state=42,
                                                    stratify=labels)
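# fit the data generator on the training split (time_delay frames per sample)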
train_gen = DataGenerator(time_delay=time_delay).fit(X_train, y_train)
Example #10
from keras import backend as K
from sklearn.model_selection import train_test_split
from src.data_generator import DataGenerator
from src.dataloader import DataLoader

K.set_image_data_format("channels_last")

verbose = True
target_dimensions = (128, 128)
raw_dimensions = (48, 48)
model_name = 'inception_v3'
fer_dataset_label_map = {'0': 'anger', '2': 'fear'}

print('Loading data...')
csv_file_path = "image_data/sample.csv"

data_loader = DataLoader(from_csv=True,
                         emotion_map=fer_dataset_label_map,
                         datapath=csv_file_path,
                         image_dimensions=raw_dimensions,
                         csv_label_col=0,
                         csv_image_col=1,
                         out_channels=3)
images, labels, emotion_map = data_loader.get_data()
if verbose:
    print('raw image shape: ' + str(images.shape))

print('Creating training/testing data...')
validation_split = 0.15
X_train, X_test, y_train, y_test = train_test_split(images,
                                                    labels,
                                                    test_size=validation_split,
                                                    random_state=42,
                                                    stratify=labels)
train_gen = DataGenerator().fit(X_train, y_train)
Example #11
import sys
sys.path.append('../')
from src.data_generator import DataGenerator
from src.dataloader import DataLoader
from src.neuralnets import ConvolutionalNN
from sklearn.model_selection import train_test_split

target_dimensions = (64, 64)
channels = 1
verbose = True

print('--------------- Convolutional Model -------------------')
print('Loading data...')
directory_path = "image_data/sample_image_directory"

data_loader = DataLoader(from_csv=False, datapath=directory_path)
image_data, labels, emotion_map = data_loader.get_data()
if verbose:
    print('raw image data shape: ' + str(image_data.shape))

print('Creating training/testing data...')
validation_split = 0.15
X_train, X_test, y_train, y_test = train_test_split(image_data,
                                                    labels,
                                                    test_size=validation_split,
                                                    random_state=42,
                                                    stratify=labels)
train_gen = DataGenerator().fit(X_train, y_train)
test_gen = DataGenerator().fit(X_test, y_test)
print('Training net...')
model = ConvolutionalNN(target_dimensions, channels, emotion_map)
Example #12
def __init__(self, model_config):
    self.configer = model_config
    self.dataloader = DataLoader()
    self.model = self.get_model()
    self.model_path = 'saved_models/esim_LCQMC_32_LSTM_0715_1036.h5'
    self._init_model()
Example #13
    def train(self, begin_epoch, end_epoch, batch_size, val_ratio,
              save_interval, lr, logdir, datasetroot, imgdirlist, tsdfdirlist,
              ids_train):

        self.begin_epoch = begin_epoch
        self.end_epoch = end_epoch
        self.batch_size = batch_size
        self.val_ratio = val_ratio
        self.save_interval = save_interval
        self.lr = lr
        self.logdir = logdir
        self.datasetroot = datasetroot
        self.imgdirlist = imgdirlist
        self.tsdfdirlist = tsdfdirlist
        self.ids_train = ids_train

        # Generate logdir
        if not os.path.exists(self.logdir):
            os.makedirs(self.logdir)
        print("logdir: " + self.logdir)

        # Prepare dataset
        self.dl = DataLoader(self.datasetroot, self.imgdirlist,
                             self.tsdfdirlist, self.batch_size, self.val_ratio,
                             self.ids_train)

        # Append a timestamp to the training log, creating it if needed
        with open(self.logdir + "/log.csv", 'a') as f:
            f.write("{0:%Y-%m-%d %H:%M:%S}\n".format(
                datetime.datetime.now()))

        # Load weight when starting from intermediate epoch
        if self.begin_epoch > 0:
            weights_path = self.logdir + "/weights_{0:d}.hdf5".format(
                self.begin_epoch)
            if os.path.exists(weights_path):
                print("Begin from " + weights_path)
                self.tetranet.load_weights(weights_path)
            else:
                print("File " + weights_path + " does not exist")
                print("Start training from epoch 0")
                self.begin_epoch = 0

        # Start training
        start_time = datetime.datetime.now()
        generator_train = self.dl.load_batch("train")

        if self.dl.num_val > 0:
            generator_val = self.dl.load_batch("val")
            steps_per_epoch_val = self.dl.steps_per_epoch_val
        else:
            generator_val = None
            steps_per_epoch_val = None

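        # train with the Keras generator API; CSV logging, periodic checkpoints,
        # and learning-rate decay are handled by the callbacks below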
        self.tetranet.fit_generator(
            generator_train,
            steps_per_epoch=self.dl.steps_per_epoch_train,
            initial_epoch=begin_epoch,
            epochs=end_epoch,
            verbose=1,
            callbacks=[
                keras.callbacks.CSVLogger(self.logdir + "/log.csv",
                                          separator=',',
                                          append=True),
                keras.callbacks.ModelCheckpoint(self.logdir +
                                                "/weights_{epoch:d}.hdf5",
                                                period=save_interval),
                keras.callbacks.LearningRateScheduler(self.step_decay)
            ],
            validation_data=generator_val,
            validation_steps=steps_per_epoch_val,
            use_multiprocessing=True,
            workers=0,
            max_queue_size=5,
            shuffle=True)

        print("All processing time: ", datetime.datetime.now() - start_time)