Example #1
from sklearn.model_selection import train_test_split

from src.data_generator import DataGenerator
from src.dataloader import DataLoader
from src.neuralnets import TimeDelayConvNN

print('--------------- Time-Delay Convolutional Model -------------------')
print('Loading data...')
directory_path = "image_data/sample_image_series_directory"
target_dimensions = (64, 64)
time_delay = 3
channels = 1
verbose = True

data_loader = DataLoader(from_csv=False, datapath=directory_path, time_steps=time_delay)
image_data, labels, emotion_map = data_loader.get_data()
if verbose:
    print('raw image data shape: ' + str(image_data.shape))
label_count = len(labels[0])

print('Creating training/testing data...')
validation_split = 0.25
X_train, X_test, y_train, y_test = train_test_split(image_data,
                                                    labels,
                                                    test_size=validation_split,
                                                    random_state=42,
                                                    stratify=labels)
train_gen = DataGenerator(time_delay=time_delay).fit(X_train, y_train)
test_gen = DataGenerator(time_delay=time_delay).fit(X_test, y_test)
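The TimeDelayConvNN import and the target_dimensions/channels variables above are not used in this excerpt. Below is a hedged sketch of the usual EmoPy-style continuation; the TimeDelayConvNN constructor and the DataGenerator.generate signature are assumptions about that API, not confirmed by this snippet:

# Assumed API: TimeDelayConvNN(target_dimensions, channels, emotion_map, time_delay)
# and DataGenerator.generate(target_dimensions, batch_size) yielding training batches.
model = TimeDelayConvNN(target_dimensions, channels, emotion_map, time_delay=time_delay)
model.fit_generator(train_gen.generate(target_dimensions, batch_size=5),
                    test_gen.generate(target_dimensions, batch_size=5),
                    epochs=5)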
Example #2
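The class below uses a number of Keras names that the snippet does not import. A plausible set of imports, assuming standalone Keras 2.x; the project-specific DataLoader import path is not shown in the original and is left as a comment:

from keras.layers import (Input, Embedding, BatchNormalization, Bidirectional, LSTM,
                          Lambda, Add, Multiply, Concatenate, Dot, Permute, Dense,
                          Dropout, GlobalAvgPool1D, GlobalMaxPool1D)
from keras.activations import softmax
from keras.models import Model
from keras.optimizers import Adam
# from <project module> import DataLoader  # project-specific; path not shown in the snippet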
class ESIM(object):
    def __init__(self, model_config):
        self.configer = model_config
        self.dataloader = DataLoader()
        self.model = self.get_model()
        self.model_path = 'saved_models/esim_LCQMC_32_LSTM_0715_1036.h5'
        self._init_model()

    def _init_model(self):
        self.model.load_weights(self.model_path)
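        # Warm-up: run one throwaway prediction on an example sentence pair so the
        # predict graph is built before the first real request is served.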
        p_pred, h_pred = self.dataloader.char_index(['买过了'], ['买好了'],
                                                    self.configer.maxlen,
                                                    pad_mode='pre')
        self.model.predict([p_pred, h_pred]).item()
        print('ESIM model loaded.')

    def _unchanged_shape(self, input_shape):
        "Function for Lambda layer"
        return input_shape

    def _substract(self, input_1, input_2):
        "Substract element-wise"
        neg_input_2 = Lambda(lambda x: -x,
                             output_shape=self._unchanged_shape)(input_2)
        out_ = Add()([input_1, neg_input_2])
        return out_

    def _submult(self, input_1, input_2):
        "Get multiplication and subtraction then concatenate results"
        mult = Multiply()([input_1, input_2])
        sub = self._substract(input_1, input_2)
        out_ = Concatenate()([sub, mult])
        return out_

    def _apply_multiple(self, input_, layers):
        "Apply layers to input then concatenate result"
        if len(layers) < 2:
            raise ValueError('Layers list should contain more than 1 layer')
        out_ = Concatenate()([layer(input_) for layer in layers])
        return out_

    def _soft_attention_alignment(self, input_1, input_2):
        "Align text representation with neural soft attention"
        attention = Dot(axes=-1)([input_1, input_2])
        w_att_1 = Lambda(lambda x: softmax(x, axis=1),
                         output_shape=self._unchanged_shape)(attention)
        w_att_2 = Permute(
            (2, 1))(Lambda(lambda x: softmax(x, axis=2),
                           output_shape=self._unchanged_shape)(attention))
        in1_aligned = Dot(axes=1)([w_att_1, input_1])
        in2_aligned = Dot(axes=1)([w_att_2, input_2])
        return in1_aligned, in2_aligned

    def get_model(self):
        q1 = Input(name='q1', shape=(self.configer.maxlen, ))
        q2 = Input(name='q2', shape=(self.configer.maxlen, ))

        embedding_op = Embedding(self.configer.max_features,
                                 self.configer.embedding_size,
                                 input_length=self.configer.maxlen)
        # embedding_q2 = Embedding(self.configer.max_features, self.configer.embedding_size,
        #                          input_length=self.configer.maxlen)
        bn = BatchNormalization()

        # embedding + batch normalization
        q1_embed = bn(embedding_op(q1))
        q2_embed = bn(embedding_op(q2))

        # TODO: one shared layer or two separate ones?
        # bi-lstm
        encode = Bidirectional(
            LSTM(self.configer.lstm_dim, return_sequences=True))
        q1_encoded = encode(q1_embed)
        q2_encoded = encode(q2_embed)

        # Attention
        q1_aligned, q2_aligned = self._soft_attention_alignment(
            q1_encoded, q2_encoded)

        # Compose
        q1_combined = Concatenate()(
            [q1_encoded, q2_aligned,
             self._submult(q1_encoded, q2_aligned)])
        q2_combined = Concatenate()(
            [q2_encoded, q1_aligned,
             self._submult(q2_encoded, q1_aligned)])

        # todo
        compose = Bidirectional(
            LSTM(self.configer.lstm_dim, return_sequences=True))
        q1_compare = compose(q1_combined)
        q2_compare = compose(q2_combined)

        # Aggregate
        q1_rep = self._apply_multiple(
            q1_compare,
            [GlobalAvgPool1D(), GlobalMaxPool1D()])
        q2_rep = self._apply_multiple(
            q2_compare,
            [GlobalAvgPool1D(), GlobalMaxPool1D()])

        # Classifier
        merged = Concatenate()([q1_rep, q2_rep])

        dense = BatchNormalization()(merged)
        dense = Dense(512, activation='relu')(dense)
        dense = BatchNormalization()(dense)
        dense = Dropout(self.configer.dropout_rate)(dense)
        dense = Dense(self.configer.dense_dim, activation='relu')(dense)
        dense = BatchNormalization()(dense)
        dense = Dropout(self.configer.dropout_rate)(dense)
        out_ = Dense(1, activation='sigmoid')(dense)

        model = Model(inputs=[q1, q2], outputs=out_)
        model.compile(optimizer=Adam(lr=1e-3),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model

    def predict(self, sent1, sent2):
        p_pred, h_pred = self.dataloader.char_index(sent1, sent2,
                                                    self.configer.maxlen)
        return self.model.predict([p_pred, h_pred], batch_size=1024)
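A minimal usage sketch of the class above. The Config values are hypothetical placeholders; any object exposing the attributes read in get_model (maxlen, max_features, embedding_size, lstm_dim, dense_dim, dropout_rate) will do, and they must match the architecture the saved weights were trained with:

class Config:
    maxlen = 32          # placeholder values, not taken from the original project
    max_features = 5000
    embedding_size = 128
    lstm_dim = 64
    dense_dim = 256
    dropout_rate = 0.5

esim = ESIM(Config())                         # builds the graph and loads the saved weights
scores = esim.predict(['买过了'], ['买好了'])  # match probability for each sentence pair
print(scores)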
Example #3
    def __init__(self, model_config):
        self.configer = model_config
        self.dataloader = DataLoader()
        self.model = self.get_model()
        self.model_path = 'saved_models/esim_LCQMC_32_LSTM_0715_1036.h5'
        self._init_model()
Example #4
    def train(self, begin_epoch, end_epoch, batch_size, val_ratio,
              save_interval, lr, logdir, datasetroot, imgdirlist, tsdfdirlist,
              ids_train):

        self.begin_epoch = begin_epoch
        self.end_epoch = end_epoch
        self.batch_size = batch_size
        self.val_ratio = val_ratio
        self.save_interval = save_interval
        self.lr = lr
        self.logdir = logdir
        self.datasetroot = datasetroot
        self.imgdirlist = imgdirlist
        self.tsdfdirlist = tsdfdirlist
        self.ids_train = ids_train

        # Generate logdir
        if not os.path.exists(self.logdir):
            os.makedirs(self.logdir)
        print("logdir: " + self.logdir)

        # Prepare dataset
        self.dl = DataLoader(self.datasetroot, self.imgdirlist,
                             self.tsdfdirlist, self.batch_size, self.val_ratio,
                             self.ids_train)

        # Check existence of logs
        if not os.path.exists(self.logdir + "/log.csv"):
            with open(self.logdir + "/log.csv", 'w') as f:
                f.write("{0:%Y-%m-%d %H:%M:%S}\n".format(
                    datetime.datetime.now()))
        else:
            with open(self.logdir + "/log.csv", 'a') as f:
                f.write("{0:%Y-%m-%d %H:%M:%S}\n".format(
                    datetime.datetime.now()))

        # Load weight when starting from intermediate epoch
        if (self.begin_epoch > 0):
            if (os.path.exists(self.logdir +
                               "/weights_{0:d}.hdf5".format(self.begin_epoch))
                ):
                print("Begin from " + self.logdir +
                      "/weights_{0:d}.hdf5".format(self.begin_epoch))
                self.tetranet.load_weights(
                    self.logdir +
                    "/weights_{0:d}.hdf5".format(self.begin_epoch))
            else:
                print("File " + self.logdir +
                      "/weights_{0:d}.hdf5".format(self.begin_epoch) +
                      "does not exist")
                print("Start training from epoch 0")
                self.begin_epoch = 0

        # Start training
        start_time = datetime.datetime.now()
        generator_train = self.dl.load_batch("train")

        if self.dl.num_val > 0:
            generator_val = self.dl.load_batch("val")
            steps_per_epoch_val = self.dl.steps_per_epoch_val
        else:
            generator_val = None
            steps_per_epoch_val = None

        self.tetranet.fit_generator(
            generator_train,
            steps_per_epoch=self.dl.steps_per_epoch_train,
            initial_epoch=begin_epoch,
            epochs=end_epoch,
            verbose=1,
            callbacks=[
                keras.callbacks.CSVLogger(self.logdir + "/log.csv",
                                          separator=',',
                                          append=True),
                keras.callbacks.ModelCheckpoint(self.logdir +
                                                "/weights_{epoch:d}.hdf5",
                                                period=save_interval),
                keras.callbacks.LearningRateScheduler(self.step_decay)
            ],
            validation_data=generator_val,
            validation_steps=steps_per_epoch_val,
            use_multiprocessing=True,
            workers=0,
            max_queue_size=5,
            shuffle=True)

        print("All processing time: ", datetime.datetime.now() - start_time)
Example #5
class TetrahedraNetwork():
    def __init__(self, path_adjlists):

        self.path_adjlists = path_adjlists

        self.img_shape = (256, 256, 3)

        # # Load adjLists and make adjMats for GCN
        # self.adjLists_forGCN = utils.load_adjLists(path_adjlists + "/adjlist_[0-9].csv")
        # if self.adjLists_forGCN == []:
        # 	print("No adjlists_forGCN are loaded")
        # 	return

        # shapelist = []
        # for i in range(len(self.adjLists_forGCN)):
        # 	size = len(self.adjLists_forGCN[i])
        # 	shapelist += [[size, size]]
        # self.adjMats_forGCN = utils.make_adjMats(self.adjLists_forGCN, shapelist, gen_identity=True)

        # Load adjLists for PCN
        self.adjLists_forPCN = utils.load_adjLists(path_adjlists +
                                                   "/adjlist_[0-9]to[0-9].csv")
        if self.adjLists_forPCN == []:
            print("No adjlists_forPCN are loaded")
            return

        # Construct network
        # self.tetranet = tetranet.create_tetrahedra_network(self.adjLists_forPCN, self.adjMats_forGCN, shape = self.img_shape)
        self.tetranet = tetranet.create_tetrahedra_network(
            self.adjLists_forPCN, shape=self.img_shape)
        self.optimizer = Adam(lr=0.005)
        # self.tetranet.summary()

        # self.tetranet.compile(loss='categorical_crossentropy',
        self.tetranet.compile(loss='mean_squared_error',
                              optimizer=self.optimizer,
                              metrics=["mean_squared_error"])

        # from keras.utils import plot_model
        # plot_model(self.tetranet, to_file='tetranet.png')

    def step_decay(self, epoch):
        x = self.lr
        if epoch >= 50:
            x = 0.0001
        return x

    def train(self, begin_epoch, end_epoch, batch_size, val_ratio,
              save_interval, lr, logdir, datasetroot, imgdirlist, tsdfdirlist,
              ids_train):

        self.begin_epoch = begin_epoch
        self.end_epoch = end_epoch
        self.batch_size = batch_size
        self.val_ratio = val_ratio
        self.save_interval = save_interval
        self.lr = lr
        self.logdir = logdir
        self.datasetroot = datasetroot
        self.imgdirlist = imgdirlist
        self.tsdfdirlist = tsdfdirlist
        self.ids_train = ids_train

        # Generate logdir
        if not os.path.exists(self.logdir):
            os.makedirs(self.logdir)
        print("logdir: " + self.logdir)

        # Prepare dataset
        self.dl = DataLoader(self.datasetroot, self.imgdirlist,
                             self.tsdfdirlist, self.batch_size, self.val_ratio,
                             self.ids_train)

        # Check existence of logs
        if not os.path.exists(self.logdir + "/log.csv"):
            with open(self.logdir + "/log.csv", 'w') as f:
                f.write("{0:%Y-%m-%d %H:%M:%S}\n".format(
                    datetime.datetime.now()))
        else:
            with open(self.logdir + "/log.csv", 'a') as f:
                f.write("{0:%Y-%m-%d %H:%M:%S}\n".format(
                    datetime.datetime.now()))

        # Load weight when starting from intermediate epoch
        if (self.begin_epoch > 0):
            if (os.path.exists(self.logdir +
                               "/weights_{0:d}.hdf5".format(self.begin_epoch))
                ):
                print("Begin from " + self.logdir +
                      "/weights_{0:d}.hdf5".format(self.begin_epoch))
                self.tetranet.load_weights(
                    self.logdir +
                    "/weights_{0:d}.hdf5".format(self.begin_epoch))
            else:
                print("File " + self.logdir +
                      "/weights_{0:d}.hdf5".format(self.begin_epoch) +
                      "does not exist")
                print("Start training from epoch 0")
                self.begin_epoch = 0

        # Start training
        start_time = datetime.datetime.now()
        generator_train = self.dl.load_batch("train")

        if self.dl.num_val > 0:
            generator_val = self.dl.load_batch("val")
            steps_per_epoch_val = self.dl.steps_per_epoch_val
        else:
            generator_val = None
            steps_per_epoch_val = None

        self.tetranet.fit_generator(
            generator_train,
            steps_per_epoch=self.dl.steps_per_epoch_train,
            initial_epoch=begin_epoch,
            epochs=end_epoch,
            verbose=1,
            callbacks=[
                keras.callbacks.CSVLogger(self.logdir + "/log.csv",
                                          separator=',',
                                          append=True),
                keras.callbacks.ModelCheckpoint(self.logdir +
                                                "/weights_{epoch:d}.hdf5",
                                                period=save_interval),
                keras.callbacks.LearningRateScheduler(self.step_decay)
            ],
            validation_data=generator_val,
            validation_steps=steps_per_epoch_val,
            use_multiprocessing=True,
            workers=0,
            max_queue_size=5,
            shuffle=True)

        print("All processing time: ", datetime.datetime.now() - start_time)

    def predict(self, Imgs, savePaths):
        print("Predict")

        Imgs = np.array(Imgs, dtype=np.float32)
        out = self.tetranet.predict(Imgs)
        for i in range(len(out)):
            utils.saveTSDF_bin(out[i], savePaths[i])
            print("Saved result to: ", savePaths[i])

    def prepare_data(self, datasetroot, imgdirlist, tsdfdirlist, paramdirlist,
                     ids_test, savedir):
        self.datasetroot = datasetroot
        self.imgdirlist = imgdirlist
        self.tsdfdirlist = tsdfdirlist
        self.paramdirlist = paramdirlist
        self.savedir = savedir

        # Parse .txt
        self.imgdirPaths = []
        with open(self.imgdirlist, "r") as f:
            lines = f.read().split()
            for imgdirname in lines:
                if imgdirname[0] == "#":
                    continue
                imgdirpath = self.datasetroot + "/" + imgdirname
                if not exists(imgdirpath):
                    print("Dataset directory {} does not exists.".format(
                        imgdirpath))
                else:
                    self.imgdirPaths += [imgdirpath]
        self.tsdfdirPaths = []
        with open(self.tsdfdirlist, "r") as f:
            lines = f.read().split()
            for tsdfdirname in lines:
                if tsdfdirname[0] == "#":
                    continue
                tsdfdirpath = self.datasetroot + "/" + tsdfdirname
                if not exists(tsdfdirpath):
                    print("Dataset directory {} does not exists.".format(
                        tsdfdirpath))
                else:
                    self.tsdfdirPaths += [tsdfdirpath]
        self.paramdirPaths = []
        with open(self.paramdirlist, "r") as f:
            lines = f.read().split()
            for paramdirname in lines:
                if paramdirname[0] == "#":
                    continue
                paramdirpath = self.datasetroot + "/" + paramdirname
                if not exists(paramdirpath):
                    print("Dataset directory {} does not exists.".format(
                        paramdirpath))
                else:
                    self.paramdirPaths += [paramdirpath]
        print("Read data from:")
        print(self.imgdirPaths)
        print(self.tsdfdirPaths)
        print(self.paramdirPaths)

        # Count number of all dataset
        self.countList = []
        self.nameList_color = []
        self.nameList_TSDF = []
        self.nameList_param = []
        for imgdirpath, tsdfdirpath, paramdirpath in zip(
                self.imgdirPaths, self.tsdfdirPaths, self.paramdirPaths):
            searchpath_color_png = imgdirpath + "/*.png"
            searchpath_color_jpg = imgdirpath + "/*.jpg"
            searchpath_TSDF = tsdfdirpath + "/*.bin"
            searchpath_param = paramdirpath + "/*.pkl"
            names_color = sorted(
                glob.glob(searchpath_color_png) +
                glob.glob(searchpath_color_jpg))
            names_color.sort(
                key=lambda x: len(x))  # stable sort by length; ties keep dictionary order
            names_TSDF = sorted(glob.glob(searchpath_TSDF))
            names_TSDF.sort(
                key=lambda x: len(x))  # stable sort by length; ties keep dictionary order
            names_param = sorted(glob.glob(searchpath_param))
            names_param.sort(
                key=lambda x: len(x))  # stable sort by length; ties keep dictionary order

            if len(names_color) == len(names_TSDF) == len(names_param):
                self.countList += [len(names_color)]
                self.nameList_color += names_color
                self.nameList_TSDF += names_TSDF
                self.nameList_param += names_param
            else:
                print(
                    "The number of the input and target data is not same in:")
                print(imgdirpath, tsdfdirpath, paramdirpath)
                self.countList += [0]
                print("color: {}, TSDF: {}, param: {}".format(
                    len(names_color), len(names_TSDF), len(names_param)))

        print("Num of available dataset: {0:d} (from {1:d} dir(s))".format(
            sum(self.countList), len(self.countList)))
        print(self.countList)

        # Generate index list
        if ids_test is not None:
            print("Select data by loaded Ids")
            print("Path to Ids_test: {}".format(ids_test))
            self.Ids_all = []
            with open(ids_test, "r") as f:
                lines = f.read().split()
                for idx in lines:
                    self.Ids_all += [int(idx)]
            self.Ids_all = np.array(self.Ids_all)
            if len(self.Ids_all) > sum(self.countList):
                print("Invalid inputs")
                sys.exit()
            self.nameList_color = np.array(self.nameList_color)[self.Ids_all]
            self.nameList_TSDF = np.array(self.nameList_TSDF)[self.Ids_all]
            self.nameList_param = np.array(self.nameList_param)[self.Ids_all]

        # Copy all data
        if not exists(self.savedir + "/imgs"):
            os.makedirs(self.savedir + "/imgs")
        if not exists(self.savedir + "/TSDF_GT"):
            os.makedirs(self.savedir + "/TSDF_GT")
        if not exists(self.savedir + "/params"):
            os.makedirs(self.savedir + "/params")
        for i, (imgpath, TSDFpath, parampath) in enumerate(
                zip(self.nameList_color, self.nameList_TSDF,
                    self.nameList_param)):
            shutil.copyfile(imgpath, self.savedir + "/imgs/{}.png".format(i))
            shutil.copyfile(TSDFpath,
                            self.savedir + "/TSDF_GT/{}.bin".format(i))
            shutil.copyfile(parampath,
                            self.savedir + "/params/{}.pkl".format(i))
        print("Saved imgs to: " + self.savedir + "/imgs")
        print("Saved TSDFs to: " + self.savedir + "/TSDF_GT")
        print("Saved params to: " + self.savedir + "/params")

        # Load images
        print("Loading input imgs")
        Imgs = [
            cv2.resize(cv2.imread(imgpath, -1), (256, 256))[:, :, 0:3]
            for imgpath in self.nameList_color
        ]
        Imgs = np.array(Imgs, dtype=np.float32)
        print("Predict & save TSDFs...")
        out = self.tetranet.predict(Imgs)
        if not exists(self.savedir + "/TSDF_pred"):
            os.makedirs(self.savedir + "/TSDF_pred")
        for i in range(len(out)):
            utils.saveTSDF_bin(out[i],
                               self.savedir + "/TSDF_pred/{}.bin".format(i))
        print("Saved predocted TSDFs to: {}".format(self.savedir +
                                                    "/TSDF_pred"))
Example #6
def test_load_directory_data():
    invalid_directory_path = 'invalid_directory_path'
    valid_dummy_directory = './resources/dummy_data_directory'
    empty_dummy_directory = './resources/dummy_empty_data_directory'
    channels = 1

    # should raise error when receives an invalid directory path
    with pytest.raises(NotADirectoryError):
        DataLoader(from_csv=False, datapath=invalid_directory_path)

    # should raise error when tries to load empty directory
    data_loader = DataLoader(from_csv=False, datapath=empty_dummy_directory)
    with pytest.raises(AssertionError):
        data_loader.get_data()

    # should assign an image's parent directory name as its label
    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory)
    images, labels, label_index_map = data_loader.get_data()
    label_count = len(label_index_map.keys())
    label = [0] * label_count
    label[label_index_map['happiness']] = 1
    assert label == labels[0]

    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory)
    images, labels, label_index_map = data_loader.get_data()
    # should return non-empty image and label arrays when given valid arguments
    assert len(images) > 0 and len(labels) > 0
    # should return same number of labels and images when given valid arguments
    assert len(images) == len(labels)
    # should reshape image to contain channel_axis in channel_last format
    assert images.shape[-1] == channels
Example #7
def test_load_csv_data():
    # valid_csv_file_path, valid_target_labels, valid_image_dimensions,
    # csv_label_col and csv_image_col are module-level test fixtures defined
    # outside this snippet.
    invalid_csv_file_path = 'invalid_csv_file_path'
    channels = 1
    invalid_image_dimensions = (50, 77)
    invalid_target_labels = [8, 9, 10]

    # should raise error when not given csv column indices for images and labels
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col)

    # should raise error when given invalid csv file path
    with pytest.raises(FileNotFoundError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=invalid_csv_file_path,
                   image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error when given invalid csv column indices
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col, csv_image_col=10)

    # should raise error when given empty target_labels list
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, datapath=valid_csv_file_path, image_dimensions=valid_image_dimensions,
                   csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error when not given image dimensions
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error when given invalid image dimensions
    with pytest.raises(ValueError):
        DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                   image_dimensions=invalid_image_dimensions, csv_label_col=csv_label_col, csv_image_col=csv_image_col)

    # should raise error if no image samples found in csv file
    with pytest.raises(AssertionError):
        data_loader = DataLoader(from_csv=True, target_labels=invalid_target_labels, datapath=valid_csv_file_path,
                                 image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col,
                                 csv_image_col=csv_image_col)
        data_loader.get_data()

    data_loader = DataLoader(from_csv=True, target_labels=valid_target_labels, datapath=valid_csv_file_path,
                             image_dimensions=valid_image_dimensions, csv_label_col=csv_label_col,
                             csv_image_col=csv_image_col)
    images, labels = data_loader.get_data()
    # should return non-empty image and label arrays when given valid arguments
    assert len(images) > 0 and len(labels) > 0
    # should return same number of labels and images when given valid arguments
    assert len(images) == len(labels)
    # should reshape the images to given valid image_dimensions
    assert list(images.shape[1:]) == list(valid_image_dimensions) + [channels]
Example #8
def test_load_time_series_directory_data():
    invalid_directory_path = 'invalid_directory_path'
    valid_dummy_directory = './resources/dummy_time_series_data_directory'
    empty_dummy_directory = './resources/dummy_empty_data_directory'
    valid_time_steps = 4
    channels = 1

    # should raise error when receives an invalid directory path
    with pytest.raises(NotADirectoryError):
        DataLoader(from_csv=False, datapath=invalid_directory_path, time_steps=4)

    # should raise error when tries to load empty directory
    data_loader = DataLoader(from_csv=False, datapath=empty_dummy_directory, time_steps=4)
    with pytest.raises(AssertionError):
        data_loader.get_data()

    # should raise error when given time_step argument that is less than 1
    with pytest.raises(ValueError):
        DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=-4)

    # should raise error when given time_step argument that is not an integer
    with pytest.raises(ValueError):
        DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=4.7)

    # should raise error when tries to load time series sample
    # containing a quantity of images less than the time_steps argument
    with pytest.raises(ValueError):
        data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=10)
        data_loader.get_data()

    # should assign an image's parent directory name as its label
    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=valid_time_steps)
    samples, labels, label_index_map = data_loader.get_data()
    label_count = len(label_index_map.keys())
    label = [0] * label_count
    label[label_index_map['happiness']] = 1
    assert label == labels[0]

    data_loader = DataLoader(from_csv=False, datapath=valid_dummy_directory, time_steps=valid_time_steps)
    samples, labels, label_index_map = data_loader.get_data()
    # should return non-empty image and label arrays when given valid arguments
    assert len(samples) > 0 and len(labels) > 0
    # should return same number of labels and images when given valid arguments
    assert len(samples) == len(labels)
    # should include the time_steps dimension as the second axis
    assert samples.shape[1] == valid_time_steps
    # should reshape image to contain channel_axis in channel_last format
    assert samples.shape[-1] == channels