Пример #1
0
def compute_statistics_samples(ids, boolean_classes=True, attr=0):
    '''
    Collect, plot and print statistics about the length of label runs.

    For every label file of the persons contained in ``ids``, column
    ``attr`` is scanned and, per class, a counter is stored for each run
    of consecutive samples carrying that class (the counter is the number
    of consecutive-sample transitions inside the run). Per class the
    mean, standard deviation, minimum, median and box-plot values of
    these counters are plotted and printed, followed by the absolute and
    relative number of samples per class.

    Nothing is returned; results are only printed and plotted.

    @param ids: ids of the persons to be analysed (e.g. ids for train)
    @param boolean_classes: 8 classes are analysed if True, otherwise 2
    @param attr: column of the label file that is analysed
    '''

    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    NUM_CLASSES = 8 if boolean_classes else 2

    # one list of run counters per class
    counter_list_class = {cl: [] for cl in range(NUM_CLASSES)}

    hist_classes_all = np.zeros((NUM_CLASSES))
    for P in persons:
        if P not in ids:
            continue
        for r, R in enumerate(recordings):
            # NOTE: SCENARIO is indexed with the position of the recording
            # in this function, not with the recording name.
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                S = SCENARIO[r]
            for N in repetition:
                # default annotator of the person, followed by the
                # per-recording exceptions
                annotator_file = annotator[P]
                if P == 'S07' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and SCENARIO[r] == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in [
                        'R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13',
                        'R15'
                ]:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"
                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, S, P, R, annotator_file, N)

                # Exception (not a bare except) so that Ctrl-C and
                # SystemExit are not reported as a missing file.
                try:
                    data = csv_reader.reader_labels(FOLDER_PATH +
                                                    file_name_label)
                    labels = data[:, attr]
                    print("Files loaded")

                    for c in range(NUM_CLASSES):

                        # indexes per class
                        idxs = np.where(labels == c)[0]
                        counter = 0

                        # counting if continuity in labels
                        for idx in range(idxs.shape[0] - 1):
                            if idxs[idx + 1] - idxs[idx] == 1:
                                counter += 1
                            else:
                                counter_list_class[c].append(counter)
                                counter = 0

                        # the run that is still open after the last pair
                        # of indexes is stored as well
                        if idxs.shape[0] > 1:
                            counter_list_class[c].append(counter)

                    # Statistics
                    hist_classes = np.bincount(labels.astype(int),
                                               minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes
                except Exception:
                    print("No file {}".format(FOLDER_PATH +
                                              file_name_label))

    # figure 1: normalised gaussian + horizontal boxplot per class
    fig = plt.figure()
    axis_list = [fig.add_subplot(4, 2, pos) for pos in range(1, 9)]

    # figure 2: gaussians of all classes in one axis
    fig2 = plt.figure()
    axis_list_2 = [fig2.add_subplot(111)]

    # figure 3: vertical boxplot per class
    fig3 = plt.figure()
    axis_list_3 = [fig3.add_subplot(4, 2, pos) for pos in range(1, 9)]

    colours = {
        0: 'b',
        1: 'g',
        2: 'r',
        3: 'c',
        4: 'm',
        5: 'y',
        6: 'k',
        7: 'greenyellow'
    }

    mins = []
    mus = []
    sigmas = []
    min_1_data = []
    min_2_data = []
    min_3_data = []
    medians = []
    lower_whiskers = []
    Q1s = []
    for cl in range(NUM_CLASSES):
        counters = np.array(counter_list_class[cl])
        mu = np.mean(counters)
        sigma = np.std(counters)

        mus.append(mu)
        sigmas.append(sigma)
        min_1_data.append(-1 * sigma + mu)
        min_2_data.append(-2 * sigma + mu)
        min_3_data.append(-3 * sigma + mu)
        mins.append(np.min(counters))
        medians.append(np.median(counters))

        x = np.linspace(-3 * sigma + mu, 3 * sigma + mu, 100)

        # gaussian scaled to a maximum of 1; builtin float replaces the
        # np.float alias that newer NumPy versions no longer provide
        pdf = norm.pdf(x, mu, sigma)
        pdf_scaled = pdf / float(np.max(pdf))
        legend_label = 'mean:{}_std:{}'.format(mu, sigma)

        axis_list[cl].plot(x, pdf_scaled, '-b', label=legend_label)
        axis_list[cl].plot(counter_list_class[cl],
                           np.ones(len(counter_list_class[cl])), 'ro')
        result_box = axis_list[cl].boxplot(counter_list_class[cl], vert=False)
        # the first whisker line goes from the lower whisker end to Q1
        lower_whiskers.append(result_box['whiskers'][0].get_data()[0][0])
        Q1s.append(result_box['whiskers'][0].get_data()[0][1])

        # the colour is given only once (keyword) instead of in the
        # format string and in the keyword
        axis_list_2[0].plot(x,
                            pdf_scaled,
                            '-',
                            label=legend_label,
                            color=colours[cl])
        axis_list_2[0].plot(counter_list_class[cl],
                            np.ones(len(counter_list_class[cl])), 'ro')

        axis_list_3[cl].boxplot(counter_list_class[cl])

        axis_list_2[0].relim()
        axis_list_2[0].autoscale_view()
        axis_list_2[0].legend(loc='best')

        fig.canvas.draw()
        fig2.canvas.draw()
        plt.pause(2.0)

    print("Mins {} Min {} Argmin {}".format(mins, np.min(mins),
                                            np.argmin(mins)))
    print("Means {} Min {} Argmin {}".format(mus, np.min(mus), np.argmin(mus)))
    print("Stds {} Min {}".format(sigmas, sigmas[np.argmin(mus)]))
    print("Medians {} Min {} Argmin {}".format(medians, np.min(medians),
                                               np.argmin(medians)))
    print("Lower Whiskers {} Min {} Argmin {}".format(
        lower_whiskers, np.min(lower_whiskers), np.argmin(lower_whiskers)))
    print("Q1s {} Min {} Argmin {}".format(Q1s, np.min(Q1s), np.argmin(Q1s)))

    print("1sigma from mu {}".format(min_1_data))
    print("2sigma from mu {}".format(min_2_data))
    print("3sigma from mu {}".format(min_3_data))

    print("Min 1sigma from mu {}".format(np.min(min_1_data)))
    print("Min 2sigma from mu {}".format(np.min(min_2_data)))
    print("Min 3sigma from mu {}".format(np.min(min_3_data)))

    print("Number of samples per class {}".format(hist_classes_all))
    print("Number of samples per class {}".format(
        hist_classes_all / float(np.sum(hist_classes_all)) * 100))

    return
Пример #2
0
def generate_data(ids,
                  sliding_window_length,
                  sliding_window_step,
                  data_dir=None,
                  half=False,
                  identity_bool=False,
                  usage_modus='train'):
    '''
    creates files for each of the sequences, which are extracted from a file
    following a sliding window approach

    returns nothing; every window is pickled into
    ``data_dir/seq_<counter>.pkl`` as a dict with the keys "data",
    "label", "labels" and "identity"

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored; it is handed
                     to os.path.join, so the default None has to be
                     replaced by a real path for files to be written
    @param half: if True only every second row of data and labels is used
    @param identity_bool: if True the recordings of each person are split
                          into train, val and test ranges
    @param usage_modus: 'train', 'val' or 'test'; selects the range of
                        recordings when identity_bool is True
    @raise ValueError: identity_bool is True and usage_modus is unknown
    '''

    # [first, last) recording numbers per modus for the identity experiment
    default_split = {'train': (1, 21), 'val': (21, 26), 'test': (26, 31)}
    person_split = {
        'S11': {'train': (1, 10), 'val': (10, 12), 'test': (12, 15)},
        'S12': {'train': (1, 25), 'val': (25, 28), 'test': (28, 31)},
    }

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
            continue

        if identity_bool:
            split = person_split.get(P, default_split)
            if usage_modus not in split:
                # without this check the recordings would be undefined
                raise ValueError(
                    "usage_modus must be 'train', 'val' or 'test', "
                    "got {!r}".format(usage_modus))
            first, last = split[usage_modus]
        else:
            first, last = 1, 31
        recordings = ['R{:02d}'.format(r) for r in range(first, last)]

        for R in recordings:
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                S = SCENARIO[R]
            for N in repetition:
                # default annotator of the person, followed by the
                # per-recording exceptions
                annotator_file = annotator[P]
                if P == 'S07' and SCENARIO[R] == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and SCENARIO[R] == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and SCENARIO[R] == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in [
                        'R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13',
                        'R15'
                ]:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"

                file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(
                    P, S, P, R, annotator_file, N)
                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, S, P, R, annotator_file, N)

                # "except Exception" keeps the best-effort handling of the
                # original but lets Ctrl-C / SystemExit through
                try:
                    # getting data
                    data = csv_reader.reader_data(FOLDER_PATH +
                                                  file_name_norm)
                    print("\nFiles loaded in modus {}\n{}".format(
                        usage_modus, file_name_norm))
                    data = select_columns_opp(data)
                    print("Columns selected")
                except Exception:
                    print("\n In generating data, No file {}".format(
                        FOLDER_PATH + file_name_norm))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(FOLDER_PATH +
                                                      file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]

                    # Deleting rows containing the "none" class
                    data = np.delete(data, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)

                    if half:
                        downsampling = range(0, data.shape[0], 2)
                        data = data[downsampling]
                        labels = labels[downsampling]

                    data_t, data_x, data_y = divide_x_y(data)
                    del data_t

                except Exception:
                    print(
                        "\n In generating data, Error getting the data {}".
                        format(FOLDER_PATH + file_name_norm))
                    continue

                try:
                    data_x = normalize(data_x)

                    # checking if annotations are consistent
                    if np.sum(data_y == labels[:, 0]) != data_y.shape[0]:
                        print("\nNot consisting annotation in  {}".format(
                            file_name_norm))
                        continue

                    # Sliding window approach
                    print("Starting sliding window")
                    X, y, y_all = opp_sliding_window(
                        data_x,
                        labels.astype(int),
                        sliding_window_length,
                        sliding_window_step,
                        label_pos_end=False)
                    print("Windows are extracted")

                    # Statistics
                    hist_classes = np.bincount(y[:, 0],
                                               minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes
                    print("Number of seq per class {}".format(
                        hist_classes_all))

                    for f in range(X.shape[0]):
                        try:
                            sys.stdout.write(
                                '\rCreating sequence file number {} '
                                'with id {}'.format(f, counter_seq))
                            sys.stdout.flush()

                            seq = np.reshape(X[f],
                                             newshape=(1, X.shape[1],
                                                       X.shape[2]))
                            # builtin float replaces the np.float alias
                            # that newer NumPy versions no longer provide
                            seq = np.require(seq, dtype=float)

                            obj = {
                                "data": seq,
                                "label": y[f],
                                "labels": y_all[f],
                                "identity": labels_persons[P]
                            }
                            seq_path = os.path.join(
                                data_dir,
                                'seq_{0:06}.pkl'.format(counter_seq))
                            # the context manager closes the file even if
                            # pickling fails
                            with open(seq_path, 'wb') as seq_file:
                                pickle.dump(
                                    obj,
                                    seq_file,
                                    protocol=pickle.HIGHEST_PROTOCOL)

                            counter_seq += 1

                        except Exception as err:
                            # a real exception instead of raising a str;
                            # it is reported by the enclosing handler
                            raise RuntimeError(
                                '\nError adding the seq') from err

                    print("\nCorrect data extraction from {}".format(
                        FOLDER_PATH + file_name_norm))

                    # free the arrays before the next file is loaded
                    del data
                    del data_x
                    del data_y
                    del X
                    del labels
                    del class_labels

                except Exception:
                    print("\n In generating data, No file {}".format(
                        FOLDER_PATH + file_name_norm))

    return
Пример #3
0
def generate_data(ids,
                  sliding_window_length,
                  sliding_window_step,
                  data_dir=None,
                  identity_bool=False,
                  usage_modus='train'):
    '''
    creates files for each of the sequences, which are extracted from a file
    following a sliding window approach

    returns nothing
    Sequences are stored in given path as ``seq_<counter>.pkl``; each file
    holds a dict with the keys "data", "label", "labels" and "identity"

    A KeyboardInterrupt prints a message and stops the generation.

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored; it is handed
                     to os.path.join, so the default None has to be
                     replaced by a real path for files to be written
    @param identity_bool: selecting for identity experiment; the split
                          boundaries are only defined for S07 to S14, any
                          other person in ids raises a KeyError
    @param usage_modus: selecting Train, Val or testing
    @raise ValueError: identity_bool is True and usage_modus is unknown
    '''

    persons = [
        "S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10",
        "S11", "S12", "S13", "S14"
    ]

    # Exclusive upper recording number of the train, val and test range
    # per subject; train starts at 1, val at idx_train, test at idx_val
    idx_train = {
        "S07": 21,
        "S08": 21,
        "S09": 21,
        "S10": 11,
        "S11": 11,
        "S12": 23,
        "S13": 21,
        "S14": 21
    }
    idx_val = {
        "S07": 26,
        "S08": 26,
        "S09": 26,
        "S10": 16,
        "S11": 13,
        "S12": 26,
        "S13": 26,
        "S14": 26
    }
    idx_test = {
        "S07": 31,
        "S08": 31,
        "S09": 31,
        "S10": 24,
        "S11": 16,
        "S12": 31,
        "S13": 31,
        "S14": 31
    }

    counter_seq = 0
    hist_classes_all = np.zeros((NUM_CLASSES))

    for P in persons:
        if P not in ids:
            print("\n6 No Person in expected IDS {}".format(P))
            continue

        if identity_bool:
            # Selecting the proportions of the train, val or testing according to the quantity of
            # recordings per subject, as there are not equal number of recordings per subject
            # see dataset for checking the recording files per subject
            if usage_modus == 'train':
                first, last = 1, idx_train[P]
            elif usage_modus == 'val':
                first, last = idx_train[P], idx_val[P]
            elif usage_modus == 'test':
                first, last = idx_val[P], idx_test[P]
            else:
                # without this check the recordings would be undefined
                raise ValueError(
                    "usage_modus must be 'train', 'val' or 'test', "
                    "got {!r}".format(usage_modus))
        else:
            first, last = 1, 31
        recordings = ['R{:02d}'.format(rec) for rec in range(first, last)]

        print("\nModus {} \n{}".format(usage_modus, recordings))
        for R in recordings:
            try:
                S = SCENARIO[R]
                file_name_data = "{}/{}_{}_{}.csv".format(P, S, P, R)
                file_name_label = "{}/{}_{}_{}_labels.csv".format(
                    P, S, P, R)
                print("\n{}\n{}".format(file_name_data, file_name_label))

                # The handlers below catch Exception (not everything), so
                # that Ctrl-C reaches the KeyboardInterrupt handler.
                try:
                    # getting data
                    data = reader_data(FOLDER_PATH + file_name_data)
                    print("\nFiles loaded in modus {}\n{}".format(
                        usage_modus, file_name_data))
                    data_x = data["data"]
                    print("\nFiles loaded")
                except Exception:
                    print("\n1 In loading data,  in file {}".format(
                        FOLDER_PATH + file_name_data))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(FOLDER_PATH +
                                                      file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]

                    # Deleting rows containing the "none" class
                    data_x = np.delete(data_x, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)
                except Exception:
                    print(
                        "2 In generating data, Error getting the data {}".
                        format(FOLDER_PATH + file_name_data))
                    continue

                try:
                    data_x = norm_mbientlab(data_x)
                except Exception:
                    print("\n3  In generating data, Plotting {}".format(
                        FOLDER_PATH + file_name_data))
                    continue

                try:
                    # checking if annotations are consistent: data and
                    # labels need the same number of rows (the original
                    # compared data_x with itself, which is always true)
                    if data_x.shape[0] != labels.shape[0]:
                        print(
                            "\n4 Not consisting annotation in  {}".format(
                                file_name_data))
                        continue

                    # Sliding window approach
                    print("\nStarting sliding window")
                    X, y, y_all = opp_sliding_window(
                        data_x,
                        labels.astype(int),
                        sliding_window_length,
                        sliding_window_step,
                        label_pos_end=False)
                    print("\nWindows are extracted")

                    # Statistics
                    hist_classes = np.bincount(y[:, 0],
                                               minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes
                    print("\nNumber of seq per class {}".format(
                        hist_classes_all))

                    for f in range(X.shape[0]):
                        try:
                            sys.stdout.write(
                                '\r' +
                                'Creating sequence file number {} with id {}'
                                .format(f, counter_seq))
                            sys.stdout.flush()

                            seq = np.reshape(X[f],
                                             newshape=(1, X.shape[1],
                                                       X.shape[2]))
                            # builtin float replaces the np.float alias
                            # that newer NumPy versions no longer provide
                            seq = np.require(seq, dtype=float)

                            obj = {
                                "data": seq,
                                "label": y[f],
                                "labels": y_all[f],
                                "identity": labels_persons[P]
                            }
                            seq_path = os.path.join(
                                data_dir,
                                'seq_{0:06}.pkl'.format(counter_seq))
                            # the context manager closes the file even if
                            # pickling fails
                            with open(seq_path, 'wb') as seq_file:
                                pickle.dump(
                                    obj,
                                    seq_file,
                                    protocol=pickle.HIGHEST_PROTOCOL)

                            counter_seq += 1

                        except Exception as err:
                            # a real exception instead of raising a str;
                            # it is reported by the enclosing handler
                            raise RuntimeError(
                                '\nError adding the seq') from err

                    print("\nCorrect data extraction from {}".format(
                        FOLDER_PATH + file_name_data))

                    # free the arrays before the next file is loaded
                    del data
                    del data_x
                    del X
                    del labels
                    del class_labels

                except Exception:
                    print("\n5 In generating data, No created file {}".
                          format(FOLDER_PATH + file_name_data))
                print(
                    "-----------------\n{}\n{}\n-----------------".format(
                        file_name_data, file_name_label))
            except KeyboardInterrupt:
                print('\nYou cancelled the operation.')
                # really stop instead of going on with the next recording
                return

    return
Пример #4
0
def compute_min_num_samples(ids, boolean_classes=True, attr=0):
    '''
    Compute the minimal length of the label runs of each class.

    For every label file of the persons contained in ``ids``, column
    ``attr`` is scanned and, per class, the shortest run of consecutive
    samples carrying that class is searched (a run is measured by the
    number of consecutive-sample transitions inside it). One row per file
    is collected; entries equal to 0 are replaced by infinity before the
    minimum over all files is taken.

    The per-file minima and the absolute and relative number of samples
    per class are printed.

    @param ids: ids of the persons to be analysed (e.g. ids for train)
    @param boolean_classes: 8 classes are analysed if True, otherwise 2
    @param attr: column of the label file that is analysed
    @return: array with the minimal run counter per class over all files;
             np.min raises ValueError if no label file could be read
    '''

    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    NUM_CLASSES = 8 if boolean_classes else 2

    # one row with the per-class minima is appended per label file
    min_durations = np.empty((0, NUM_CLASSES))
    hist_classes_all = np.zeros((NUM_CLASSES))
    for P in persons:
        if P not in ids:
            continue
        for r, R in enumerate(recordings):
            # NOTE: SCENARIO is indexed with the position of the recording
            # in this function, not with the recording name.
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                S = SCENARIO[r]
            for N in repetition:
                # default annotator of the person, followed by the
                # per-recording exceptions
                annotator_file = annotator[P]
                if P == 'S07' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and SCENARIO[r] == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and SCENARIO[r] == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in [
                        'R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13',
                        'R15'
                ]:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"
                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, S, P, R, annotator_file, N)

                # Exception (not a bare except) so that Ctrl-C and
                # SystemExit are not reported as a missing file.
                try:
                    data = csv_reader.reader_labels(FOLDER_PATH +
                                                    file_name_label)
                    labels = data[:, attr]
                    print("Files loaded")

                    min_duration = np.zeros((1, NUM_CLASSES))
                    for c in range(NUM_CLASSES):

                        # indexes per class
                        idxs = np.where(labels == c)[0]
                        counter = 0
                        min_counter = np.inf
                        # counting if continuity in labels
                        for idx in range(idxs.shape[0] - 1):
                            if idxs[idx + 1] - idxs[idx] == 1:
                                counter += 1
                            else:
                                if counter < min_counter:
                                    min_counter = counter
                                # A run ended: the counter restarts for
                                # every run, not only after a new minimum,
                                # otherwise later runs are summed up.
                                counter = 0
                        # the run that is still open at the end
                        if counter < min_counter:
                            min_counter = counter
                        min_duration[0, c] = min_counter

                        print("class  {} counter size {}".format(
                            c, min_counter))

                    min_durations = np.append(min_durations,
                                              min_duration,
                                              axis=0)
                    # Statistics
                    hist_classes = np.bincount(labels.astype(int),
                                               minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes

                except Exception:
                    print("No file {}".format(FOLDER_PATH +
                                              file_name_label))

    # zeros must not win the minimum over the files
    min_durations[min_durations == 0] = np.inf
    print("Minimal duration per class \n{}".format(min_durations))

    print("Number of samples per class {}".format(hist_classes_all))
    # builtin float replaces the np.float alias that newer NumPy versions
    # no longer provide
    print("Number of samples per class {}".format(
        hist_classes_all / float(np.sum(hist_classes_all)) * 100))

    return np.min(min_durations, axis=0)
Пример #5
0
def generate_data(ids,
                  sliding_window_length,
                  sliding_window_step,
                  data_dir=None,
                  identity_bool=False,
                  usage_modus='train'):
    '''
    Cut the IMU recordings of the selected subjects into sliding windows
    and store every window as its own pickle file in data_dir.

    For each recording the data file and its "_labels.csv" file are loaded,
    rows whose first label column equals 7 (the "none" class) are dropped,
    the data is normalised with norm_mbientlab and the windows are extracted
    with opp_sliding_window. Recordings that cannot be loaded or processed
    are reported on stdout and skipped.

    Each pickle holds a dict with the keys "data" (one window reshaped to
    (1, X.shape[1], X.shape[2]) as float), "label", "labels", "identity"
    and "label_file" (running index of the recording the window came from).

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    @param identity_bool: if True, only a per-subject slice of the
        recordings is used (chosen by usage_modus); otherwise R01..R30
    @param usage_modus: 'train', 'val' or 'test'; it only selects the
        recordings when identity_bool is True
    @return: None
    @raise ValueError: if identity_bool is True and usage_modus is unknown
    '''

    dataset_path_imu = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/LARa_dataset_mbientlab/"

    persons = [
        "S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10",
        "S11", "S12", "S13", "S14"
    ]

    # Exclusive upper recording number of each split, per subject:
    #   train = R01 .. idx_train - 1
    #   val   = idx_train .. idx_val - 1
    #   test  = idx_val .. idx_test - 1
    # Only S07-S14 are listed, so identity_bool=True combined with any other
    # subject in ids raises KeyError.
    idx_train = {
        "S07": 21,
        "S08": 21,
        "S09": 21,
        "S10": 11,
        "S11": 11,
        "S12": 23,
        "S13": 21,
        "S14": 21
    }
    idx_val = {
        "S07": 26,
        "S08": 26,
        "S09": 26,
        "S10": 16,
        "S11": 13,
        "S12": 26,
        "S13": 26,
        "S14": 26
    }
    idx_test = {
        "S07": 31,
        "S08": 31,
        "S09": 31,
        "S10": 24,
        "S11": 16,
        "S12": 31,
        "S13": 31,
        "S14": 31
    }

    counter_seq = 0
    hist_classes_all = np.zeros((NUM_CLASSES))
    counter_file_label = -1

    for P in persons:
        if P not in ids:
            print("\n6 No Person in expected IDS {}".format(P))
            continue

        if not identity_bool:
            first_rec, stop_rec = 1, 31
        elif usage_modus == 'train':
            first_rec, stop_rec = 1, idx_train[P]
        elif usage_modus == 'val':
            first_rec, stop_rec = idx_train[P], idx_val[P]
        elif usage_modus == 'test':
            first_rec, stop_rec = idx_val[P], idx_test[P]
        else:
            # Previously this fell through and failed later with an
            # unbound "recordings" variable.
            raise ValueError(
                "usage_modus must be 'train', 'val' or 'test', got {!r}".
                format(usage_modus))
        recordings = [
            'R{:02d}'.format(rec) for rec in range(first_rec, stop_rec)
        ]
        print("\nModus {} \n{}".format(usage_modus, recordings))

        for R in recordings:
            try:
                S = SCENARIO[R]
                file_name_data = "{}/{}_{}_{}.csv".format(P, S, P, R)
                file_name_label = "{}/{}_{}_{}_labels.csv".format(P, S, P, R)
                print("\n{}\n{}".format(file_name_data, file_name_label))

                try:
                    # getting data
                    data = read_extracted_data(dataset_path_imu +
                                               file_name_data,
                                               skiprows=1)
                    print("\nFiles loaded in modus {}\n{}".format(
                        usage_modus, file_name_data))
                    # The first two columns are not used as input channels.
                    data_x = data[:, 2:]
                    print("\nFiles loaded")
                except Exception:
                    print("\n1 Error In loading data,  in file {}".format(
                        dataset_path_imu + file_name_data))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(dataset_path_imu +
                                                      file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]

                    # Deleting rows containing the "none" class
                    data_x = np.delete(data_x, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)
                except Exception:
                    print("2 In generating data, Error getting the data {}".
                          format(dataset_path_imu + file_name_data))
                    continue

                try:
                    data_x = norm_mbientlab(data_x)
                except Exception:
                    print("\n3  In generating data, Plotting {}".format(
                        dataset_path_imu + file_name_data))
                    continue

                try:
                    # checking if annotations are consistent
                    if data_x.shape[0] != labels.shape[0]:
                        print("\n4 Not consisting annotation in  {}".format(
                            file_name_data))
                        continue

                    # Sliding window approach
                    print("\nStarting sliding window")
                    X, y, y_all = opp_sliding_window(data_x,
                                                     labels.astype(int),
                                                     sliding_window_length,
                                                     sliding_window_step,
                                                     label_pos_end=False)
                    print("\nWindows are extracted")

                    # Statistics
                    hist_classes = np.bincount(y[:, 0],
                                               minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes
                    print("\nNumber of seq per class {}".format(
                        hist_classes_all))

                    counter_file_label += 1

                    for f in range(X.shape[0]):
                        try:
                            seq = np.reshape(X[f],
                                             (1, X.shape[1], X.shape[2]))
                            seq = np.require(seq, dtype=float)

                            obj = {
                                "data": seq,
                                "label": y[f],
                                "labels": y_all[f],
                                "identity": labels_persons[P],
                                "label_file": counter_file_label
                            }
                            seq_path = os.path.join(
                                data_dir,
                                'seq_{0:07}.pkl'.format(counter_seq))
                            # The context manager closes the file even when
                            # pickling fails.
                            with open(seq_path, 'wb') as seq_file:
                                pickle.dump(obj,
                                            seq_file,
                                            protocol=pickle.HIGHEST_PROTOCOL)
                            counter_seq += 1

                            sys.stdout.write(
                                '\r' +
                                'Creating sequence file number {} with id {}'
                                .format(f, counter_seq))
                            sys.stdout.flush()
                        except Exception:
                            # Raising a plain string is itself a TypeError;
                            # raise a real exception carrying the message.
                            raise RuntimeError(
                                '\nError adding the seq {} from {} \n'.format(
                                    f, X.shape[0]))

                    print("\nCorrect data extraction from {}".format(
                        dataset_path_imu + file_name_data))

                    # Release the large arrays before the next recording.
                    del data
                    del data_x
                    del X
                    del labels
                    del class_labels
                except Exception:
                    print("\n5 In generating data, No created file {}".format(
                        dataset_path_imu + file_name_data))
                print("-----------------\n{}\n{}\n-----------------".format(
                    file_name_data, file_name_label))
            except KeyboardInterrupt:
                # The handlers above catch Exception only, so Ctrl-C ends up
                # here. NOTE(review): the loop then continues with the next
                # recording, as before; confirm whether it should stop.
                print('\nYou cancelled the operation.')

    return
Пример #6
0
def create_labels_imus_from_mocap():
    '''
    Check, for every subject, recording and annotation run, that the MoCap
    data file, the MoCap label file and the IMU sequence file exist and that
    the MoCap labels can be read.

    Only progress and error messages are printed; nothing is written or
    returned. NOTE: the step that stored the labels next to the IMU sequence
    (csv_save.save_attr_csv to "<P>/<scenario>_<P>_<r>_labels.csv" under the
    IMU sequences directory) is disabled in this version.
    '''

    dir_dataset = "/vol/actrec/DFG_Project/2019/LARa_dataset/MoCap/recordings_2019/15_Annotated_Dataset_Corrected/"
    dataset_path_imu_sequences = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/2019/flw_recordings_annotated_revised/"

    separator = "\n\n------------------------------------------------------------------"

    # Default annotator of each subject; individual recordings are
    # overridden in the loop below.
    annotator = {
        "S01": "A17",
        "S02": "A03",
        "S03": "A08",
        "S04": "A06",
        "S05": "A12",
        "S06": "A13",
        "S07": "A05",
        "S08": "A17",
        "S09": "A03",
        "S10": "A18",
        "S11": "A08",
        "S12": "A11",
        "S13": "A08",
        "S14": "A06"
    }

    persons = [
        "S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10",
        "S11", "S12", "S13", "S14"
    ]
    repetition = ["N01", "N02"]

    recordings = ['R{:02d}'.format(rec) for rec in range(1, 31)]

    for P in persons:
        for r in recordings:
            try:
                for N in repetition:
                    print(separator)
                    scenario = SCENARIO[r]

                    # Later checks win over earlier ones (relevant for S11).
                    annotator_file = annotator[P]
                    if P == 'S07' and scenario == 'L01':
                        annotator_file = "A03"
                    if P == 'S09' and r in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and r in ['R21', 'R22', 'R23', 'R24', 'R25']:
                        annotator_file = "A11"
                    if P == 'S11' and scenario == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and r in [
                            'R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13',
                            'R15'
                    ]:
                        annotator_file = "A02"
                    if P == 'S13' and r in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and r in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S14' and scenario == 'L03':
                        annotator_file = "A19"

                    file_name_imu = '{}/{}_{}_{}.csv'.format(
                        P, scenario, P, r)
                    file_name_mocap_data = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(
                        P, scenario, P, r, annotator_file, N)
                    file_name_mocap_attr = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                        P, scenario, P, r, annotator_file, N)

                    print("\n{}\n".format(file_name_imu))

                    if not os.path.exists(dir_dataset + file_name_mocap_data):
                        print("1 - No file in {}".format(dir_dataset +
                                                         file_name_mocap_data))
                        continue

                    if not os.path.exists(dir_dataset + file_name_mocap_attr):
                        print("1 - No file in {}".format(dir_dataset +
                                                         file_name_mocap_attr))
                        continue

                    if not os.path.exists(dataset_path_imu_sequences +
                                          file_name_imu):
                        print("1 - No file in {}".format(
                            dataset_path_imu_sequences + file_name_imu))
                        continue

                    try:
                        # Only the readability of the labels is checked; the
                        # result is not stored (see the docstring).
                        csv_reader.reader_labels(dir_dataset +
                                                 file_name_mocap_attr)
                    except Exception:
                        print(
                            "3 - Error getting annotated labels in {}".format(
                                dir_dataset + file_name_mocap_attr))
                        continue

                    print(separator)
            except KeyboardInterrupt:
                # Reachable now that the read above no longer uses a bare
                # except. NOTE(review): the loop continues with the next
                # recording afterwards, as before.
                print('You cancelled the operation.')

    return
Пример #7
0
def create_annotated_sequences():
    '''
    Build annotated IMU sequences for subjects S12-S14 by attaching the
    MoCap class labels to the IMU recordings.

    For every recording and annotation run the MoCap label file is read from
    the hard-coded start row of that recording, every second label row is
    kept, and the same number of IMU rows is taken from the hard-coded IMU
    start row. Column 0 of the IMU rows is rebased so that it starts at 0,
    the first label column is inserted as column 1, and the result is stored
    with save_data. Problems are reported on stdout and the run is skipped.

    Both annotation runs of a recording are written to the same target path,
    so a later run overwrites an earlier one.
    '''
    dir_dataset_mocap = "/vol/actrec/DFG_Project/2019/LARa_dataset/MoCap/recordings_2019/15_Annotated_Dataset_Corrected/"
    dataset_path_imu = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/2019/flw_data_recordings/"
    dataset_path_imu_sequences = "/vol/actrec/DFG_Project/2019/LARa_dataset/Motionminers/2019/flw_recordings_annotated_revised/"
    persons = ["S12", "S13", "S14"]

    # Default annotator of each subject; individual recordings are
    # overridden in the loop below.
    annotator = {
        "S01": "A17",
        "S02": "A03",
        "S03": "A08",
        "S04": "A06",
        "S05": "A12",
        "S06": "A13",
        "S07": "A05",
        "S08": "A17",
        "S09": "A03",
        "S10": "A18",
        "S11": "A08",
        "S12": "A11",
        "S13": "A08",
        "S14": "A06"
    }

    # First row to use in the IMU data / MoCap labels, one entry per
    # recording R01..R30 (indexed by the position of the recording).
    start_sequences_imus = [
        535, 525, 544, 0, 535, 520, 563, 550, 556, 539, 540, 535, 545, 565,
        545, 776, 558, 536, 540, 570, 553, 543, 528, 523, 520, 558, 558, 514,
        534, 544
    ]

    start_sequences_mocap = [
        351, 448, 484, 0, 468, 324, 410, 392, 442, 445, 450, 371, 315, 388,
        452, 274, 525, 422, 387, 389, 332, 378, 240, 515, 357, 430, 239, 349,
        400, 260
    ]

    repetition = ["N01", "N02"]

    recordings = ['R{:02d}'.format(rec) for rec in range(1, 31)]

    for P in persons:
        for counter, r in enumerate(recordings):
            for N in repetition:
                print(
                    "\n\n------------------------------------------------------------------"
                )
                scenario = SCENARIO[r]

                # Later checks win over earlier ones (relevant for S11).
                annotator_file = annotator[P]
                if P == 'S07' and scenario == 'L01':
                    annotator_file = "A03"
                if P == 'S09' and r in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and r in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"
                if P == 'S11' and scenario == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and r in [
                        'R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15'
                ]:
                    annotator_file = "A02"
                if P == 'S13' and r in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and r in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S14' and scenario == 'L03':
                    annotator_file = "A19"

                file_name_imu = '{}/{}_{}_{}.csv'.format(P, scenario, P, r)
                file_name_imu_attr = '{}/{}_{}_{}_labels.csv'.format(
                    P, scenario, P, r)
                file_name_mocap_attr = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                    P, scenario, P, r, annotator_file, N)

                print("\n{}\n".format(file_name_imu))

                if not os.path.exists(dir_dataset_mocap +
                                      file_name_mocap_attr):
                    print("1 - No file in {}".format(dir_dataset_mocap +
                                                     file_name_mocap_attr))
                    continue

                # "except Exception" (instead of a bare except) keeps the
                # best-effort skipping but lets Ctrl-C abort the job.
                try:
                    data_labels = csv_reader.reader_labels(
                        dir_dataset_mocap + file_name_mocap_attr)
                    data_labels = data_labels[start_sequences_mocap[counter]:]
                except Exception:
                    print("2 - Error getting annotated labels in {}".format(
                        dir_dataset_mocap + file_name_mocap_attr))
                    continue

                try:
                    data_imu = read_extracted_data(dataset_path_imu +
                                                   file_name_imu,
                                                   skiprows=1)
                    print("\nFiles loaded\n")
                except Exception:
                    # The message text is historical; this branch reports a
                    # failure to load the IMU data.
                    print("3 - Error getting annotated labels in {}".format(
                        dataset_path_imu + file_name_imu))
                    continue

                try:
                    # Keep every second label row.
                    data_labels = data_labels[np.arange(
                        0, data_labels.shape[0], 2)]

                    first_row = start_sequences_imus[counter]
                    sequence = data_imu[first_row:first_row +
                                        data_labels.shape[0]]
                    # Rebase column 0 so that the sequence starts at 0.
                    sequence[:, 0] = sequence[:, 0] - sequence[0, 0]

                    # Output layout: column 0, class label, remaining
                    # IMU columns.
                    annotated_sequence = np.zeros(
                        (sequence.shape[0], sequence.shape[1] + 1))
                    annotated_sequence[:, 0] = sequence[:, 0]
                    annotated_sequence[:, 1] = data_labels[:, 0]
                    annotated_sequence[:, 2:] = sequence[:, 1:]
                except Exception:
                    print("4 - Error getting annotated labels in {}".format(
                        dataset_path_imu + file_name_imu))
                    continue

                try:
                    if annotated_sequence.shape[0] == data_labels.shape[0]:
                        save_data(annotated_sequence,
                                  dataset_path_imu_sequences + file_name_imu,
                                  headers_bool=True,
                                  seq_annotated=True)
                except Exception:
                    print("4 - Error saving atts in {}".format(
                        dataset_path_imu_sequences + file_name_imu_attr))

    return
def _shortest_run_steps(idxs):
    '''
    Return the smallest number of consecutive steps (run length minus one)
    among the runs of consecutive values in the ascending index array idxs.

    An empty array, or any run made of a single index, yields 0.
    '''
    shortest = np.inf
    steps = 0
    for idx in range(idxs.shape[0] - 1):
        if idxs[idx + 1] - idxs[idx] == 1:
            steps += 1
        else:
            # A gap closes the current run. The counter must restart for
            # every run, not only when a new minimum was found; otherwise
            # long runs leak into the next one and short runs are missed.
            if steps < shortest:
                shortest = steps
            steps = 0
    if steps < shortest:
        shortest = steps
    return shortest


def compute_min_num_samples(ids, boolean_classes=True, attr=0):
    '''
    Compute the minimum duration of a sequences with the same classes or attribute

    This value will help selecting the best sliding window size.

    The duration of a run is counted in steps between consecutive samples,
    i.e. run length minus one. Zeros (class absent from a file, or a run of
    a single sample) are replaced by inf before the minimum is taken.

    @param ids: ids for train
    @param boolean_classes: selecting between classes or attributes
    @param attr: ids for attribute
    @return: array with one minimum per class over all loaded label files;
        all inf when no file could be loaded
    '''

    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    NUM_CLASSES = 8 if boolean_classes else 2

    # One row per successfully loaded label file.
    min_durations = np.empty((0, NUM_CLASSES))
    hist_classes_all = np.zeros((NUM_CLASSES))
    for P in persons:
        if P not in ids:
            continue
        for r, R in enumerate(recordings):
            # All of these if-cases are coming due to the naming of the recordings in the data.
            # Not all the subjects have the same
            # annotated recordings, nor annotators, nor annotations runs, nor scenarios.
            # these will include all of the recordings for the subjects
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                # NOTE(review): SCENARIO is indexed by position here, while
                # other functions in this file index it by recording name;
                # confirm which one matches the SCENARIO in scope.
                S = SCENARIO[r]
            for N in repetition:
                # The overrides only concern S07, S11 and S14, for which S
                # is the scenario looked up above. Later checks win.
                annotator_file = annotator[P]
                if P == 'S07' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and S == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"
                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(P, S, P, R, annotator_file, N)

                try:
                    data = csv_reader.reader_labels(FOLDER_PATH + file_name_label)
                    labels = data[:, attr]
                    print("Files loaded")

                    min_duration = np.zeros((1, NUM_CLASSES))
                    for c in range(NUM_CLASSES):
                        # indexes per class
                        idxs = np.where(labels == c)[0]
                        min_counter = _shortest_run_steps(idxs)
                        min_duration[0, c] = min_counter

                        print("class  {} counter size {}".format(c, min_counter))

                    min_durations = np.append(min_durations, min_duration, axis=0)

                    # Statistics
                    hist_classes = np.bincount(labels.astype(int), minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes

                except Exception:
                    print("No file {}".format(FOLDER_PATH + file_name_label))

    # np.inf / float replace the aliases np.Inf / np.float, which were
    # removed from NumPy.
    min_durations[min_durations == 0] = np.inf
    print("Minimal duration per class \n{}".format(min_durations))

    print("Number of samples per class {}".format(hist_classes_all))
    print("Number of samples per class {}".format(hist_classes_all / float(np.sum(hist_classes_all)) * 100))

    # "initial" keeps the reduction defined when no file was loaded.
    return np.min(min_durations, axis=0, initial=np.inf)
Пример #9
0
def generate_data(ids, sliding_window_length, sliding_window_step, data_dir=None):
    '''
    creates files for each of the sequences extracted from a file
    following a sliding window approach

    For each subject, recording and annotation run the "_der_data.csv" file
    and its "_labels.csv" file are loaded, rows whose first label column
    equals 7 (the "none" class) are dropped, every second row is kept, the
    data is normalised with norm_mean_std and the windows are extracted with
    opp_sliding_window. Each window is pickled to "seq_<counter>.pkl" in
    data_dir as a dict with the keys "data", "label" and "labels". Files
    that cannot be loaded or processed are reported on stdout and skipped.

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    @return: None
    '''

    FOLDER_PATH = '/path_to_LARa_Mocap_for_annotations/'
    folder_derivative = "/path_to_LARa_Mocap_for_annotations/"

    # Recording names, refer to the naming of the files in LARa dataset
    recordings = ['R{:02d}'.format(rec) for rec in range(1, 31)]

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
            continue

        for R in recordings:
            # Subjects S01-S06 always use scenario L01; for the others the
            # scenario depends on the recording.
            if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                S = "L01"
            else:
                S = SCENARIO[R]
            for N in repetition:
                # The scenario-based overrides only concern S07, S11 and
                # S14, for which S is the looked-up scenario. They reuse S
                # so that SCENARIO is indexed in one consistent way (it used
                # to be indexed by position here and by name above).
                # Later checks win over earlier ones.
                annotator_file = annotator[P]
                if P == 'S07' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S14' and S == 'L03':
                    annotator_file = "A19"
                if P == 'S11' and S == 'L01':
                    annotator_file = "A03"
                if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                    annotator_file = "A02"
                if P == 'S13' and R in ['R28']:
                    annotator_file = "A01"
                if P == 'S13' and R in ['R29', 'R30']:
                    annotator_file = "A11"
                if P == 'S09' and R in ['R28', 'R29']:
                    annotator_file = "A01"
                if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                    annotator_file = "A11"

                file_name_norm = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)
                file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(P, S, P, R, annotator_file, N)

                try:
                    # getting data
                    data = csv_reader.reader_data(folder_derivative + file_name_norm)
                    print("\nFiles loaded")
                except Exception:
                    print("\n In generating data, No file {}".format(folder_derivative + file_name_norm))
                    continue

                try:
                    # Getting labels and attributes
                    labels = csv_reader.reader_labels(FOLDER_PATH + file_name_label)
                    class_labels = np.where(labels[:, 0] == 7)[0]
                    print("\nGet labels")

                    # Deleting rows containing the "none" class
                    data = np.delete(data, class_labels, 0)
                    labels = np.delete(labels, class_labels, 0)
                    print("\nDeleting none rows")

                    # halving the frequency, as Mbientlab or MotionMiners sensors use 100Hz
                    downsampling = range(0, data.shape[0], 2)
                    data = data[downsampling]
                    labels = labels[downsampling]
                    data_t, data_x, data_y = divide_x_y(data)
                    del data_t
                    print("\nDownsampling")

                except Exception:
                    print("\n In generating data, Error getting the data {}".format(FOLDER_PATH + file_name_norm))
                    continue

                try:
                    data_x = norm_mean_std(data_x)

                    # checking if annotations are consistent
                    if np.sum(data_y == labels[:, 0]) != data_y.shape[0]:
                        print("\nNot consisting annotation in  {}".format(file_name_norm))
                        continue

                    # Sliding window approach
                    print("Starting sliding window")
                    X, y, y_all = opp_sliding_window(data_x, labels.astype(int), sliding_window_length,
                                                     sliding_window_step, label_pos_end=False)
                    print("Windows are extracted")

                    # Statistics
                    hist_classes = np.bincount(y[:, 0], minlength=NUM_CLASSES)
                    hist_classes_all += hist_classes
                    print("Number of seq per class {}".format(hist_classes_all))

                    for f in range(X.shape[0]):
                        try:
                            sys.stdout.write(
                                '\r' + 'Creating sequence file number {} with id {}'.format(f, counter_seq))
                            sys.stdout.flush()

                            seq = np.reshape(X[f], (1, X.shape[1], X.shape[2]))
                            # float replaces the removed np.float alias.
                            seq = np.require(seq, dtype=float)

                            # Storing the sequences; the context manager
                            # closes the file even when pickling fails.
                            obj = {"data": seq, "label": y[f], "labels": y_all[f]}
                            seq_path = os.path.join(data_dir, 'seq_{0:06}.pkl'.format(counter_seq))
                            with open(seq_path, 'wb') as seq_file:
                                pickle.dump(obj, seq_file, protocol=pickle.HIGHEST_PROTOCOL)

                            counter_seq += 1

                        except Exception:
                            # Raising a plain string is itself a TypeError;
                            # raise a real exception carrying the message.
                            raise RuntimeError('\nError adding the seq')

                    print("\nCorrect data extraction from {}".format(FOLDER_PATH + file_name_norm))

                    # Release the large arrays before the next file.
                    del data
                    del data_x
                    del data_y
                    del X
                    del labels
                    del class_labels

                except Exception:
                    print("\n In generating data, No file {}".format(FOLDER_PATH + file_name_norm))

    return