Example #1
def dev(model):
    data_helper = DataHelper(mode='dev')
    total_pred = 0
    correct = 0
    accuracy = 0
    b_size = len(data_helper.label)
    print('*' * 100)
    print('dev set total:', b_size)
    loss_func = torch.nn.MSELoss(reduction='sum')
    loss_mae = torch.nn.L1Loss(reduction='sum')

    iter = 0
    total_loss = 0
    for content, label, _ in data_helper.batch_iter(batch_size=b_size,
                                                    num_epoch=1):
        iter += 1
        model.eval()
        # regression task: minimize MSE; the sum-reduced MAE below is reported as "accuracy"
        pred = model(content)
        pred_sq = torch.squeeze(pred, 1)
        loss = loss_func(pred_sq.cpu().data, label.cpu())
        #------------------------------------------------#
        error = loss_mae(pred_sq.cpu().data, label.cpu())
        # error = mean_absolute_error(pred_sq.cpu().data, label.cpu())
        accuracy += error  # accumulate the sum-reduced MAE
        total_pred = len(label)  # the single full-size batch covers the whole dev set

    total_pred = float(total_pred)
    accuracy = float(accuracy)

    # return per-sample MAE (reported as accuracy) and per-sample MSE
    return (accuracy / total_pred), (float(loss) / total_pred)
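The two sum-reduced losses above only become per-sample averages after dividing by the dev-set size. A minimal, self-contained sketch of that metric arithmetic (toy tensors stand in for pred_sq and label, which DataHelper is assumed to provide as above):

import torch

pred = torch.tensor([2.0, 3.0, 5.0])      # stands in for pred_sq
target = torch.tensor([2.5, 3.0, 4.0])    # stands in for label

mse_sum = torch.nn.MSELoss(reduction='sum')(pred, target)  # 0.25 + 0.0 + 1.0 = 1.25
mae_sum = torch.nn.L1Loss(reduction='sum')(pred, target)   # 0.5 + 0.0 + 1.0 = 1.5

n = target.numel()
print(mae_sum.item() / n, mse_sum.item() / n)  # per-sample MAE 0.5, per-sample MSE ~0.4167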
def model_test(model):
    """ """
    data = sio.loadmat("./data/fake_data.mat")
    dh = DataHelper(data, 19*7*24, 1*7*24, 50)

    feature, flow, label = dh.gen_test_samples()
    tfeature = feature[:, :model._tf_dim]
    sfeature = feature[:, -model._sf_dim:]

    err = model.decompose()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(os.path.dirname("checkpoints/checkpoint"))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            return
        feed_dict = {
            model._sfeature: sfeature,
            model._tfeature: tfeature,
            model._flow: flow
        }
        err, stfeature = sess.run([err, model.stfeature], feed_dict=feed_dict)

    ano_detect(flow, err, stfeature, label)
Example #3
def cal_PMI(window_size=20):
    helper = DataHelper(mode="train")
    content, _ = helper.get_content()
    pair_count_matrix = np.zeros((len(helper.vocab), len(helper.vocab)),
                                 dtype=int)
    word_count = np.zeros(len(helper.vocab), dtype=int)

    for sentence in content:
        sentence = sentence.split(' ')
        for i, word in enumerate(sentence):
            try:
                word_count[helper.d[word]] += 1
            except KeyError:
                continue
            start_index = max(0, i - window_size)
            end_index = min(len(sentence), i + window_size)
            for j in range(start_index, end_index):
                if i == j:
                    continue
                else:
                    target_word = sentence[j]
                    try:
                        pair_count_matrix[helper.d[word],
                                          helper.d[target_word]] += 1
                    except KeyError:
                        continue

    total_count = np.sum(word_count)
    word_count = word_count / total_count
    pair_count_matrix = pair_count_matrix / total_count
    pmi_matrix = np.zeros((len(helper.vocab), len(helper.vocab)), dtype=float)
    for i in range(len(helper.vocab)):
        for j in range(len(helper.vocab)):
            # log(0) and zero-division produce -inf / nan here; both are
            # cleaned up by nan_to_num and the clamp to 0 below
            pmi_matrix[i, j] = np.log(pair_count_matrix[i, j] /
                                      (word_count[i] * word_count[j]))

    pmi_matrix = np.nan_to_num(pmi_matrix)

    pmi_matrix = np.maximum(pmi_matrix, 0.0)

    edges_weights = [0.0]
    count = 1
    edges_mappings = np.zeros((len(helper.vocab), len(helper.vocab)),
                              dtype=int)
    for i in range(len(helper.vocab)):
        for j in range(len(helper.vocab)):
            if pmi_matrix[i, j] != 0:
                edges_weights.append(pmi_matrix[i, j])
                edges_mappings[i, j] = count
                count += 1

    edges_weights = np.array(edges_weights)

    edges_weights = edges_weights.reshape(-1, 1)
    # print(edges_weights.shape)
    edges_weights = torch.Tensor(edges_weights)

    return edges_weights, edges_mappings, count
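For reference, the quantity computed above is PMI(i, j) = log(p(i, j) / (p(i) p(j))), with negative and undefined entries clamped to zero. A NumPy-only sketch of the same computation on a toy two-word vocabulary (the DataHelper-driven counting is assumed to have produced the probabilities):

import numpy as np

word_p = np.array([0.6, 0.4])            # unigram probabilities p(i)
pair_p = np.array([[0.0, 0.3],           # co-occurrence probabilities p(i, j)
                   [0.3, 0.0]])

with np.errstate(divide='ignore'):
    pmi = np.log(pair_p / np.outer(word_p, word_p))
pmi = np.maximum(np.nan_to_num(pmi), 0.0)  # clamp -inf and negatives to 0, as above
print(pmi)  # off-diagonal entries are log(0.3 / 0.24) ≈ 0.223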
def model_train(model):
    """ """

    data = sio.loadmat("./data/fake_data.mat")
    dh = DataHelper(data, 19*7*24, 1*7*24, 50)

    model.construct_loss()
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(model.loss)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        ckpt = tf.train.get_checkpoint_state(os.path.dirname("checkpoints/checkpoint"))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        train_feature_batches, train_flow_batches = dh.gen_train_batch(BATCH_SIZE)
        for i in range(EPOCH):

            for x0, y0 in zip(train_feature_batches, train_flow_batches):
                shuffled_index = np.arange(BATCH_SIZE)
                np.random.shuffle(shuffled_index)
                x1 = x0[shuffled_index]
                y1 = y0[shuffled_index]

                feed_dict = {
                    model.x0: x0,
                    model.y0: y0,
                    model.x1: x1,
                    model.y1: y1,
                }
                _, loss = sess.run([optimizer, model.loss], feed_dict=feed_dict)

            print("Epoch {}: loss {}".format(i, loss))
            saver.save(sess, "checkpoints/model")
Example #5
def graph_eval(core_words):

    # print('load model from file.')
    data_helper = DataHelper('test')
    edges_num, edges_matrix = edges_dist(len(data_helper.vocab), data_helper,
                                         1)
    model = torch.load(os.path.join('temp_model.pkl'))
    content, label = data_helper.get_content()

    edges_weights = model.seq_edge_w.weight.to('cpu').detach().numpy()
    graph_ed = []
    for core_word in core_words:  #
        core_index = data_helper.vocab.index(core_word)

        results = {}
        unq_res = {}
        for i in range(len(data_helper.vocab)):

            word = data_helper.vocab[i]
            n_word = edges_matrix[i, core_index]

            if n_word != 0:
                results[word] = edges_weights[n_word][0]
            else:
                continue

        for word, weight in results.items():
            if word not in unq_res:
                unq_res[word] = weight

        sort_results = sorted(unq_res.items(), key=lambda d: d[1])
        graph_ed.append(sort_results)
    # print(sort_results)
    return graph_ed
Example #6
def graph_eval_extend(core_words):

    # print('load model from file.')
    data_helper = DataHelper('test')
    edges_num, edges_matrix = edges_mapping1(len(data_helper.vocab),
                                             data_helper, 1)
    model = torch.load(os.path.join('temp_model.pkl'))
    content, label = data_helper.get_content()
    edges_weights = model.seq_edge_w.weight.to('cpu').detach().numpy()
    graph_ed = []

    for core_word in core_words:  #
        other = []
        core_index = data_helper.vocab.index(core_word)
        for cc in core_words:
            if cc != core_word:
                o_ind = data_helper.vocab.index(cc)
                other.append(o_ind)

        results = {}
        for i in range(len(other)):
            word = data_helper.vocab[other[i]]
            n_word = edges_matrix[other[i], core_index]
            results[word] = edges_weights[n_word][0]
        unq_res = {}
        for word, weight in results.items():
            if word not in unq_res:
                unq_res[word] = weight

        sort_results = sorted(unq_res.items(), key=lambda d: d[1])
        graph_ed.append(sort_results)
    # print(sort_results)
    return graph_ed
Example #7
def _get_data(data_name='citeulike_title_only'):
    data_helper = DataHelper()

    def error():
        assert False, '[ERROR] unseen data_name %s' % data_name

    sub_folder = ''
    fold = re.findall(r'fold(\d+)', data_name)
    if len(fold) == 1:
        sub_folder = 'fold%d' % int(fold[0])
    if data_name.startswith('citeulike'):
        if data_name.startswith('citeulike_title_only'):
            content_file = data_root + '/citeulike/title_only/%s/data_content.pkl' % sub_folder
            split_file = data_root + '/citeulike/title_only/%s/data_split_cold_item.pkl' % sub_folder
        elif data_name.startswith('citeulike_title_and_abstract'):
            content_file = data_root + '/citeulike/title_and_abstract/%s/data_content.pkl' % sub_folder
            split_file = data_root + '/citeulike/title_and_abstract/%s/data_split_cold_item.pkl' % sub_folder
        else:
            error()
    elif data_name.startswith('news'):
        if data_name.startswith('news_title_only'):
            content_file = data_root + '/news/title_only/%s/data_content.pkl' % sub_folder
            split_file = data_root + '/news/title_only/%s/data_split_cold_item.pkl' % sub_folder
        elif data_name.startswith('news_title_and_abstract'):
            content_file = data_root + '/news/title_and_abstract/%s/data_content.pkl' % sub_folder
            split_file = data_root + '/news/title_and_abstract/%s/data_split_cold_item.pkl' % sub_folder
        else:
            error()
    else:
        error()

    # data_helper.load_data(content_file)
    with open(split_file, 'rb') as fp:
        split_data = pickle.load(fp)
    data_helper.data = split_data
    return data_helper
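A usage sketch: the data_name string selects both the corpus variant and, through the fold(\d+) pattern, the cross-validation fold. The path shown below only illustrates how the pieces combine; data_root and the pickle layout come from the surrounding project:

# 'news_title_only_fold2' resolves the split file to
#   data_root + '/news/title_only/fold2/data_split_cold_item.pkl'
data_helper = _get_data('news_title_only_fold2')
split = data_helper.data  # the unpickled split object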
 def __init__(self):
     """
     Constructor
     """
     self.dh = DataHelper()
     self.ds = DaysStatistics()
     self.prediction_steps = 288
    def __init__(self, estimator_name, param_dict):
        """
        Constructor of MyGridSearch. Prepares all necessary variables and builds the generator
        for the grid search.
        :param estimator_name: String with the name of an sklearn predictor. Must match the
                                sklearn class name exactly, because this string is evaluated to
                                create the predictor. Not the best solution, but a working one.
        :param param_dict: Dictionary with parameters to search through. Each key must be the exact
                            name of a predictor parameter, and each value must be a list of suitable
                            values. The current implementation has problems with strings, so only
                            numeric values are supported for now.
        """
        inspector = inspect.getfullargspec(eval(estimator_name))
        for key in param_dict.keys():
            assert key in inspector.args, 'Argument %s is not valid for class %s' % (key, estimator_name)

        self.estimator_name = estimator_name
        self.param_dict = param_dict
        self.evaluation = []
        self.parameters = []
        self.best_parameters = None
        self.best_evaluation = None
        self.best_estimator = None
        self.arguments = None
        self.generator = None
        self.grid_size = 1
        self.dh = DataHelper()
        self.prepare_generator()
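prepare_generator itself is not shown. A minimal sketch of the kind of parameter-grid generator it might build, assuming itertools.product over the value lists in param_dict (names and defaults here are illustrative, not the original implementation):

import itertools

def make_grid_generator(param_dict):
    """Yield one {parameter: value} dict per point of the grid."""
    names = list(param_dict.keys())
    for combo in itertools.product(*(param_dict[name] for name in names)):
        yield dict(zip(names, combo))

# e.g. for RandomForestRegressor-style parameters
for params in make_grid_generator({'n_estimators': [50, 100], 'max_depth': [3, 5]}):
    print(params)  # {'n_estimators': 50, 'max_depth': 3}, ...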
Example #10
    def _eval_model(self, img_filenames, labels_filenames, model):
        dhl = DataHelper()

        gt_lbls = []
        pr_lbls = []
        k = 0
        for i, file_name in tqdm(enumerate(img_filenames)):
            lbl = int(load(self.annotation_path + labels_filenames[i]))

            # print(lbl)
            # print(type(lbl))
            if lbl in (0, 1, 2, 6):  # only evaluate samples with labels 0, 1, 2 or 6
                img = np.expand_dims(
                    np.array(imread(self.img_path + file_name)) / 255.0,
                    axis=0)
                gt_lbls.append(
                    dhl.load_and_relabel_exp(self.annotation_path +
                                             labels_filenames[i]))
                # gt_lbls.append(dhl.load_and_categorize_valence(self.annotation_path + labels_filenames[i]))
                prediction = model(img)[0]
                score = tf.nn.softmax(prediction)
                pr_lbls.append(np.argmax(score))
                print('Gt => ' + str(gt_lbls[k]) + ' : ' + str(pr_lbls[k]) +
                      ' <= Pr')
                k += 1

        print(confusion_matrix(gt_lbls, pr_lbls))
        acc = accuracy_score(gt_lbls, pr_lbls)

        return acc
Example #11
    def train(self):
        """
        train
        """
        gpu_options = tf.GPUOptions(allow_growth=True,
                                    per_process_gpu_memory_fraction=1.0)
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False,
                                      gpu_options=gpu_options)
        with tf.Session(config=session_conf) as sess:
            sess.run(tf.global_variables_initializer())

            current_step = 0

            for epoch in range(self.config.epochs):
                start = time.time()
                print("-------- Epoch {}/{} -------".format(
                    epoch + 1, self.config.epochs))

                for batch in DataHelper.next_batch(self.train_data,
                                                   self.config.batch_size):
                    current_step += 1

                    loss = self.model.train(sess, batch, self.config)
                    perplexity = cal_perplexity(loss)

                    if current_step % 100 == 0:
                        print("train ---> step: {}, loss: {}, perplexity: {}".
                              format(current_step, loss, perplexity))

                    if current_step % self.config.eval_every == 0:

                        eval_losses = []
                        eval_perplexities = []

                        for eval_batch in DataHelper.next_batch(
                                self.eval_data, self.config.batch_size):
                            eval_loss = self.model.eval(sess, eval_batch)
                            eval_perplexity = cal_perplexity(eval_loss)
                            eval_losses.append(eval_loss)
                            eval_perplexities.append(eval_perplexity)

                        print("\n")
                        print("eval ---> step: {}, loss: {}, perplexity: {}".
                              format(current_step, mean(eval_losses),
                                     mean(eval_perplexities)))
                        print("\n")

                        # save a checkpoint of the model
                        ckpt_model_path = self.config.ckpt_model_path
                        if not os.path.exists(ckpt_model_path):
                            os.makedirs(ckpt_model_path)
                        ckpt_model_path = os.path.join(ckpt_model_path,
                                                       "model")
                        self.model.saver.save(sess,
                                              ckpt_model_path,
                                              global_step=current_step)

                end = time.time()
                print("------time: {}----------".format(end - start))
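cal_perplexity is defined elsewhere in the project. For a language model trained on average cross-entropy, the standard definition is the exponential of the loss, so a compatible helper could be as simple as the sketch below (an assumption about the missing function, not its actual code):

import math

def cal_perplexity(loss):
    # perplexity = exp(per-token cross-entropy loss)
    return math.exp(float(loss))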
Example #12
def grab_core(core_words):
    cores = []
    cores_w = []
    # print('load model from file.')
    data_helper = DataHelper('test')
    edges_num, edges_matrix = edges_mapping1(len(data_helper.vocab),
                                             data_helper, 1)
    model = torch.load(os.path.join('temp_model.pkl'))
    content, label = data_helper.get_content()

    # eval_test(model,data_helper)

    edges_weights = model.seq_edge_w.weight.to('cpu').detach().numpy()
    graph_ed = []
    for core_word in core_words:  #
        core_index = data_helper.vocab.index(core_word)

        results = {}
        for i in range(len(data_helper.vocab)):

            word = data_helper.vocab[i]
            n_word = edges_matrix[i, core_index]
            # n_word = edges_matrix[i, i]
            if n_word != 0:
                cores.append(i)
                cores_w.append(word)

            else:
                continue
    return cores_w
    def _clean_data(X, y):

        clean_X = DataFrame(columns=X.columns)
        clean_y = Series(name=y.name)

        skf = StratifiedKFold(n_splits=MajorityFiltering.k_folds, shuffle=True)

        for train_idxs, val_idxs in skf.split(X=range(len(y)), y=y):

            train_X = DataHelper.select_rows(X, train_idxs, copy=False)
            train_y = DataHelper.select_rows(y, train_idxs, copy=False)

            ensemble = MajorityFiltering.get_ensemble()
            ensemble.fit(train_X, train_y)

            val_X = DataHelper.select_rows(X, val_idxs, copy=False)

            predictions = ensemble.predict(val_X)

            maintain_idxs = [val_idxs[i] for i in range(len(val_idxs)) \
                if predictions[i]==y.iloc[val_idxs[i]]]

            maintain_X = DataHelper.select_rows(X, maintain_idxs, copy=True)
            maintain_y = DataHelper.select_rows(y, maintain_idxs, copy=True)

            clean_X = clean_X.append(maintain_X,
                                     verify_integrity=True,
                                     sort=False)
            clean_y = clean_y.append(maintain_y, verify_integrity=True)

        return clean_X, clean_y
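MajorityFiltering.get_ensemble() is not shown; majority filtering only needs an ensemble with fit/predict whose vote decides which (possibly mislabeled) samples to keep. A plausible stand-in, assuming a small bagging ensemble of decision trees rather than the project's actual configuration:

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

def get_ensemble(n_estimators=10):
    # a validation sample survives filtering only if this ensemble's
    # prediction matches its label in _clean_data above
    return BaggingClassifier(DecisionTreeClassifier(), n_estimators=n_estimators)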
 def __init__(self, double_model=False):
     """
     Constructor
     :param double_model: Bool indicating whether the model is single or double. A single model has one tree model that predicts all days.
                          A double model has one model for weekday prediction and another for weekend-day prediction.
     """
     self.dh = DataHelper()
     self.double_model = double_model
Example #15
 def test_reg(self, model_file):
     dhp = DataHelper()
     model = tf.keras.models.load_model(model_file)
     filenames, val_labels = dhp.create_test_gen(
         img_path=self.img_path, annotation_path=self.annotation_path)
     loss_eval = self._eval_model_reg(filenames, val_labels, model)
     print('=============Evaluation=================')
     print(loss_eval)
     print('========================================')
Example #16
 def __init__(self):
     self.data_helper_instance = DataHelper(
         input_dir='../data_2017',
         op_dir='../examples/dataset',
         preprocessing_cache='../examples/preprocessing_cache',
         stats_dir='../stats')
     self.stats_dir = "../stats"
     self.limited_interactions = 'target_user_interactions.csv'
     self.create_targets()
Example #17
    def test_accuracy_dynamic(self, model):
        dhp = DataHelper()
        '''create batches'''
        img_filenames, exp_filenames, spm_up_filenames, spm_md_filenames, spm_bo_filenames = \
            dhp.create_generator_full_path_with_spm(img_path=self.img_path,
                                                    annotation_path=self.anno_path)
        print(len(img_filenames))
        exp_pr_lbl = []
        exp_gt_lbl = []

        dds = DynamicDataset()
        ds = dds.create_dataset(img_filenames=img_filenames,
                                spm_up_filenames=spm_up_filenames,
                                spm_md_filenames=spm_md_filenames,
                                spm_bo_filenames=spm_bo_filenames,
                                anno_names=exp_filenames,
                                is_validation=True)
        batch_index = 0
        for global_bunch, upper_bunch, middle_bunch, bottom_bunch, exp_gt_b in ds:
            '''predict on batch'''
            global_bunch = global_bunch[:, -1, :, :]
            upper_bunch = upper_bunch[:, -1, :, :]
            middle_bunch = middle_bunch[:, -1, :, :]
            bottom_bunch = bottom_bunch[:, -1, :, :]

            probab_exp_pr_b, _, _, _, _ = model.predict_on_batch(
                [global_bunch, upper_bunch, middle_bunch, bottom_bunch])
            exp_pr_b = np.array([
                np.argmax(probab_exp_pr_b[i])
                for i in range(len(probab_exp_pr_b))
            ])

            exp_pr_lbl += np.array(exp_pr_b).tolist()
            exp_gt_lbl += np.array(exp_gt_b).tolist()
            batch_index += 1

        exp_pr_lbl = np.int64(np.array(exp_pr_lbl))
        exp_gt_lbl = np.int64(np.array(exp_gt_lbl))

        global_accuracy = accuracy_score(exp_gt_lbl, exp_pr_lbl)
        conf_mat = confusion_matrix(exp_gt_lbl, exp_pr_lbl) / 500.0
        # conf_mat = tf.math.confusion_matrix(exp_gt_lbl, exp_pr_lbl, num_classes=7)/500.0

        ds = None
        face_img_filenames = None
        eyes_img_filenames = None
        nose_img_filenames = None
        mouth_img_filenames = None
        exp_filenames = None
        global_bunch = None
        upper_bunch = None
        middle_bunch = None
        bottom_bunch = None

        avg_accuracy = global_accuracy  # the class numbers are the same in the validation
        return global_accuracy, conf_mat
 def create_synthesized_landmarks(self, model_file, test_print=False):
     dhl = DataHelper()
     model = tf.keras.models.load_model(model_file)
     for i, file in tqdm(enumerate(os.listdir(self.img_path))):
         if file.endswith(".jpg") or file.endswith(".png"):
             dhl.create_synthesized_landmarks_path(img_path=self.img_path,
                                                   anno_path=self.anno_path,
                                                   file=file,
                                                   model=model,
                                                   test_print=test_print)
class LYFeatureIntegrate2(object):
    def __init__(self):
        self.d_h = DataHelper()

    def get_all_features(self, path=path_train01):
        start = time.time()
        data = self.d_h.get_data(path)
        fe = LYFeatureExtraction2(self.d_h, data)
        user_Y_list = self.d_h.get_user_Y_list(data)

        mt = ThreadingUtil()
        g_func_list = []

        g_func_list.append({"func": fe.user_driver_time, "args": (data,)})
        g_func_list.append({"func": fe.user_night_stat, "args": (data,)})
        g_func_list.append({"func": fe.user_driver_stat, "args": (data,)})
        g_func_list.append({"func": fe.get_distance, "args": (data,)})
        g_func_list.append({"func": fe.user_direction__stat, "args": (data,)})
        g_func_list.append({"func": fe.user_height_stat, "args": (data,)})
        g_func_list.append({"func": fe.user_speed_stat, "args": (data,)})

        mt.set_thread_func_list(g_func_list)
        mt.start()

        all_features_list = [[row[col] for row in mt.data_list] for col in range(len(mt.data_list[0]))]

        self.d_h.print_str += " get_train_features cost time: " + str(time.time() - start) + " "
        return all_features_list, user_Y_list

    def get_test_features02(self, path=path_test01):
        """

        :param path:
        :return:
        """
        start = time.time()
        data = self.d_h.get_test_data(path)
        userid_list = self.d_h.get_userlist(data)

        ft_Liyang = LYFeatureExtraction2(self.d_h, data)
        mt = ThreadingUtil()
        g_func_list = []
        g_func_list.append({"func": ft_Liyang.user_driver_time, "args": (data,)})
        g_func_list.append({"func": ft_Liyang.user_night_stat, "args": (data,)})
        g_func_list.append({"func": ft_Liyang.user_driver_stat, "args": (data,)})
        g_func_list.append({"func": ft_Liyang.get_distance, "args": (data,)})
        g_func_list.append({"func": ft_Liyang.user_direction__stat, "args": (data,)})
        g_func_list.append({"func": ft_Liyang.user_height_stat, "args": (data,)})
        g_func_list.append({"func": ft_Liyang.user_speed_stat, "args": (data,)})
        mt.set_thread_func_list(g_func_list)
        mt.start()

        test_features = [[row[col] for row in mt.data_list] for col in range(len(mt.data_list[0]))]
        self.d_h.print_str += " get_test_features cost time: " + str(time.time() - start) + " "
        return userid_list, test_features
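The nested list comprehension used in both methods above simply transposes mt.data_list (rows become columns). An equivalent, arguably clearer form on toy data:

data_list = [[1, 2, 3],   # results from one thread
             [4, 5, 6]]   # results from another thread
transposed = [list(col) for col in zip(*data_list)]
print(transposed)  # [[1, 4], [2, 5], [3, 6]]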
Example #20
def main():

    for set_name in ConfigHelper.get_datasets():

        MetricsHelper.reset_metrics()

        data, set_target = IOHelper.read_dataset(set_name)

        feats, labels = DataHelper.extract_feature_labels(data, set_target)
        DataHelper.create_label_mapping(labels)
        max_nb_feats = DataHelper.calculate_max_nb_features(feats)

        for e in range(ConfigHelper.nb_executions):
            start = time.time()
            print("Execution " + str(e))

            train_idxs, test_idxs = DataHelper.split_in_sets(feats, labels)

            train_X = DataHelper.select_rows(feats, train_idxs, copy=False)
            train_y = DataHelper.select_rows(labels, train_idxs, copy=False)
            test_X = DataHelper.select_rows(feats, test_idxs, copy=False)
            test_y = DataHelper.select_rows(labels, test_idxs, copy=False)

            for noise_level in ConfigHelper.noise_levels:

                noisy_idxs, noisy_train_y = DataHelper.insert_noise(
                    train_y, noise_level)

                for name, clf, clean_type in ConfigHelper.get_classifiers():

                    algorithm_data = ConfigHelper.choose_algorithm(
                        clf, clean_type, train_X, noisy_train_y, noisy_idxs,
                        max_nb_feats)

                    chosen_rate = algorithm_data[0]
                    chosen_threshold = algorithm_data[1]
                    chosen_X = algorithm_data[2]
                    chosen_y = algorithm_data[3]
                    chosen_clf = algorithm_data[4]
                    true_filtered = algorithm_data[5]
                    false_filtered = algorithm_data[6]

                    chosen_clf.fit(chosen_X, chosen_y)
                    predictions = chosen_clf.predict(test_X)
                    error = MetricsHelper.calculate_error_score(
                        test_y, predictions)

                    MetricsHelper.metrics.append([
                        set_name, e, noise_level, name, chosen_rate,
                        chosen_threshold, error, true_filtered, false_filtered
                    ])
            print(str(time.time() - start))

        IOHelper.store_results(MetricsHelper.convert_metrics_to_frame(),
                               "final_" + set_name)
    def create_from_orig(self, ds_type):
        """
        create_from_orig & relabel to be AffectNet-like.
        Labels are 1-7 in the source files, but we save them as 0 to 6.

        :param ds_type:
        :return:
        """
        print('create_from_orig & relabel to affectNetLike--->')
        if ds_type == DatasetType.train:
            txt_path = RafDBConf.orig_annotation_txt_path
            load_img_path = RafDBConf.orig_image_path
            load_bbox_path = RafDBConf.orig_bounding_box
            save_img_path = RafDBConf.no_aug_train_img_path
            save_anno_path = RafDBConf.no_aug_train_annotation_path
            prefix = 'train'
        elif ds_type == DatasetType.test:
            txt_path = RafDBConf.orig_annotation_txt_path
            load_img_path = RafDBConf.orig_image_path
            load_bbox_path = RafDBConf.orig_bounding_box
            save_img_path = RafDBConf.test_img_path
            save_anno_path = RafDBConf.test_annotation_path
            prefix = 'test'
        '''read the text file, and save exp, and image'''
        file1 = open(txt_path, 'r')
        dhl = DataHelper()
        affectnet_like_lbls = [3, 4, 5, 1, 2, 6, 0]
        while True:
            line = file1.readline()
            if not line:
                break
            f_name = line.split(' ')[0]
            if prefix not in f_name: continue

            img_source_address = load_img_path + f_name[:-4] + '.jpg'
            img_dest_address = save_img_path + f_name

            exp = int(line.split(' ')[1]) - 1
            '''relabel to affectNet'''
            exp = affectnet_like_lbls[exp]

            img = np.array(Image.open(img_source_address))
            '''padd, resize image and save'''
            x_min, y_min, x_max, y_max = self.get_bounding_box(
                load_bbox_path + f_name[:-4] + '_boundingbox.txt')
            img = dhl.crop_image_bbox(img, x_min, y_min, x_max, y_max)
            '''resize'''
            res_img = resize(img, (InputDataSize.image_input_size,
                                   InputDataSize.image_input_size, 3),
                             anti_aliasing=True)
            im = Image.fromarray(np.round(res_img * 255.0).astype(np.uint8))
            im.save(img_dest_address)
            '''save annotation'''
            np.save(save_anno_path + f_name[:-4] + '_exp', exp)
        file1.close()
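A worked example of the relabelling above, assuming the RAF-DB convention that the annotation text file stores expression labels 1-7: a raw label of 4 first becomes index 3 after the -1 shift, and affectnet_like_lbls[3] then maps it to 1 in the AffectNet-like scheme used for saving:

affectnet_like_lbls = [3, 4, 5, 1, 2, 6, 0]
raw_label = 4                          # value read from the annotation line
exp = affectnet_like_lbls[raw_label - 1]
print(exp)  # 1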
Example #22
 def do_show(self, name) -> None:
     """
     Show the train or test data set
     :param name: train or valid or test
     """
     if not self.data_set_helper:
         self.data_set_helper = DataHelper(self.data_path)
     self.data_set_helper.labels = self.labels_path
     try:
         self.data_set_helper.show_data_sets(name_of_dataset=name)
     except Exception as e:
         print(e)
Example #23
    def __init__(self, country='Russian Federation', years=None):
        if years and len(years) != 2:
            raise Exception("Must be two years...")

        if years is None:
            years = [2000, 2005]

        self.country = country
        self.years = years
        for year in years:
            self.data[year] = {}

        self.data_helper = DataHelper(country, years)
Example #24
 def create_au_mask(self):
     dhl = DataHelper()
     for i, file in tqdm(enumerate(os.listdir(self.img_path_aug))):
         if file.endswith(".jpg") or file.endswith(".png"):
             if os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_exp.npy")) \
                     and os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_slnd.npy")):
                 if os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_im.jpg")) or \
                         os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_im.jpg")):
                     continue
                 dhl.create_AU_mask_path(img_path=self.img_path_aug,
                                         anno_path=self.anno_path_aug,
                                         file=file,
                                         test_print=False)
 def create_masked_image(self):
     dhl = DataHelper()
     for i, file in tqdm(enumerate(os.listdir(self.img_path_aug))):
         if file.endswith(".jpg") or file.endswith(".png"):
             if os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_exp.npy")) \
                     and os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_slnd.npy")):
                 '''load data'''
                 lnd = np.load(
                     os.path.join(self.anno_path_aug,
                                  file[:-4] + "_slnd.npy"))
                 img_file_name = os.path.join(self.img_path_aug, file)
                 img = np.float32(Image.open(img_file_name)) / 255.0
                 '''create masks'''
                 dr_mask = np.expand_dims(dhl.create_derivative(img=img,
                                                                lnd=lnd),
                                          axis=-1)
                 au_mask = np.expand_dims(dhl.create_AU_mask(img=img,
                                                             lnd=lnd),
                                          axis=-1)
                 up_mask, mid_mask, bot_mask = dhl.create_spatial_mask(
                     img=img, lnd=lnd)
                 up_mask = np.expand_dims(up_mask, axis=-1)
                 mid_mask = np.expand_dims(mid_mask, axis=-1)
                 bot_mask = np.expand_dims(bot_mask, axis=-1)
                 '''fuse images'''
                 face_fused = dhl.create_input_bunches(
                     img_batch=img,
                     dr_mask_batch=dr_mask,
                     au_mask_batch=au_mask,
                     spatial_mask=None)
                 eyes_fused = dhl.create_input_bunches(
                     img_batch=img,
                     dr_mask_batch=dr_mask,
                     au_mask_batch=au_mask,
                     spatial_mask=up_mask)
                 nose_fused = dhl.create_input_bunches(
                     img_batch=img,
                     dr_mask_batch=dr_mask,
                     au_mask_batch=au_mask,
                     spatial_mask=mid_mask)
                 mouth_fused = dhl.create_input_bunches(
                     img_batch=img,
                     dr_mask_batch=dr_mask,
                     au_mask_batch=au_mask,
                     spatial_mask=bot_mask)
                 '''save fused'''
                 savez_compressed(
                     self.masked_img_path + file[:-4] + "_face", face_fused)
                 savez_compressed(
                     self.masked_img_path + file[:-4] + "_eyes", eyes_fused)
                 savez_compressed(
                     self.masked_img_path + file[:-4] + "_nose", nose_fused)
                 savez_compressed(
                     self.masked_img_path + file[:-4] + "_mouth",
                     mouth_fused)
Example #26
def train(
        data_helper: DataHelper,
        model: keras.Model,
        save_filename: str,
        batch_size=32,
        epochs=10
):
    # model summary
    model.summary()

    # training data generator
    train_data_generator = data_helper.train_data_generator(batch_size)
    # validation data generator
    validation_data_generator = data_helper.validation_data_generator(batch_size)
    # test data generator
    test_data_generator = data_helper.test_data_generator(batch_size)

    # train
    model.fit(
        x=train_data_generator,
        steps_per_epoch=data_helper.train_data_count // batch_size,
        validation_data=validation_data_generator,
        validation_steps=data_helper.validation_data_count // batch_size,
        epochs=epochs,
        shuffle=True,
        callbacks=[
            # configure TensorBoard to visualize training and help with tuning: tensorboard --logdir logs/fit
            keras.callbacks.TensorBoard(
                log_dir='logs/fit/' + datetime.now().strftime('%Y%m%d-%H%M%S'),
                histogram_freq=1
            ),
            # periodically save the model
            keras.callbacks.ModelCheckpoint(
                filepath=save_filename,
                monitor='sparse_categorical_accuracy',
                verbose=0,
                save_best_only=True,
                save_weights_only=False,
                mode='auto',
                save_freq='epoch'
            )
        ],
    )

    # evaluate on the test set
    model.evaluate(
        x=test_data_generator,
        steps=data_helper.test_data_count // batch_size,
    )
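The DataHelper used here is assumed to expose train/validation/test *_data_generator(batch_size) methods plus matching *_data_count attributes; those names come from the call sites above. A minimal sketch of a compatible generator (everything beyond the names is an assumption):

import numpy as np

def make_data_generator(x, y, batch_size):
    """Yield (inputs, labels) batches forever, as keras model.fit expects."""
    n = len(x)
    while True:
        idx = np.random.permutation(n)
        for start in range(0, n - batch_size + 1, batch_size):
            batch = idx[start:start + batch_size]
            yield x[batch], y[batch]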
Example #27
    def create_affectnet(self, load_img_path, save_img_path, save_anno_path,
                         img_path_arr, bbox_arr, landmarks_arr,
                         expression_lbl_arr, valence_arr, arousal_arr,
                         FLD_model_file_name, do_aug, is_7):

        # model = tf.keras.models.load_model(FLD_model_file_name)
        dhl = DataHelper()
        model = None
        if is_7:
            print('777777777777777777777777777777777777')
            print('++++++++++++| 7 labels |++++++++++++')
            print('777777777777777777777777777777777777')
        else:
            print('888888888888888888888888888888888888')
            print('++++++++++++| 8 labels |++++++++++++')
            print('888888888888888888888888888888888888')

        print('len(img_path_arr)')
        print(len(img_path_arr))

        for i in tqdm(range(len(img_path_arr))):
            if is_7:
                if int(expression_lbl_arr[i]) == ExpressionCodesAffectnet.none or \
                        int(expression_lbl_arr[i]) == ExpressionCodesAffectnet.uncertain or \
                        int(expression_lbl_arr[i]) == ExpressionCodesAffectnet.contempt or \
                        int(expression_lbl_arr[i]) == ExpressionCodesAffectnet.noface:
                    continue
            else:
                if int(expression_lbl_arr[i]) == ExpressionCodesAffectnet.none or \
                        int(expression_lbl_arr[i]) == ExpressionCodesAffectnet.uncertain or \
                        int(expression_lbl_arr[i]) == ExpressionCodesAffectnet.noface:
                    continue
            '''crop, resize, augment image'''
            dhl.crop_resize_aug_img(
                load_img_name=load_img_path + img_path_arr[i],
                save_img_name=save_img_path + str(i) + '.jpg',
                bbox=bbox_arr[i],
                landmark=landmarks_arr[i],
                save_anno_name=save_anno_path + str(i) + '_lnd',
                synth_save_anno_name=save_anno_path + str(i) + '_slnd',
                model=model,
                do_aug=do_aug)
            '''save annotation: exp_lbl, valence, arousal, landmark '''
            # print(str(int(expression_lbl_arr[i])))
            # save(save_anno_path + str(i) + '_exp', str(int(expression_lbl_arr[i])-1))
            save(save_anno_path + str(i) + '_exp',
                 str(int(expression_lbl_arr[i])))
            save(save_anno_path + str(i) + '_val', valence_arr[i])
            save(save_anno_path + str(i) + '_aro', arousal_arr[i])
Example #28
    def create_derivative_mask(self):
        dhl = DataHelper()
        for i, file in tqdm(enumerate(os.listdir(self.img_path_aug))):
            if file.endswith(".jpg") or file.endswith(".png"):
                if os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_exp.npy")) \
                        and os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_slnd.npy")):
                    # check if we have already created it:
                    if os.path.exists(os.path.join(self.anno_path_aug + 'dmg/', file[:-4] + "_dmg.jpg")) or \
                            os.path.exists(os.path.join(self.anno_path_aug, file[:-4] + "_dmg.jpg")):
                        continue

                    dhl.create_derivative_path(img_path=self.img_path_aug,
                                               anno_path=self.anno_path_aug,
                                               file=file,
                                               test_print=False)
Example #29
def cal_class_distribution(data_dir, level):
    """
    calculate the class distribution
    :param data_dir:
    :param level: 0 for inner-sentence, 1 for inter-sentence but inner paragraph, 2 for inter-paragraph, 3 for different depth
    :return: a Counter over relation classes for levels 0-2, or a dict mapping tree depth to such Counters for level 3
    """
    rst_trees = DataHelper.read_rst_trees(data_dir)
    all_nodes = [node for rst_tree in rst_trees for node in rst_tree.postorder_DFT(rst_tree.tree, [])]
    if level in [0, 1, 2]:
        valid_relations = [RstTree.extract_relation(node.child_relation) for node in all_nodes if
                           node.level == level and node.child_relation is not None]
        distribution = Counter(valid_relations)
        for cla in class2rel:
            if cla not in distribution:
                distribution[cla] = 0
        return distribution
    if level == 3:
        depth_relation_distributions = {}
        for node in all_nodes:
            if node.lnode is None and node.rnode is None:
                continue
            if node.depth in depth_relation_distributions:
                depth_relation_distributions[node.depth][RstTree.extract_relation(node.child_relation)] += 1
            else:
                depth_relation_distributions[node.depth] = Counter()
                depth_relation_distributions[node.depth][RstTree.extract_relation(node.child_relation)] = 1
        for depth, distribution in depth_relation_distributions.items():
            for cla in class2rel:
                if cla not in distribution:
                    distribution[cla] = 0
        return depth_relation_distributions
Example #30
    def upsample_data(self):
        """we generate some samples so that all classes will have equal number of training samples"""
        dhl = DataHelper()
        '''count samples & categorize their address based on their category'''
        if self.ds_type == DatasetType.train:
            sample_count_by_class = np.zeros([8])
            img_addr_by_class = [[] for i in range(8)]
            anno_addr_by_class = [[] for i in range(8)]
            lnd_addr_by_class = [[] for i in range(8)]
        else:
            sample_count_by_class = np.zeros([7])
            img_addr_by_class = [[] for i in range(7)]
            anno_addr_by_class = [[] for i in range(7)]
            lnd_addr_by_class = [[] for i in range(7)]
        """"""
        print("counting classes:")
        for i, file in tqdm(enumerate(os.listdir(self.anno_path))):
            if file.endswith("_exp.npy"):
                exp = int(np.load(os.path.join(self.anno_path, file)))
                sample_count_by_class[exp] += 1
                '''adding ex'''
                anno_addr_by_class[exp].append(
                    os.path.join(self.anno_path, file))
                img_addr_by_class[exp].append(
                    os.path.join(self.img_path, file[:-8] + '.jpg'))
                lnd_addr_by_class[exp].append(
                    os.path.join(self.anno_path, file[:-8] + '_slnd.npy'))

        print("sample_count_by_category: ====>>")
        print(sample_count_by_class)
        '''calculate augmentation factor for each class:'''
        aug_factor_by_class, aug_factor_by_class_freq = dhl.calculate_augmentation_rate(
            sample_count_by_class=sample_count_by_class,
            base_aug_factor=AffectnetConf.augmentation_factor)
        '''after we have calculated those two array, we will augment samples '''
        for i in range(len(anno_addr_by_class)):
            dhl.do_random_augment(img_addrs=img_addr_by_class[i],
                                  anno_addrs=anno_addr_by_class[i],
                                  lnd_addrs=lnd_addr_by_class[i],
                                  aug_factor=int(aug_factor_by_class[i]),
                                  aug_factor_freq=int(
                                      aug_factor_by_class_freq[i]),
                                  img_save_path=self.img_path_aug,
                                  anno_save_path=self.anno_path_aug,
                                  class_index=i)
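calculate_augmentation_rate is part of DataHelper and not shown. One common balancing scheme, sketched under the assumption (not taken from the original) that every class is augmented toward the size of the largest class:

import numpy as np

def calculate_augmentation_rate(sample_count_by_class, base_aug_factor=5):
    counts = np.maximum(np.asarray(sample_count_by_class, dtype=float), 1.0)
    ratio = counts.max() / counts                      # how far each class lags behind
    aug_factor = np.minimum(np.floor(ratio), base_aug_factor)
    # how often the capped factor must be applied to reach the target size
    aug_factor_freq = np.ceil(ratio / np.maximum(aug_factor, 1.0))
    return aug_factor, aug_factor_freq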
Example #31
def mc_worker(jobs, stats, ctl, store, timeout=5):
    logging.info("mc_worker started")

    while ctl["run_ok"]:
        try:
            root, parents, val = jobs.get(block=True, timeout=timeout)
        except Queue.Empty:
            logging.debug("mc_worker hasn't received jobs for %s seconds"
                          % timeout)
            continue

        start = time.time()

        for server in val:

            try:
                ip, port = server.split(":")
            except (ValueError, AttributeError) as e:
                logging.error("unable to collect mc stats from %s : %s"
                              % (server, e))
                continue

            mc_server = Server(ip)

            # get bucket name from root and parent nodes
            bucket = DataHelper.get_bucket(root, parents)

            # initialize memcached source
            mc_source = MemcachedSource(mc_server, bucket)

            # initialize handlers to dump data json doc
            j_handler = JsonHandler()
            s_handler = SerieslyHandler(store)

            # collect data from source and emit to handlers
            mc_coll = MemcachedCollector([mc_source], [j_handler, s_handler])
            mc_coll.collect()
            mc_coll.emit()
            stats.put([mc_source.fast, mc_source.meta], block=True)
            stats.put([mc_source.slow, mc_source.meta], block=True)

        delta = time.time() - start
        logging.debug("collected mc stats from %s, took %s seconds"
                      % (val, delta))

        if delta < timeout:
            logging.debug("mc_worker sleep for %s seconds" % (timeout - delta))
            time.sleep(timeout - delta)