def _process_people(self, directory):
    # needs: from os import listdir; from os.path import isfile, join
    files = [f for f in listdir(directory) if isfile(join(directory, f))]
    preproc = Preprocessing(DATA_FREQUENCY)
    people = []
    num_samples = 0
    print("Starting preprocessing")
    for file in files:
        try:
            number = self._get_file_number(file)
            person = preproc.process_person(
                f"{DATA_PATH}/{file}",
                f"{ORIGINALS_PATH}/s{number}.bdf",
                number)
            people.append(person)
            person_sample_count = len(person)
            print(f"{file} done. Got data from {person_sample_count} videos.")
            num_samples += person_sample_count
            print(f"Collected {num_samples} samples so far")
        except Exception as e:
            # Skip files that fail to process, but do not fail silently.
            print(f"Skipping {file}: {e}")
    print("Preprocessing finished")
    return people
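# `_get_file_number` is called above but not shown. A minimal sketch of such a
# method, assuming participant files are named with an embedded number such as
# "s01.dat" (the naming scheme is an assumption, not the author's code):
import re

def _get_file_number(self, filename):
    # Pull the first run of digits out of the file name, e.g. "s01.dat" -> 1.
    match = re.search(r"\d+", filename)
    if match is None:
        raise ValueError(f"No participant number in file name: {filename}")
    return int(match.group())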
def _show_statistics(self):
    print("Creating statistics...")
    # Force full feature extraction before gathering the data tuples.
    main.EXTRACT_ALL_FEATURES = True
    x, y = self._get_data_tuples()
    labels = Preprocessing.get_labels()
    stats = Statistics(x, y, labels)
    stats.create()
import os

import numpy as np
from flask import jsonify


def resume():
    print('start...')
    preprocessing = Preprocessing()
    vsm = VSM()
    svd = SVD()
    result = preprocessing.read_file()
    judul = preprocessing.read_title()          # judul = article titles
    sentences = preprocessing.split_sentence(result)
    sumarize = []
    original = []
    pre_judul = preprocessing.preprocessing(judul)
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'hasil_uji'))
    for index, s in enumerate(sentences):
        sentence_result = ''
        pre_result = preprocessing.preprocessing(s)
        listterm = vsm.list_term(pre_result)
        term_frequency = vsm.cal_tf(pre_result, listterm)
        term_frequency_judul = vsm.cal_tf_basic(pre_judul[index], listterm)
        term_frequency_normalize = vsm.cal_tf_normalize(term_frequency)
        # Log-dampen the raw title term counts: log(1 + tf).
        term_frequency_normalize_judul = np.log(
            1 + np.asarray(term_frequency_judul))
        document_frequency = vsm.cal_df(pre_result, listterm)
        document_frequency_judul = vsm.cal_df(pre_judul, listterm)
        term_fn = [tfn.tolist() for tfn in term_frequency_normalize]
        weight = vsm.cal_vsm(term_fn, document_frequency)
        weight_judul = vsm.cal_vsm_basic(term_frequency_normalize_judul,
                                         document_frequency_judul)
        # SVD picks the indices of the sentences to keep in the summary.
        index_sentence = svd.cal_svd(weight, weight_judul)
        for i in index_sentence:
            sentence_result += s[i] + ' '
        sumarize.append(sentence_result)
        original.append(''.join(s))
        # Write each summary to its own file, closing the handle properly.
        with open(os.path.join(path, f"berita-{index}.txt"), "w+") as f:
            f.write(sentence_result)
        print(index)
    # ringkasan = summary, judul = title.
    hasil_ringkasan = [{'ringkasan': sumarize[i], 'judul': judul[i]}
                       for i in range(len(sumarize))]
    try:
        mongo.db.ringkasan.insert_many(hasil_ringkasan)
    except Exception as e:
        print(e)
    return jsonify({"sumarize": sumarize, "original": original}), 200
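# Illustration of the log(1 + tf) weighting that `resume` applies to the
# title's raw term counts above. The input counts are made-up values, not
# project data.
import numpy as np

term_frequency_judul = [3, 0, 1, 7]           # raw term counts for the title
weighted = np.log(1 + np.asarray(term_frequency_judul))
print(weighted)  # [1.38629436 0.         0.69314718 2.07944154]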
def _reverse_sbs_scores(self):
    print("Looking for best features...")
    x, y = self._get_data_tuples()
    print(f"Samples: {len(y)}")
    ai = AI()
    # Sequential backward selection: search for the feature subset with the
    # highest accuracy.
    features, accuracy = ai.reverse_sbs_score(x, y)
    labels = Preprocessing.get_labels()
    print(f"Max accuracy is {accuracy}")
    print("Features:")
    for f in features:
        print(labels[f])
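# `AI.reverse_sbs_score` is not shown here. As a point of comparison only
# (not the project's implementation), sequential backward selection can be
# sketched with scikit-learn's SequentialFeatureSelector; x and y are assumed
# to be the same feature matrix and label vector passed above.
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier

def backward_selection_sketch(x, y, n_features=5):
    knn = KNeighborsClassifier(n_neighbors=3)
    selector = SequentialFeatureSelector(
        knn, n_features_to_select=n_features, direction="backward")
    selector.fit(x, y)
    # Indices of the surviving features, comparable to `features` above.
    return selector.get_support(indices=True)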
def get_hasil_uji():
    print('start...')
    preprocessing = Preprocessing()
    dataset = preprocessing.read_ringkas()        # expert (reference) summaries
    hasil_sistem = preprocessing.read_hasil()     # system summaries
    kalimat_ringkas_dataset = preprocessing.split_sentence(dataset)
    kalimat_ringkas_hasil = preprocessing.split_sentence(hasil_sistem)
    akurasi = []   # per-document F1 scores
    hasil = []
    for i in range(len(dataset)):
        fn = 0
        fp = 0
        # False negatives: reference sentences the system missed.
        for sentence in kalimat_ringkas_dataset[i]:
            if sentence not in kalimat_ringkas_hasil[i]:
                fn += 1
        # False positives: system sentences absent from the reference.
        for sentence in kalimat_ringkas_hasil[i]:
            if sentence not in kalimat_ringkas_dataset[i]:
                fp += 1
        # True positives are the reference sentences that were recovered,
        # not the full reference count.
        tp = len(kalimat_ringkas_dataset[i]) - fn
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if precision + recall else 0.0)
        hasil.append({
            "akurasi": f1,
            "system": hasil_sistem[i],
            "pakar": dataset[i],      # pakar = expert
            "precision": precision,
            "recall": recall
        })
        akurasi.append(f1)
    print(np.average(akurasi))
    print(np.max(akurasi))
    print(np.min(akurasi))
    return jsonify(hasil), 200
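# Quick sanity check of the metric arithmetic above on made-up counts:
# 4 reference sentences, 3 of them recovered, 2 spurious system sentences.
tp, fp, fn = 3, 2, 1
precision = tp / (tp + fp)                          # 3/5 = 0.6
recall = tp / (tp + fn)                             # 3/4 = 0.75
f1 = 2 * precision * recall / (precision + recall)  # ~0.667
print(precision, recall, f1)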
def _random_forest_scores(self):
    print("Looking for best features...")
    x, y = self._get_data_tuples()
    ai = AI()
    ai.random_forest_score(x, y, Preprocessing.get_labels())
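# `AI.random_forest_score` is not shown here. A sketch of the underlying idea
# (not the project's code): rank features by random-forest importance, assuming
# x, y, and labels as passed above.
from sklearn.ensemble import RandomForestClassifier

def random_forest_score_sketch(x, y, labels):
    forest = RandomForestClassifier(n_estimators=100, random_state=0)
    forest.fit(x, y)
    # Pair each importance with its feature label, highest first.
    ranked = sorted(zip(forest.feature_importances_, labels), reverse=True)
    for importance, label in ranked:
        print(f"{label}: {importance:.4f}")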
import sys


class Predictor:
    """This class is used to predict emotions in practice."""

    def __init__(self, freq, model):
        self.freq = freq
        self.model = model
        self.pre = Preprocessing(self.freq)
        self.post = Postprocessing(self.freq)
        self.base_bvp_features = None
        self.base_gsr_features = None

    def process(self):
        """
        Loop that predicts emotions. With small changes it could read the
        signals from a pipeline instead of stdin.
        """
        base_bvp, base_gsr = self._get_basic_values()
        print("Processing base signals...")
        self.base_bvp_features = self.pre.get_base_bvp_features(base_bvp)
        self.base_gsr_features = self.pre.get_base_gsr_features(base_gsr)
        while True:
            print("Enter BVP signal (one value per line). Press ENTER to finish: ")
            bvp = self._read_signal()
            print("Enter GSR signal (one value per line). Press ENTER to finish: ")
            gsr = self._read_signal()
            # Express the new signals relative to the base features, then
            # standardize them the same way the training data was.
            values = self.pre.get_diffed_values(bvp, gsr,
                                                self.base_bvp_features,
                                                self.base_gsr_features)
            values = self.post.standarize(values)
            emotion = self.model.predict(values)
            print(f"Predicted emotion class: {emotion}")

    def _read_signal(self):
        # Read float samples from stdin until a blank line.
        signal = []
        for line in sys.stdin:
            rawline = line.rstrip()
            if not rawline:
                break
            signal.append(float(rawline))
        return signal

    def _get_basic_values(self):
        print("Enter base BVP signal (one value per line). Press ENTER to finish: ")
        base_bvp = self._read_signal()
        print("Enter base GSR signal (one value per line). Press ENTER to finish: ")
        base_gsr = self._read_signal()
        return base_bvp, base_gsr
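# Hypothetical usage of Predictor. The model is assumed to be any object with
# a scikit-learn style predict(); loading it from "model.pkl" and the 128 Hz
# frequency are illustrative assumptions, not values from the code above.
import pickle

if __name__ == "__main__":
    with open("model.pkl", "rb") as f:
        model = pickle.load(f)
    predictor = Predictor(freq=128, model=model)
    predictor.process()  # reads base signals, then loops over predictions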