def run_experiment_with_rake():
    """Run the RAKE keyword experiment and print its evaluation metrics.

    Operates in place on ``tweets_rake``, which is taken from the enclosing
    scope rather than passed as a parameter. The following columns are
    added to it, in this order: ``keyword``, ``selected_keyword``,
    ``inferred_aspect`` and ``gold_aspect``. The augmented table is written
    to ``dump/result_rake.csv`` and the confusion matrix, accuracy, average
    precision and average recall reported by ``Evaluation`` are printed.
    """
    # Single-argument, parenthesised print behaves the same on Python 2 and 3.
    print("\nBegin experiment using RAKE algorithm...")

    # RAKE: predict the keyword with RAKE, keeping the words with the
    # highest RAKE score.
    rake = RakeKeywordExtractor()
    tweets_rake['keyword'] = tweets_rake.apply(
        lambda t: rake.extract_keyword(
            rake.extract_candidates(t['text'], incl_scores=True)),
        axis=1)

    # RAKE: infer the aspect through the aspect mapping, using the largest
    # similarity. The lookup only depends on the keyword, so it is done once
    # per tweet; element 1 of its result is stored as the selected keyword
    # and element 0 as the inferred aspect.
    nearest = [asp.find_nearest_inferred_aspect(keyword, emb)
               for keyword in tweets_rake['keyword']]
    tweets_rake['selected_keyword'] = [match[1] for match in nearest]
    tweets_rake['inferred_aspect'] = [match[0] for match in nearest]
    tweets_rake['gold_aspect'] = tweets_rake.apply(
        lambda t: asp.INVERTED_ASPECTS[t['inferred_aspect']], axis=1)
    tweets_rake.to_csv('dump/result_rake.csv', encoding='utf-8', index=False)

    # RAKE: evaluate with accuracy (plus average precision and recall).
    eva_rake = Evaluation(tweets_rake)
    conf_matrix = eva_rake.build_confusion_matrix(tweets_rake)
    print("Confusion matrix:")
    print(conf_matrix)
    print("Accuracy using RAKE algorithm: {}".format(eva_rake.accuracy()))
    print("Average Precision using RAKE algorithm: {}".format(
        eva_rake.average_precision()))
    print("Average Recall using RAKE algorithm: {}".format(
        eva_rake.average_recall()))
def evaluate_accuracy(contingency_table: np.ndarray, evaluation: Evaluation,
                      is_sampled_graph: bool = False) -> np.ndarray:
    """Evaluates the accuracy of partitioning.

    The contingency table is divided by the sum of all its entries; the
    accuracy is the sum of the diagonal of that normalized table. The
    accuracy is printed (followed by an empty line) and stored on
    `evaluation`.

    Parameters
    ---------
    contingency_table : np.ndarray (int)
        the contingency table (confusion matrix) comparing the true block
        assignment to the algorithmically determined block assignment
    evaluation : Evaluation
        stores evaluation results; `sampled_graph_accuracy` is set when
        `is_sampled_graph` is True, otherwise `accuracy` is set
    is_sampled_graph : bool
        True if evaluation is for a sampled graph. Default = False

    Returns
    -------
    joint_prob : np.ndarray (float)
        the normalized contingency table
    """
    # joint probability of the two partitions is just the normalized
    # contingency table; ndarray.sum() reduces over every axis at once
    # instead of iterating the array with the built-in sum()
    joint_prob = contingency_table / contingency_table.sum()
    accuracy = joint_prob.diagonal().sum()
    print('Accuracy (with optimal partition matching): {}'.format(accuracy))
    print()
    if is_sampled_graph:
        evaluation.sampled_graph_accuracy = accuracy
    else:
        evaluation.accuracy = accuracy
    return joint_prob
def run_experiment_with_tfidf(tweets_tfidf):
    """Run the TF-IDF keyword experiment and print its evaluation metrics.

    The keyword of each tweet is the column with the largest value in the
    table returned by ``TfidfKeywordExtractor().fit_transform``. Keywords
    are joined back onto the tweets through a ``tweet_no`` column, then the
    ``selected_keyword``, ``inferred_aspect`` and ``gold_aspect`` columns
    are added. Intermediate tables are written to ``tfidf_keyword.csv``,
    ``tweets_tfidf.csv`` and ``tweets_tfidf_after_merge.csv``; the final
    table goes to ``dump/result_tfidf.csv``. The confusion matrix, accuracy,
    average precision and average recall reported by ``Evaluation`` are
    printed.

    Args:
        tweets_tfidf: table of tweets handed to the TF-IDF extractor. The
            name is rebound locally; the merged result is not returned.
    """
    # Single-argument, parenthesised print behaves the same on Python 2 and 3.
    print("\nBegin experiment using TF-IDF weighting algorithm...")

    # TF-IDF: find the keyword with TF-IDF, taking only the single word
    # with the highest weight.
    tfidf = TfidfKeywordExtractor()
    tfidf_weight = tfidf.fit_transform(tweets_tfidf)
    tfidf_weight['keyword'] = tfidf_weight.idxmax(axis=1)

    # MUST BE after extracting keyword
    # OTHERWISE, the keyword will be "tweet_no" for all tweets
    tfidf_weight = tfidf_weight.reset_index().rename(
        columns={'index': 'tweet_no'})
    # Shift the former index by one (1-based numbering if the index was the
    # default 0-based range -- TODO confirm).
    tfidf_weight['tweet_no'] = tfidf_weight['tweet_no'] + 1
    tfidf_weight = tfidf_weight[['tweet_no', 'keyword']]
    tfidf_weight.to_csv('tfidf_keyword.csv', encoding='utf-8', index=False)

    # Give the tweets the same tweet_no key so both tables can be merged.
    tweets_tfidf = tweets_tfidf.reset_index().rename(
        columns={'index': 'tweet_no'})
    tweets_tfidf['tweet_no'] = tweets_tfidf['tweet_no'] + 1
    tweets_tfidf.to_csv('tweets_tfidf.csv', encoding='utf-8', index=False)

    tweets_tfidf = pd.merge(tweets_tfidf, tfidf_weight,
                            how='left', on='tweet_no')
    tweets_tfidf.to_csv('tweets_tfidf_after_merge.csv',
                        encoding='utf-8', index=False)

    # TF-IDF: infer the aspect through the aspect mapping, using the largest
    # similarity. The lookup only depends on the keyword, so it is done once
    # per tweet; element 1 of its result is stored as the selected keyword
    # and element 0 as the inferred aspect.
    nearest = [asp.find_nearest_inferred_aspect(keyword, emb)
               for keyword in tweets_tfidf['keyword']]
    tweets_tfidf['selected_keyword'] = [match[1] for match in nearest]
    tweets_tfidf['inferred_aspect'] = [match[0] for match in nearest]
    tweets_tfidf['gold_aspect'] = tweets_tfidf.apply(
        lambda t: asp.INVERTED_ASPECTS[t['inferred_aspect']], axis=1)
    tweets_tfidf.to_csv('dump/result_tfidf.csv', encoding='utf-8', index=False)

    # TF-IDF: evaluate with accuracy (plus average precision and recall).
    eva_tfidf = Evaluation(tweets_tfidf)
    conf_matrix = eva_tfidf.build_confusion_matrix(tweets_tfidf)
    print("Confusion matrix:")
    print(conf_matrix)
    print("Accuracy using TF-IDF weighting algorithm: {}".format(
        eva_tfidf.accuracy()))
    print("Average Precision using TF-IDF weighting algorithm: {}".format(
        eva_tfidf.average_precision()))
    print("Average Recall using TF-IDF weighting algorithm: {}".format(
        eva_tfidf.average_recall()))
weight_train,weight_test = dataset.get_weight_train_test() eval_ = Evaluation() all_result_df = pd.DataFrame(columns=['models','params','AMS','Accuracy','Precision','Recall']) # Notre tableau de résultat optimal_parameters = pd.DataFrame(columns=['models','params','AMS']) # Notre tableau résultats optimaux #Random forest score_opt_ams = 0 for bootstrap in [True,False]: for max_depth in range(3,10): for n_estimators in [10, 30, 50, 100]: rf = RandomForestClassifier(n_estimators=n_estimators,bootstrap=bootstrap,max_depth=max_depth) rf.fit(Xtrain,ytrain) ypred = rf.predict(Xtest) score_ams = eval_.AMS(ytest,ypred,weights=weight_test) score_accuracy = eval_.accuracy(ytest,ypred) score_precision = eval_.precision(ytest,ypred) score_recall = eval_.rappel(ytest,ypred) l = {'models':'Random Forest','params':str(rf.get_params()),\ 'AMS':score_ams,'Accuracy':score_accuracy,'Precision':score_precision,'Recall':score_recall} all_result_df=all_result_df.append(l,ignore_index=True) if score_ams > score_opt_ams: optim_param = str(rf.get_params()) score_opt_ams = score_ams l = {'models':'Random Forest','params':optim_param,'AMS':score_opt_ams} optimal_parameters=optimal_parameters.append(l,ignore_index = True) optimal_parameters.to_csv("optimal_parameters.csv",index=False) all_result_df.to_csv("all_result.csv",index=False) """
def task_predict(input_files, input_model, isDynamic):
    """ Predict the speaker from the given file(s) and record the evaluation

    Every file matching ``input_files`` is read, divided by its peak
    absolute amplitude, passed through ``VAD_process`` and scored by the
    model. A prediction counts as recognised when its score is strictly
    above the threshold. The outcomes are collected in an ``Evaluation``
    object whose accuracy is printed and which is saved at the end.

    A file that raises while being read or scored is reported and skipped.
    When no file matches, or none could be scored, a message is printed and
    nothing is saved.

    Args:
        input_files (string): glob pattern of the speaker file(s); ``~`` is
            expanded. The expected speaker is the name of the directory
            holding the file, or of that directory's parent when the
            pattern ends with ``*/*/*.wav``
        input_model (string): model trained to give the solution
        isDynamic (bool): when true, the threshold is looked up per
            predicted label in ``m.get_dyn_threshold()``; otherwise the
            static threshold ``1 / n_label`` is used

    Returns:
        None
    """
    print(input_files)
    wav_files = glob.glob(os.path.expanduser(input_files))
    if not wav_files:
        # The summary at the end needs the path of a processed file, so
        # stop here instead of failing on names that were never assigned.
        print('No file matches {}'.format(input_files))
        return

    # Loads the model object and retrieve the number of speaker #
    m = ModelInterface.load(input_model)
    n_label = m.get_n_label()

    # Computes the threshold (dynamic or static) #
    if isDynamic:
        dyn_thrsh = m.get_dyn_threshold()
    else:
        threshold = 1 / n_label

    # Creates an Evaluation object to save the results #
    ev = Evaluation()

    # With this pattern one more directory level is stripped before the
    # speaker name is read from the path.
    extra_level = input_files[-9:] == "*/*/*.wav"
    n_evaluated = 0

    # Starts the prediction process #
    for f in wav_files:
        # NOTE(review): the timer restarts for every file, so the duration
        # saved below only covers the last file -- confirm this is intended.
        start_time = time.time()

        # Retrieves the expected label from the directory (evaluation not real time only) #
        root = os.path.split(f)
        if extra_level:
            root = os.path.split(root[0])
        speaker = os.path.basename(root[0])

        try:
            fs, signal = read_wav(f)
            signal = signal / max(abs(signal))
            # Extracts the features and predicts the label using the higher score within all possible speaker #
            label, score = m.predict(fs, VAD_process(signal))
        except Exception as e:
            print(f + ' error %s' % (e))
            # Without a fresh label/score there is nothing valid to record
            # for this file; do not reuse the previous file's values.
            continue

        # Recognition process : If the given score is higher than the threshold, the label is correct #
        # Else the speaker is not recognize #
        if isDynamic:
            threshold = dyn_thrsh[label]
        recog = (score > threshold)
        if recog:
            print(speaker, '->', label, ', score->', score)
        else:
            print(speaker, ' not recognize. ->', label, 'Score->', score)

        # Adds the speaker and its results to the evaluation object #
        ev.new(speaker, label, recog)
        n_evaluated += 1

    if n_evaluated == 0:
        print('No file could be evaluated')
        return

    # Retrieves the Database label used and prints the accuracy #
    # root[0] is the speaker directory of the last file seen; `path` is its
    # parent and DB_name the last component of the parent of `path`.
    path = os.path.split(root[0])[0]
    DB_name = os.path.split(path)[0]
    DB_name = os.path.basename(os.path.split(DB_name)[1])
    print('Accuracy : ', ev.accuracy(), '\n')
    ev.save(os.path.basename(path), n_label, DB_name, (time.time() - start_time))
m = ModelInterface.load(input_model) speaker = input( "Write the name of the speaker (for evaluation purposes) :") start_time = time.time() while tmp < 5: count += 1 buffer.record(chunk_size=sampling_rate) # 1 second of record data = buffer.get_data() data = np.frombuffer(data, 'int16') # Predicting every 3 loop # # Recording at 16000 Hz as sampling rate, (1 * 3) sec as buffer size and converting data in int16 type # if count >= 3: predict(data, m, ev, speaker) # save_RT(speaker, data, width =2, rate=sampling_rate) count = 0 tmp += 1 print("Ok, ", time.time() - start_time - 15, " seconds") # Stops the recording and closes the audio stream # print('Accuracy : ', ev.accuracy(), '\n') ev.save("Real-Time_Speaker_Recognition", tmp, "RTSP/RTSP_" + speaker, (time.time() - start_time - 15)) buffer.stop_record()
pair_wise=[3, 1], train_or_test=0, smoothness=0.1, contour=False) """ Instantiate an object of Evaluation class to calculate various model metrics. """ eval = Evaluation(bayes_case=bayes_classifier, data_prep=data_preprocess, test_size=0.30) class_id = 1 # The class_id for the required class # Returns confusion matrix for a given Bayesian Classifier Case cm = eval.confusion_matrix() # Returns the accuracy of classification for a given Bayesian Classifier Case acc = eval.accuracy() # Returns the precision for a given class for a given Bayesian Classifier Case prec = eval.precision(class_id) # Returns the recall for a given class for a given Bayesian Classifier Case rec = eval.recall(class_id) # Returns the F-score for a given class for a given Bayesian Classifier Case f_score = eval.f_score(class_id) # Returns the mean precision of classification for a given Bayesian Classifier Case mean_prec = eval.mean_precision() # Returns the mean recall of classification for a given Bayesian Classifier Case mean_rec = eval.mean_recall()