def __init__(self):
    self.transformer = {}
    self.fitted: bool = False
    self.dict_length: int = 0
    self.api = MongoDb()
    self.encoding_mapper: Dict[int, int] = {}
    self.reverse_mapper: Dict[int, str] = {}
def main():
    args = input_args().parse_args()
    api = MongoDb()
    api.load_data_to_database(
        fasttext_collection_name,
        Path(args.fasttext),
        fasttext_loader,
        fasttext_transformer,
    )
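# Hypothetical invocation sketch: the option name "--fasttext" and the script
# name are assumptions inferred from args.fasttext; the real flag is whatever
# input_args() defines.
#
#   python load_fasttext_to_db.py --fasttext path/to/fasttext_embeddings.vec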
def main():
    api = MongoDb()
    records_1 = api.get_record(
        collection_name=COLLECTION_NAME,
        collection_id=COLLECTION_ID,
        label="Fake",
        limit=200,
    )
    records_2 = api.get_record(
        collection_name=COLLECTION_NAME,
        collection_id=COLLECTION_ID,
        label="True",
        limit=200,
    )
    # tweet_positive_records = api.get_record(
    #     collection_name=TweetsEmotions_collection_name,
    #     collection_id=TweetsEmotions_collection_id,
    #     label=0,
    #     limit=500,
    # )
    # tweet_negative_records = api.get_record(
    #     collection_name=TweetsEmotions_collection_name,
    #     collection_id=TweetsEmotions_collection_id,
    #     label=4,
    #     limit=500,
    # )

    # Define model specifications
    model_1 = "bayesian_dropout_nn_fast_text_embeddings"
    model_2 = "bayesian_dropout_nn_fast_text_embeddings"
    algorithm = "nn"
    acquisition_function_1 = "random"
    acquisition_function_2 = "entropy"
    active_learning_loops = 1
    active_learning_step = 10
    max_active_learning_iters = 10
    initial_training_data_size = 10
    validation_data_size = 400
    category_1 = "SPORTS"
    category_2 = "COMEDY"
    transformation_needed = False

    instances = records_1 + records_2
    labels = [sample["record"]["label"] for sample in records_1 + records_2]
    # instances = tweet_positive_records + tweet_negative_records
    # labels = [
    #     sample["record"]["label"]
    #     for sample in tweet_positive_records + tweet_negative_records
    # ]

    instances_from_db, labels_from_db = shuffle(instances, labels, random_state=0)

    # HuffPostTransform = word_embed_transformator()
    # The generic transformator is used here; the HuffPost-prefixed variable
    # names are kept from an earlier version of this script.
    HuffPostTransform = transformator()
    HuffPostTransform.fit(instances_from_db, labels_from_db)

    if transformation_needed:
        instances = instances_from_db
        labels = labels_from_db
    else:
        instances = HuffPostTransform.transform_instances(instances_from_db)
        labels = HuffPostTransform.transform_labels(labels_from_db)

    # Get AUC results from an active learning simulation
    auc_active_learning_random_10_runs_nn = active_learning_simulation(
        HuffPostTransform,
        None,
        active_learning_loops,
        max_active_learning_iters,
        active_learning_step,
        algorithm,
        instances,
        labels,
        initial_training_data_size,
        transformation_needed,
    )

    # Pack specifications and results into a list for upload to the Peony Database
    list_to_upload = [
        model_1,
        acquisition_function_1,
        active_learning_loops,
        active_learning_step,
        max_active_learning_iters,
        initial_training_data_size,
        validation_data_size,
        category_1,
        category_2,
        auc_active_learning_random_10_runs_nn,
    ]

    # Upload results to the Peony Database
    # api.load_model_results(*list_to_upload)

    # Get AUC results from an active learning simulation
    auc_active_learning_entropy_10_runs_nn = active_learning_simulation(
        HuffPostTransform,
        entropy_sampling,  # false_positive_sampling,
        active_learning_loops,
        max_active_learning_iters,
        active_learning_step,
        algorithm,
        instances,
        labels,
        initial_training_data_size,
        transformation_needed,
    )

    # Pack specifications and results into a list for upload to the Peony Database
    list_to_upload = [
        model_2,
        acquisition_function_2,
        active_learning_loops,
        active_learning_step,
        max_active_learning_iters,
        initial_training_data_size,
        validation_data_size,
        category_1,
        category_2,
        auc_active_learning_entropy_10_runs_nn,
    ]

    # Upload results to the Peony Database
    # api.load_model_results(*list_to_upload)

    visualize_two_auc_evolutions(
        auc_active_learning_random_10_runs_nn,
        auc_active_learning_entropy_10_runs_nn,
    )
from PeonyPackage.PeonyDb import MongoDb
from Peony_visualization.src.peony_visualization import visualize_two_auc_evolutions

api = MongoDb()

# Random acquisition function
random_sampling_results = api.get_model_results(
    {
        "model": "bayesian_denfi_nn_hot_start_fast_text_embeddings",
        "acquisition_function": "random",
        "category_1": "POSITIVE_EMOTIONS_TWEETS",
    }
)
random_sampling_results = [
    item for val in random_sampling_results for item in val["results"]
]

# Entropy acquisition function
entropy_sampling_results = api.get_model_results(
    {
        "model": "bayesian_denfi_nn_hot_start_fast_text_embeddings",
        "acquisition_function": "entropy",
        "category_1": "POSITIVE_EMOTIONS_TWEETS",
    }
)
entropy_sampling_results = [
    item for val in entropy_sampling_results for item in val["results"]
]
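# Hedged addition: the two result lists are flattened above but never plotted,
# even though visualize_two_auc_evolutions is imported.  Assuming the flattened
# lists match the format the simulation scripts pass to that function, the
# comparison plot would be:
visualize_two_auc_evolutions(random_sampling_results, entropy_sampling_results)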
def __init__(self):
    self.api = MongoDb()
    self.data = self.api.get_model_results(filter_dict={})
def __init__(self):
    super().__init__(embedding_dim=300)
    self.transformer = {}
    self.fitted: bool = False
    self.dict_length: int = 0
    self.api = MongoDb()
def main():
    args = input_args().parse_args()
    api = MongoDb()
    if args.huffpost:
        api.load_data_to_database(
            HuffPost_collection_name,
            Path(args.huffpost),
            HuffPost_loader,
            HuffPost_transformer,
        )
    if args.newsgroups:
        api.load_data_to_database(
            NewsGroups_collection_name,
            Path(args.newsgroups),
            NewsGroups_loader,
            NewsGroups_transformer,
        )
    if args.tweets:
        api.load_data_to_database(
            Tweets_collection_name, Path(args.tweets), Tweets_loader, Tweets_transformer
        )
    if args.comments:
        api.load_data_to_database(
            Comments_collection_name,
            Path(args.comments),
            Comments_loader,
            Comments_transformer,
        )
    if args.emotions:
        api.load_data_to_database(
            Emotions_collection_name,
            Path(args.emotions),
            Emotions_loader,
            Emotions_transformer,
        )
    if args.fake_news:
        api.load_data_to_database(
            fake_news_collection_name,
            Path(args.fake_news),
            fake_news_loader,
            fake_news_transformer,
        )
    if args.fake_news_detection:
        api.load_data_to_database(
            fake_news_detection_collection_name,
            Path(args.fake_news_detection),
            fake_news_detection_loader,
            fake_news_detection_transformer,
        )
    if args.liar_paragraph:
        api.load_data_to_database(
            liar_paragraph_collection_name,
            Path(args.liar_paragraph),
            liar_paragraph_loader,
            liar_paragraph_transformer,
        )
    if args.liar_full_text:
        api.load_data_to_database(
            liar_full_text_collection_name,
            Path(args.liar_full_text),
            liar_full_text_loader,
            liar_full_text_transformer,
        )
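# Hypothetical invocation sketch: option names are assumed to mirror the args.*
# attributes checked above (the actual flags come from input_args()); paths are
# placeholders.  Only the datasets whose flags are supplied get loaded.
#
#   python load_datasets_to_db.py --huffpost path/to/huffpost.json \
#       --tweets path/to/tweets.csv --fake_news path/to/fake_news.csv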
def main():
    api = MongoDb()
    label_1 = api.get_record(
        collection_name=COLLECTION_NAME,
        collection_id=COLLECTION_ID,
        label=0,
        limit=300,
    )
    label_2 = api.get_record(
        collection_name=COLLECTION_NAME,
        collection_id=COLLECTION_ID,
        label=4,
        limit=300,
    )
    # label_1 = api.get_record(
    #     collection_name=COLLECTION_NAME,
    #     collection_id=COLLECTION_ID,
    #     label=0,
    #     limit=10,
    # )
    # label_2 = api.get_record(
    #     collection_name=COLLECTION_NAME,
    #     collection_id=COLLECTION_ID,
    #     label=4,
    #     limit=10,
    # )

    instances = label_1 + label_2
    labels = [sample["record"]["label"] for sample in label_1 + label_2]
    instances, labels = shuffle(instances, labels, random_state=0)

    Transformator = transformator()
    # Transformator.fit(instances, labels)
    Transformator.fit(labels)

    peony_model = PeonyBoxModel(
        Transformator,
        active_learning_step=5,
        acquisition_function=entropy_sampling,
    )
    # peony_model.bayesian_dropout_nn.fit(instances[50:], labels[50:])
    # peony_model.bayesian_denfi_nn.reset()
    # peony_model.bayesian_denfi_nn.epsilon_greedy_coef = 1
    # indexes = peony_model.bayesian_denfi_nn.get_learning_samples(instances[:50])
    # add_training = [instances[index] for index in indexes.tolist()]
    # add_labels = [labels[index] for index in indexes.tolist()]
    # peony_model.feed_forward_nn.add_new_learning_samples(add_training, add_labels)
    # peony_model.feed_forward_nn.fit(instances, labels)
    # predicted = peony_model.bayesian_dropout_nn.predict(instances[50:])

    start_time = time.time()
    k_fold = k_fold_corss_validation(
        peony_model.bayesian_dropout_nn, Transformator, instances, labels, 2
    )
    print(f"elapsed time is {time.time() - start_time}")

    print(auc_metrics(k_fold))
    scores = [
        accuracy_score(evaluation["true"], evaluation["predicted"], normalize=True)
        for evaluation in k_fold
    ]
    print(scores)
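    # Hedged addition (not in the original script): a one-line summary of the
    # per-fold accuracies computed above, using only the "scores" list.
    print(f"mean accuracy over {len(scores)} folds: {sum(scores) / len(scores):.3f}")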