def testLogarithm(self):
    """log(10) issued through the DB session should evaluate to ln(10)."""
    database = DB()
    database.setUp()
    session = database.Session()
    result = session.execute("select log(10) as q;")
    for row in result:
        # ln(10) — assumes the backend's log() is the natural logarithm; TODO confirm.
        self.assertAlmostEqual(row[0], 2.302585092994046)
def setUp(self):
    """Fresh DB plus an OldTweetsCrawler wired to the 'Claim' domain."""
    self._db = DB()
    self._db.setUp()
    self._claims = {}
    self.tweets_crawler = OldTweetsCrawler(self._db)
    self.tweets_crawler._domain = u'Claim'
    self._add_author(u"author_guid")
def setUp(self):
    """Import the Tumblr fixtures, run the account-properties feature
    generator, and cache the generated features for the test author."""
    TestBase.setUp(self)
    self.config = getConfig()
    self._start_date = self.config.eval("DEFAULT", "start_date")
    self._tsv_files_path = self.config.get("TumblrImporter", "tsv_test_files_account_properties_feature_generator")
    self._db = DB()
    self._db.setUp()
    self._tumblr_parser = TumblrImporter(self._db)
    self._tumblr_parser.setUp(self._tsv_files_path)
    self._tumblr_parser.execute()
    self._author_guid = "f0f4bb42-3fed-322a-b71a-681179d47ea1"
    parameters = {
        "authors": self._db.get_authors_by_domain(Domains.MICROBLOG),
        "posts": self._db.get_posts_by_domain(Domains.MICROBLOG),
    }
    generator = AccountPropertiesFeatureGenerator(self._db, **parameters)
    generator.execute()
    self._author_features = self._db.get_author_features_by_author_guid(author_guid=self._author_guid)
    self._author_features_dict = self._create_author_features_dictionary(self._author_features)
def setUpClass(cls):
    """
    get_some_resource() is slow, to avoid calling it for each test use
    setUpClass() and store the result as class variable
    """
    super(TestSub2VecModelCreator, cls).setUpClass()
    cls._db = DB()
    cls._db.setUp()
    cls.sub2vec_model_creator = Sub2VecModelCreator(cls._db)
    cls.sub2vec_feature_generator = Sub2VecFeatureGenerator(
        cls._db, **{
            'authors': [],
            'posts': {}
        })
    # One shared edge list; the "unconnected" variants get 7 nodes so nodes
    # 5 and 6 have no incident edges.
    edges = [(0, 4), (2, 0), (1, 3), (3, 1), (0, 1), (1, 2), (4, 0), (4, 3), (2, 3), (3, 0)]
    cls.connected_undirected_graph = cls.create_undirected_graph(
        5, edges, 'connected_undirected_graph')
    # NOTE(review): the *unconnected directed* graph is built by calling
    # cls.connected_directed_graph(...); by symmetry with create_undirected_graph
    # a factory named create_directed_graph may have been intended — confirm.
    cls.unconnected_directed_graph = cls.connected_directed_graph(
        7, edges, 'unconnected_directed_graph')
    # NOTE(review): this line rebinds the name connected_directed_graph from the
    # factory method to its result, so the method is unreachable through the
    # class afterwards — statement order here is load-bearing.
    cls.connected_directed_graph = cls.connected_directed_graph(
        5, edges, 'connected_directed_graph')
    cls.unconnected_undirected_graph = cls.create_undirected_graph(
        7, edges, 'unconnected_undirected_graph')
    cls.add_graph_to_db(cls.connected_undirected_graph)
    cls.add_graph_to_db(cls.unconnected_directed_graph)
    cls.add_graph_to_db(cls.connected_directed_graph)
    cls.add_graph_to_db(cls.unconnected_undirected_graph)
def setUp(self):
    """Fresh DB and an InstagramCrawler whose JSON fetch is stubbed out."""
    self._db = DB()
    self._db.setUp()
    self._author = None
    self._posts = []
    self.instagram_crawler = InstagramCrawler(self._db)
    # Replace the real network fetch with the test stub `f`.
    inner_crawler = self.instagram_crawler.insta_crawler
    inner_crawler.get_json = types.MethodType(f, inner_crawler)
def setUp(self):
    """Fresh DB and a RedditCrawler backed by the RedditStub fake."""
    self._db = DB()
    self._db.setUp()
    self._author = None
    self._posts = []
    self.reddit_crawler = RedditCrawler(self._db)
    # Swap the live reddit client for the offline stub.
    self.reddit_crawler.reddit = RedditStub()
def setUp(self):
    """Import the Tumblr fixtures and run the graph feature generator with
    every graph option declared in the GraphFeatureGenerator_1 section."""
    TestBase.setUp(self)
    self.config = getConfig()
    self._start_date = self.config.eval("DEFAULT", "start_date")
    self._tsv_files_path = self.config.get(
        "TumblrImporter", "tsv_test_files_graph_feature_generator")
    self._db = DB()
    self._db.setUp()
    self._tumblr_parser = TumblrImporter(self._db)
    self._tumblr_parser.setUp(self._tsv_files_path)
    self._tumblr_parser.execute()
    self._author_guid = u"f0f4bb42-3fed-322a-b71a-681179d47ea1"
    parameters = {
        "authors": self._db.get_authors_by_domain(Domains.MICROBLOG),
        "posts": self._db.get_posts_by_domain(Domains.MICROBLOG),
    }
    # Each graph option shares its name between the config file and the
    # generator's keyword arguments, so read them all in one pass.
    for option in ("graph_types", "algorithms", "aggregation_functions",
                   "neighborhood_sizes", "distances_from_labeled_authors",
                   "graph_directed", "graph_weights"):
        parameters[option] = self.config.eval("GraphFeatureGenerator_1", option)
    graph_feature_generator = GraphFeatureGenerator(self._db, **parameters)
    graph_feature_generator.execute()
    self._author_features = self._db.get_author_features_by_author_guid(
        author_guid=self._author_guid)
    self._author_features_dict = self._create_author_features_dictionary(
        self._author_features)
def testDBSetUp(self):
    """Smoke test: DB.setUp() must yield an engine the Inspector can reflect."""
    from sqlalchemy.engine.reflection import Inspector

    self.db = DB()
    self.db.setUp()
    self.inspector = Inspector.from_engine(self.db.engine)
    # self.assertTrue("posts" in set(self.inspector.get_table_names()))
    self.db.session.close()
def setUp(self):
    """Import Tumblr fixtures, run the autotopic and key-author models, then
    generate and cache key-author-score features for the test author."""
    TestBase.setUp(self)
    self.config = getConfig()
    self._tsv_files_path = self.config.get(
        "TumblrImporter", "tsv_test_files_key_author_score_feature_generator")
    self._db = DB()
    self._db.setUp()
    self._tumblr_parser = TumblrImporter(self._db)
    self._author_guid = "150ff707-a6eb-3051-8f3c-f623293c714b"
    self._tumblr_parser.setUp(self._tsv_files_path)
    self._tumblr_parser.execute()
    # Topics must exist before the key-author model can run.
    autotopic = AutotopicExecutor(self._db)
    autotopic.setUp()
    autotopic.execute()
    key_authors = KeyAuthorsModel(self._db)
    key_authors.setUp()
    key_authors.execute()
    parameters = {
        "authors": self._db.get_authors_by_domain(Domains.MICROBLOG),
        "posts": self._db.get_posts_by_domain(Domains.MICROBLOG),
    }
    self._key_author_score_feature_generator = KeyAuthorScoreFeatureGenerator(
        self._db, **parameters)
    self._key_author_score_feature_generator.execute()
    self._author_features = self._db.get_author_features_by_author_guid(
        author_guid=self._author_guid)
    self._author_features_dict = self._create_author_features_dictionary(
        self._author_features)
def setUp(self):
    """Clean DB plus empty fixture collections for the 'test' domain."""
    self._db = DB()
    self._db.setUp()
    self._domain = u'test'
    self._texts = []
    self._posts = []
    self._authors = []
def testDBSetUp(self):
    """The freshly created schema must contain the posts table."""
    from sqlalchemy.engine.reflection import Inspector

    database = DB()
    database.setUp()
    session = database.Session()
    inspector = Inspector.from_engine(database.engine)
    self.assertIn("posts", set(inspector.get_table_names()))
def setUp(self):
    """Seed the DB with two authors — one 'acquired' bad actor and one plain
    user — each owning ten posts with identical content, so their timelines
    fully overlap for the visualization generator under test.

    Refactored: the original duplicated the author+posts construction twice
    inline; both now go through :meth:`_add_author_with_posts`.
    """
    self.config = getConfig()
    self._db = DB()
    self._db.setUp()
    self.timeline_overlap = TimelineOverlapVisualizationGenerator()
    self._add_author_with_posts('acquired_user', 1, 'bad_post',
                                author_type='bad_actor',
                                author_sub_type='acquired')
    self._add_author_with_posts('TestUser1', 2, 'TestPost')
    self._db.commit()

def _add_author_with_posts(self, guid, osn_id, post_prefix,
                           author_type=None, author_sub_type=None):
    """Insert one author (all name-like fields set to ``guid``) plus ten
    posts named ``post_prefix``1..10 with overlapping content."""
    author = Author()
    author.name = guid
    author.domain = 'Microblog'
    author.author_guid = guid
    author.author_screen_name = guid
    author.author_full_name = guid
    author.author_osn_id = osn_id
    author.created_at = datetime.datetime.now()
    author.missing_data_complementor_insertion_date = datetime.datetime.now()
    author.xml_importer_insertion_date = datetime.datetime.now()
    # Only the bad actor carries type/sub-type labels (matches original setup).
    if author_type is not None:
        author.author_type = author_type
    if author_sub_type is not None:
        author.author_sub_type = author_sub_type
    self._db.add_author(author)
    for i in range(1, 11):
        post = Post()
        post.post_id = post_prefix + str(i)
        post.author = guid
        post.guid = post_prefix + str(i)
        post.date = datetime.datetime.now()
        post.domain = 'Microblog'
        post.author_guid = guid
        # Identical content across both authors creates the timeline overlap.
        post.content = 'InternetTV love it' + str(i)
        post.xml_importer_insertion_date = datetime.datetime.now()
        self._db.addPost(post)
def setUp(self):
    """Clean DB, one seeded test author, and a ClaimToTopicConverter."""
    self._db = DB()
    self._db.setUp()
    self._claim_dictionary = {}
    self._posts = []
    self._authors = []
    self._add_author(u'test author')
    self._preprocess_visualization = ClaimToTopicConverter(self._db)
def setUp(self):
    """Fresh DB and a ready RankAppImporter."""
    # One guid expected to be normalized by the importer and one expected to
    # pass through unchanged — presumably; verify against the importer tests.
    self.normalize_actor_guid = "00f888bdfe92039ccbc440ab27b7804040f195e9dc367bc077270033"
    self.not_normalize_author_guid = "0cc3fd06f73d6613dec1e4e31bcd7c4efd430df3b00dd7fe092cfa5b"
    self._db = DB()
    self._db.setUp()
    importer = RankAppImporter(self._db)
    importer.setUp()
    self.rank_app_importer = importer
def setUp(self):
    """Import the Snopes fake-news CSV fixture, then prepare a KeywordsGenerator."""
    self._db = DB()
    self._db.setUp()
    importer = FakeNewsSnopesImporter(self._db)
    # Point the importer at the V3 test fixture before running it.
    importer._input_csv_file = 'data/input/FakeNewsSnopesImporter/Fake_News_Snopes_V3.csv'
    self.fake_news_snopes_importer = importer
    self.fake_news_snopes_importer.execute()
    self.keywords_generator = KeywordsGenerator(self._db)
def setUp(self):
    """Run the Tumblr importer once against a fresh DB."""
    self.config = getConfig()
    self._db = DB()
    self._db.setUp()
    self._tumblr_importer = TumblrImporter(self._db)
    self._tumblr_importer.execute()
def setUp(self):
    """Fresh DB, the differential word-embedding generator, and a seeded test user."""
    self._config_parser = getConfig()
    self._db = DB()
    self._db.setUp()
    self._model = Word_Embedding_Differential_Feature_Generator(self._db)
    self._author = None
    self._posts = []
    self._set_author(u'test_user')
def setUp(self):
    """Fresh DB and a Data_Handler keyed on the author_type label."""
    self._db = DB()
    self._db.setUp()
    self._data_handler = Data_Handler(self._db, 'author_type')
    self._authors_to_author_features_dict = {}
    self._fill_empty = True
    self._remove_features = []
    self._select_features = []
    # Textual labels mapped onto numeric classes for the classifier.
    self._label_text_to_value = {'good': 0, 'bad': 1}
def __init__(self, db):
    """Read every Twitter API rate-limit option from this class's config
    section, zero the per-window request counters, and open a fresh DB."""
    AbstractController.__init__(self, db)
    section = self.__class__.__name__
    # Each option is stored on the instance as "_" + option name.
    for option in ("working_app_number",
                   "maximal_get_friend_ids_requests_in_window",
                   "maximal_get_follower_ids_requests_in_window",
                   "maximal_get_user_requests_in_window",
                   "maximal_user_ids_allowed_in_single_get_user_request",
                   "num_of_twitter_status_id_requests_without_checking",
                   "num_of_twitter_timeline_requests_without_checking",
                   "max_tweet_ids_allowed_in_single_get_tweets_by_tweet_ids_request",
                   "max_num_of_tweet_ids_requests_without_checking"):
        setattr(self, "_" + option, self._config_parser.eval(section, option))
    self._num_of_get_friend_ids_requests = 0
    self._num_of_get_follower_ids_requests = 0
    self._num_of_get_timeline_statuses = 0
    self._num_of_twitter_status_id_requests = 0
    self._num_of_twitter_timeline_requests = 0
    self._num_of_get_tweet_ids_requests = 0
    self._total_author_connections = []
    print("Creating TwitterApiRequester")
    self._twitter_api_requester = TwitterApiRequester(self._working_app_number)
    logging.info("Setup DB...")
    print("Setup DB...")
    self._db = DB()
    self._db.setUp()
def setUp(self):
    """Run AppImporter end-to-end against a fresh DB; keep one known-bad and
    one known-good actor guid around for the assertions."""
    self.bad_actor_guid = "e2f8a58933d5e673d9c673c442cea1b73e9732d27a0f13472fde19f0"
    self.good_actor_guid = "0a2f4a19fb5066c3a67fc9b3325515b8bf0db66b7fec92b63da564a9"
    self._db = DB()
    self._db.setUp()
    importer = AppImporter(self._db)
    importer.setUp()
    self.app_importer = importer
    self._domain = importer._domain
    self.app_importer.execute()
def setUp(self):
    """Seed authors, posts and claims, then build both Reddit by-claim
    feature generators under test."""
    self._db = DB()
    self._db.setUp()
    self._author = None
    self._posts = []
    self._init_authors()
    self._init_posts()
    self._init_claims()
    # Each generator gets its own params call, as in the original setup.
    self._reddit_post_by_claim_feature_generator = RedditPostByClaimFeatureGenerator(
        self._db, **self._get_params())
    self._reddit_author_by_claim_feature_generator = RedditAuthorByClaimFeatureGenerator(
        self._db, **self._get_params())
def setUp(self):
    """Wire up crawler, XML importer, author-table creator and the
    BadActorsCollector under test against one shared DB."""
    TestBase.setUp(self)
    self.config = getConfig()
    from DB.schema_definition import DB
    self.db = DB()
    self.db.setUp()
    self.social_network_crawler = Twitter_Rest_Api(self.db)
    self.xml_importer = XMLImporter(self.db)
    self.create_author_table = CreateAuthorTables(self.db)
    # Twitter ids the collector is configured to target.
    self._targeted_twitter_author_ids = self.config.eval('BadActorsCollector', "targeted_twitter_author_ids")
    self._targeted_twitter_post_ids = self.config.eval('BadActorsCollector', "targeted_twitter_post_ids")
    self._bad_actor_collector = BadActorsCollector(self.db)
def testDoubleExecute(self):
    """Running the full pipeline twice must be idempotent: every inspected
    table holds the same number of rows after each execution.

    Fix: the original collected ``execute(query).scalar()`` per table, which
    returns the first column of the first row (or None for an empty table),
    not a row count — so the comparison was between arbitrary cell values.
    The rows are now actually counted.
    """
    import sys
    sys.argv = [sys.argv[0], 'config.ini']

    table_queries = ["select * from posts",
                     "select * from authors",
                     "select * from topics",
                     "select * from author_citations",
                     "select * from authors_boost_stats",
                     "select * from post_citations",
                     "select * from posts_representativeness",
                     "select * from posts_to_pointers_scores",
                     "select * from posts_to_topic",
                     "select * from visualization_windows"]

    db = DB()

    def row_counts():
        # Current number of rows in each inspected table.
        return [len(db.session.execute(query).fetchall()) for query in table_queries]

    db.setUp()
    db.execute(getConfig().get("DEFAULT", "start_date"))
    first_run_counts = row_counts()

    db.setUp()
    db.execute(getConfig().get("DEFAULT", "start_date"))
    second_run_counts = row_counts()

    self.assertListEqual(first_run_counts, second_run_counts,
                         "the two executions had different results")
def setUp(self):
    """Prepare paths and state for loading the small GloVe test model."""
    self._config_parser = getConfig()
    self._db = DB()
    self._db.setUp()
    self._is_load_wikipedia_300d_glove_model = True
    self._wikipedia_model_file_path = "data/input/glove/test_glove.6B.300d_small.txt"
    self._table_name = "wikipedia_model_300d"
    self._word_vector_dict_full_path = "data/output/word_embedding/"
    self._word_vector_dict = {}
    self._author = None
    self._set_author(u'test_user')
    # Reset after seeding the author, matching the original statement order.
    self._counter = 0
    self._posts = []
def setUp(self):
    """Insert one fully-populated author with a single post, then run the
    account-properties feature generator over exactly that pair."""
    self._db = DB()
    self._db.setUp()
    self.author_guid = u"author_guid"

    author = Author()
    author_values = {
        'author_guid': self.author_guid,
        'author_full_name': u'author',
        'name': u'author_name',
        'author_screen_name': u'author_screen_name',
        'domain': u'Microblog',
        'statuses_count': 10,
        'friends_count': 5,
        'followers_count': 6,
        'favourites_count': 8,
        'author_sub_type': u"bot",
        'author_type': u"bad",
        'created_at': u"2017-06-17 05:00:00",
        'default_profile': True,
        'default_profile_image': True,
        'verified': True,
    }
    for field, value in author_values.items():
        setattr(author, field, value)
    self._db.add_author(author)

    post = Post()
    post.author = self.author_guid
    post.author_guid = self.author_guid
    post.content = u"content"
    post.title = u"title"
    post.domain = u"domain"
    post.post_id = u"post_id"
    post.guid = post.post_id
    post.date = convert_str_to_unicode_datetime("2017-06-14 05:00:00")
    post.created_at = post.date
    self._db.addPost(post)
    self._db.session.commit()

    self.feature_prefix = u"AccountPropertiesFeatureGenerator_"
    generator_params = {'authors': [author], 'posts': {self.author_guid: [post]}}
    self.account_properties_feature_generator = AccountPropertiesFeatureGenerator(
        self._db, **generator_params)
    self.account_properties_feature_generator.execute()
def setUp(self):
    """Import Tumblr fixtures, build post citations, run the boost-score
    model and feature generator, and cache the test author's features."""
    TestBase.setUp(self)
    self.config = getConfig()
    self._start_date = self.config.eval("DEFAULT", "start_date")
    #self._end_date = self.config.get("DEFAULT", "end_date")
    self._tsv_files_path = self.config.get("TumblrImporter", "tsv_test_files_boost_score_feature_generator")
    self._db = DB()
    self._db.setUp()
    self._tumblr_parser = TumblrImporter(self._db)
    self._tumblr_parser.setUp(self._tsv_files_path)
    self._tumblr_parser.execute()
    # Fixture post ids/urls referenced by the citation setup and the tests.
    self._author_guid = "f0f4bb42-3fed-322a-b71a-681179d47ea1"
    self._original_post_id = "130277126878"
    self._original_post_url = "http://tmblr.co/ZBqKGn1vL7pBS1"
    self._non_origin_post_id1 = "130277126879"
    self._non_origin_post_url1 = "http://tmblr.co/ZBqKGn1vL7pBS2"
    self._non_origin_post_id2 = "130277126880"
    self._non_origin_post_url2 = "http://tmblr.co/ZBqKGn1vL7pBS3"
    self._non_origin_post_id3 = "130277126881"
    self._non_origin_post_url3 = "http://tmblr.co/ZBqKGn1vL7pBS4"
    # NOTE(review): the importer is set up and executed a second time here,
    # duplicating the run above; unless the importer is idempotent this may
    # double-insert the fixtures — confirm whether the repeat is intentional.
    self._tumblr_parser.setUp(self._tsv_files_path)
    self._tumblr_parser.execute()
    self._create_post_citations()
    boost_score_model = BoostAuthorsModel(self._db)
    boost_score_model.execute(self._start_date)
    authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
    posts = self._db.get_posts_by_domain(Domains.MICROBLOG)
    parameters = {"authors": authors, "posts": posts}
    self._boost_score_feature_generator = BoostScoresFeatureGenerator(self._db, **parameters)
    self._boost_score_feature_generator.execute()
    self._author_features = self._db.get_author_features_by_author_guid(author_guid=self._author_guid)
    self._author_features_dict = self._create_author_features_dictionary(self._author_features)
def __init__(self):
    """Configure logging, read overlap-analysis settings, ensure the output
    directory exists, and open a fresh DB."""
    config_parser = getConfig()
    logging.config.fileConfig(getConfig().get("DEFAULT", "logger_conf_file"))
    self._db = DB()
    self._db.setUp()
    self._acquired_bad_authors = []
    self._suspected_authors = []
    section = self.__class__.__name__
    self.common_posts_threshold = config_parser.eval(section, "common_posts_threshold")
    self.output_path = config_parser.eval(section, "output_path")
    self.output_dir = config_parser.eval(section, "output_dir")
    target_dir = self.output_path + "/" + self.output_dir
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    self._source_author_destination_author_num_of_mutual_posts_dict = {}
def __init__(self):
    """Load all Vico file-name/path/csv settings from config, then open the
    Twitter crawler, CSV exporter and a fresh DB."""
    config_parser = getConfig()
    logging.config.fileConfig(getConfig().get("DEFAULT", "logger_conf_file"))
    logger = logging.getLogger(getConfig().get("DEFAULT", "logger_name"))
    logger.info("Start Execution ... ")
    section = self.__class__.__name__
    # Plain string options, each stored as "_" + option name.
    for option in ("missing_retweets_not_retrived_from_vico_file_name",
                   "missing_tweets_not_retrived_from_vico_file_name",
                   "retweets_retrieved_from_vico_file_name",
                   "tweets_retrieved_from_vico_file_name",
                   "path",
                   "backup_path"):
        setattr(self, "_" + option, config_parser.get(section, option))
    self._csv_header = config_parser.eval(section, "csv_header")
    self._csv_header_bad_actors_vico_retrieved_posts = config_parser.eval(
        section, "csv_header_bad_actors_vico_retrieved_posts")
    targeted_twitter_post_ids = config_parser.get(
        "BadActorsCollector", "targeted_twitter_post_ids")
    self._targeted_twitter_post_ids = create_ids_from_config_file(
        targeted_twitter_post_ids)
    self._original_statuses = config_parser.eval(section, "original_statuses")
    self._csv_importer = PostCSVExporter()
    self._social_network_crawler = Twitter_Rest_Api()
    self._db = DB()
    self._db.setUp()
def setUp(self):
    """Minimal fixture: a clean DB and empty post/author holders."""
    self._db = DB()
    self._db.setUp()
    self._author = None
    self._posts = []
from dataset_builder.feature_extractor.topic_feature_generator import SinglePostLDA moduleNames["SinglePostLDA"] = SinglePostLDA ############################################################### ## SETUP ############################################################### logging.config.fileConfig(getConfig().get("DEFAULT", "Logger_conf_file")) domain = getConfig().get("DEFAULT", "domain") logging.info("Start Execution ... ") logging.info("SETUP global variables") window_start = getConfig().eval("DEFAULT", "start_date") logging.info("CREATE pipeline") db = DB() moduleNames["DB"] = lambda x: x pipeline = [] for module in getConfig().sections(): if moduleNames.get(module): pipeline.append(moduleNames.get(module)(db)) logging.info("SETUP pipeline") for module in pipeline: logging.info("setup module: {0}".format(module)) T = time.time() module.setUp() T = time.time() - T for module in pipeline: