示例#1
0
 def testLogarithm(self):
     """Check that the database evaluates ``log(10)`` to ln(10).

     Runs ``select log(10) as q;`` on a session obtained from a freshly set
     up ``DB`` and compares the first column of every returned row with
     2.302585092994046.
     """
     db = DB()
     db.setUp()
     session = db.Session()
     try:
         # Materialise the rows so they can be checked after the session
         # has been released.
         rows = list(session.execute("select log(10) as q;"))
     finally:
         # Release the session even when the query itself fails.
         session.close()
     # Without this guard an empty result set would let the loop below run
     # zero times and the test would pass without checking anything.
     self.assertTrue(rows, "log(10) query returned no rows")
     for row in rows:
         self.assertAlmostEqual(row[0], 2.302585092994046)
示例#2
0
 def setUp(self):
     """Create a fresh DB, an ``OldTweetsCrawler`` on top of it and one author.

     The crawler's ``_domain`` is forced to ``u'Claim'`` before the author
     is added through ``self._add_author``; ``self._claims`` starts empty.
     """
     self._db = DB()
     self._db.setUp()

     crawler = OldTweetsCrawler(self._db)
     crawler._domain = u'Claim'
     self.tweets_crawler = crawler

     self._add_author(u"author_guid")
     self._claims = {}
示例#3
0
    def setUp(self):
        """Import the Tumblr test TSVs and run ``AccountPropertiesFeatureGenerator``.

        After the base-class set-up this loads the config, creates and sets
        up a DB, imports the files named by the
        ``TumblrImporter/tsv_test_files_account_properties_feature_generator``
        option, runs the generator over all ``Domains.MICROBLOG`` authors and
        posts, and caches the features stored for ``self._author_guid``.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._start_date = self.config.eval("DEFAULT", "start_date")
        self._tsv_files_path = self.config.get(
            "TumblrImporter",
            "tsv_test_files_account_properties_feature_generator")

        self._db = DB()
        self._db.setUp()

        importer = TumblrImporter(self._db)
        self._tumblr_parser = importer
        importer.setUp(self._tsv_files_path)
        importer.execute()

        self._author_guid = "f0f4bb42-3fed-322a-b71a-681179d47ea1"

        # Keyword arguments are evaluated left to right, so authors are
        # fetched before posts.
        generator = AccountPropertiesFeatureGenerator(
            self._db,
            authors=self._db.get_authors_by_domain(Domains.MICROBLOG),
            posts=self._db.get_posts_by_domain(Domains.MICROBLOG))
        generator.execute()

        features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features = features
        self._author_features_dict = self._create_author_features_dictionary(
            features)
    def setUpClass(cls):
        """Build the class-level DB, Sub2Vec objects and four sample graphs.

        All fixtures are stored as class attributes: a set-up ``DB``, a
        ``Sub2VecModelCreator``, a ``Sub2VecFeatureGenerator`` created with no
        authors and no posts, and four graphs built from one shared edge list
        (with 5 or 7 nodes). Each graph is finally passed to
        ``cls.add_graph_to_db``.
        """
        super(TestSub2VecModelCreator, cls).setUpClass()

        cls._db = DB()
        cls._db.setUp()
        cls.sub2vec_model_creator = Sub2VecModelCreator(cls._db)
        cls.sub2vec_feature_generator = Sub2VecFeatureGenerator(
            cls._db, authors=[], posts={})

        edge_list = [
            (0, 4), (2, 0), (1, 3), (3, 1), (0, 1),
            (1, 2), (4, 0), (4, 3), (2, 3), (3, 0),
        ]

        cls.connected_undirected_graph = cls.create_undirected_graph(
            5, edge_list, 'connected_undirected_graph')
        # NOTE(review): the directed graphs are built by calling
        # ``cls.connected_directed_graph`` itself, and the second call rebinds
        # that attribute to its own result. The undirected graphs use
        # ``create_undirected_graph``; confirm whether a matching
        # ``create_directed_graph`` helper was intended here.
        cls.unconnected_directed_graph = cls.connected_directed_graph(
            7, edge_list, 'unconnected_directed_graph')
        cls.connected_directed_graph = cls.connected_directed_graph(
            5, edge_list, 'connected_directed_graph')
        cls.unconnected_undirected_graph = cls.create_undirected_graph(
            7, edge_list, 'unconnected_undirected_graph')

        for graph in (cls.connected_undirected_graph,
                      cls.unconnected_directed_graph,
                      cls.connected_directed_graph,
                      cls.unconnected_undirected_graph):
            cls.add_graph_to_db(graph)
 def setUp(self):
     """Create a fresh DB and an ``InstagramCrawler`` with a patched ``get_json``.

     The crawler's inner ``insta_crawler`` gets its ``get_json`` replaced by
     ``f`` (defined outside this method) bound to that object as a method.
     """
     self._db = DB()
     self._db.setUp()
     self._posts, self._author = [], None

     self.instagram_crawler = InstagramCrawler(self._db)
     inner_crawler = self.instagram_crawler.insta_crawler
     inner_crawler.get_json = types.MethodType(f, inner_crawler)
示例#6
0
 def setUp(self):
     """Create a fresh DB and a ``RedditCrawler`` whose ``reddit`` is a ``RedditStub``."""
     self._db = DB()
     self._db.setUp()
     self._posts, self._author = [], None

     crawler = RedditCrawler(self._db)
     # Swap the crawler's ``reddit`` attribute for the stub object.
     crawler.reddit = RedditStub()
     self.reddit_crawler = crawler
    def setUp(self):
        """Import the graph-feature Tumblr fixtures and run ``GraphFeatureGenerator``.

        Steps: base-class set-up, load the config, create and set up a DB,
        import the TSV files named by
        ``TumblrImporter/tsv_test_files_graph_feature_generator``, run the
        generator with the options read from the ``GraphFeatureGenerator_1``
        section, then cache the features stored for ``self._author_guid``.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._start_date = self.config.eval("DEFAULT", "start_date")
        self._tsv_files_path = self.config.get(
            "TumblrImporter", "tsv_test_files_graph_feature_generator")

        self._db = DB()
        self._db.setUp()

        importer = TumblrImporter(self._db)
        self._tumblr_parser = importer
        importer.setUp(self._tsv_files_path)
        importer.execute()

        self._author_guid = u"f0f4bb42-3fed-322a-b71a-681179d47ea1"

        authors = self._db.get_authors_by_domain(Domains.MICROBLOG)
        posts = self._db.get_posts_by_domain(Domains.MICROBLOG)

        # All generator settings live in one config section.
        section = "GraphFeatureGenerator_1"
        graph_types = self.config.eval(section, "graph_types")
        algorithms = self.config.eval(section, "algorithms")
        aggregations = self.config.eval(section, "aggregation_functions")
        neighborhood_sizes = self.config.eval(section, "neighborhood_sizes")
        distances_from_labeled_authors = self.config.eval(
            section, "distances_from_labeled_authors")
        graph_directed = self.config.eval(section, "graph_directed")
        graph_weights = self.config.eval(section, "graph_weights")

        generator = GraphFeatureGenerator(
            self._db,
            authors=authors,
            posts=posts,
            graph_types=graph_types,
            algorithms=algorithms,
            aggregation_functions=aggregations,
            neighborhood_sizes=neighborhood_sizes,
            graph_directed=graph_directed,
            graph_weights=graph_weights,
            distances_from_labeled_authors=distances_from_labeled_authors)
        generator.execute()

        features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features = features
        self._author_features_dict = self._create_author_features_dictionary(
            features)
 def testDBSetUp(self):
     """Smoke-test that a set-up ``DB`` exposes an engine SQLAlchemy can inspect.

     Stores the DB and the inspector on ``self`` and always closes
     ``self.db.session`` afterwards.
     """
     from sqlalchemy.engine.reflection import Inspector
     self.db = DB()
     self.db.setUp()
     try:
         self.inspector = Inspector.from_engine(self.db.engine)
         # NOTE(review): the table-existence assertion below is disabled in
         # the original, so this test currently asserts nothing; confirm
         # whether it should be re-enabled.
         #self.assertTrue("posts" in set(self.inspector.get_table_names()))
     finally:
         # Close the session even if building the inspector raises.
         self.db.session.close()
示例#9
0
    def setUp(self):
        """Import Tumblr fixtures, run the topic and key-author stages, then the generator.

        After the base-class set-up this loads the config, creates and sets
        up a DB, imports the TSV files named by
        ``TumblrImporter/tsv_test_files_key_author_score_feature_generator``,
        runs ``AutotopicExecutor`` and ``KeyAuthorsModel`` (set-up followed by
        execute, in that order), runs ``KeyAuthorScoreFeatureGenerator`` over
        all ``Domains.MICROBLOG`` authors and posts, and caches the features
        stored for ``self._author_guid``.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._tsv_files_path = self.config.get(
            "TumblrImporter",
            "tsv_test_files_key_author_score_feature_generator")
        self._db = DB()
        self._db.setUp()
        self._tumblr_parser = TumblrImporter(self._db)

        self._author_guid = "150ff707-a6eb-3051-8f3c-f623293c714b"

        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        # Each preparatory stage is constructed, set up and executed before
        # the next one is created.
        for stage_class in (AutotopicExecutor, KeyAuthorsModel):
            stage = stage_class(self._db)
            stage.setUp()
            stage.execute()

        self._key_author_score_feature_generator = KeyAuthorScoreFeatureGenerator(
            self._db,
            authors=self._db.get_authors_by_domain(Domains.MICROBLOG),
            posts=self._db.get_posts_by_domain(Domains.MICROBLOG))
        self._key_author_score_feature_generator.execute()

        features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features = features
        self._author_features_dict = self._create_author_features_dictionary(
            features)
 def setUp(self):
     """Create a fresh DB and reset the domain and the post/author/text lists."""
     self._db = DB()
     self._db.setUp()
     self._domain = u'test'
     # Three independent empty lists, one per collection.
     self._posts, self._authors, self._texts = [], [], []
示例#11
0
 def testDBSetUp(self):
     """Verify that ``DB.setUp`` leaves a ``posts`` table in the database.

     Table names are read through a SQLAlchemy ``Inspector`` built from
     ``db.engine``.
     """
     from sqlalchemy.engine.reflection import Inspector
     db = DB()
     db.setUp()
     # The session is still created, as in the original, but is now
     # released when the test finishes instead of being left open.
     session = db.Session()
     try:
         inspector = Inspector.from_engine(db.engine)
         # assertIn reports the actual table names on failure.
         self.assertIn("posts", inspector.get_table_names())
     finally:
         session.close()
示例#12
0
    def setUp(self):
        """Populate a fresh DB with two Microblog authors and ten posts each.

        The first author (``acquired_user``) is typed ``bad_actor`` /
        ``acquired`` and owns posts ``bad_post1`` .. ``bad_post10``; the second
        (``TestUser1``) has no type set and owns ``TestPost1`` ..
        ``TestPost10``. Records are written in the order author, its posts,
        next author, its posts, followed by a single commit.
        """
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()

        def add_author(guid, osn_id, **extra_fields):
            # Store an author whose name fields all equal ``guid``;
            # ``extra_fields`` are set as additional attributes.
            author = Author()
            author.name = guid
            author.domain = 'Microblog'
            author.author_guid = guid
            author.author_screen_name = guid
            author.author_full_name = guid
            author.author_osn_id = osn_id
            author.created_at = datetime.datetime.now()
            author.missing_data_complementor_insertion_date = datetime.datetime.now()
            author.xml_importer_insertion_date = datetime.datetime.now()
            for field_name, value in extra_fields.items():
                setattr(author, field_name, value)
            self._db.add_author(author)

        def add_posts(id_prefix, author_guid):
            # Store posts ``<id_prefix>1`` .. ``<id_prefix>10`` for ``author_guid``.
            for i in range(1, 11):
                post = Post()
                post.post_id = id_prefix + str(i)
                post.author = author_guid
                post.guid = id_prefix + str(i)
                post.date = datetime.datetime.now()
                post.domain = 'Microblog'
                post.author_guid = author_guid
                post.content = 'InternetTV love it' + str(i)
                post.xml_importer_insertion_date = datetime.datetime.now()
                self._db.addPost(post)

        add_author('acquired_user', 1,
                   author_type='bad_actor', author_sub_type='acquired')
        add_posts('bad_post', 'acquired_user')

        add_author('TestUser1', 2)
        add_posts('TestPost', 'TestUser1')

        self._db.commit()
 def setUp(self):
     """Create a fresh DB, add one test author and build a ``ClaimToTopicConverter``."""
     self._db = DB()
     self._db.setUp()
     # Containers are reset before ``_add_author`` runs.
     self._posts, self._claim_dictionary, self._authors = [], {}, []
     self._add_author(u'test author')
     converter = ClaimToTopicConverter(self._db)
     self._preprocess_visualization = converter
示例#14
0
    def setUp(self):
        """Record two reference author guids and set up a DB with a ``RankAppImporter``."""
        self.normalize_actor_guid = "00f888bdfe92039ccbc440ab27b7804040f195e9dc367bc077270033"
        self.not_normalize_author_guid = "0cc3fd06f73d6613dec1e4e31bcd7c4efd430df3b00dd7fe092cfa5b"

        self._db = DB()
        self._db.setUp()

        importer = RankAppImporter(self._db)
        self.rank_app_importer = importer
        importer.setUp()
    def setUp(self):
        """Load the Snopes fake-news CSV into a fresh DB and create a ``KeywordsGenerator``.

        The importer's ``_input_csv_file`` is pointed at the V3 CSV before
        ``execute`` is called.
        """
        self._db = DB()
        self._db.setUp()

        importer = FakeNewsSnopesImporter(self._db)
        self.fake_news_snopes_importer = importer
        importer._input_csv_file = 'data/input/FakeNewsSnopesImporter/Fake_News_Snopes_V3.csv'
        importer.execute()

        self.keywords_generator = KeywordsGenerator(self._db)
    def setUp(self):
        """Load the config, set up a fresh DB and execute a ``TumblrImporter`` on it.

        ``TestBase.setUp`` is deliberately not called here (it is commented
        out in the original), and the importer is executed without a prior
        ``setUp`` call.
        """
        # TestBase.setUp(self)
        self.config = getConfig()

        self._db = DB()
        self._db.setUp()

        importer = TumblrImporter(self._db)
        self._tumblr_importer = importer
        importer.execute()
    def setUp(self):
        """Create a DB, the word-embedding differential generator and one test author."""
        self._config_parser = getConfig()
        self._db = DB()
        self._db.setUp()

        model = Word_Embedding_Differential_Feature_Generator(self._db)
        self._model = model

        # Post list and author placeholder exist before ``_set_author`` runs.
        self._posts, self._author = [], None
        self._set_author(u'test_user')
    def setUp(self):
        """Create a DB, a ``Data_Handler`` targeting ``'author_type'`` and default options."""
        self._db = DB()
        self._db.setUp()
        handler = Data_Handler(self._db, 'author_type')
        self._data_handler = handler
        self._authors_to_author_features_dict = {}

        self._fill_empty = True
        # Two independent empty lists.
        self._remove_features, self._select_features = [], []
        self._label_text_to_value = {'good': 0, 'bad': 1}
示例#19
0
    def __init__(self, db):
        """Read the request limits from config, reset counters, open the API client and DB.

        Every option is read with ``self._config_parser.eval`` from the
        section named after the concrete class. A ``TwitterApiRequester`` is
        created for the configured app number, and a new ``DB`` is created
        and set up.

        :param db: forwarded to ``AbstractController.__init__``.
        """
        AbstractController.__init__(self, db)

        section = self.__class__.__name__
        config = self._config_parser

        def option(name):
            # Evaluate one option from this class's config section.
            return config.eval(section, name)

        self._working_app_number = option("working_app_number")

        self._maximal_get_friend_ids_requests_in_window = option(
            "maximal_get_friend_ids_requests_in_window")
        self._maximal_get_follower_ids_requests_in_window = option(
            "maximal_get_follower_ids_requests_in_window")
        self._maximal_get_user_requests_in_window = option(
            "maximal_get_user_requests_in_window")
        self._maximal_user_ids_allowed_in_single_get_user_request = option(
            "maximal_user_ids_allowed_in_single_get_user_request")

        self._num_of_twitter_status_id_requests_without_checking = option(
            "num_of_twitter_status_id_requests_without_checking")
        self._num_of_twitter_timeline_requests_without_checking = option(
            "num_of_twitter_timeline_requests_without_checking")

        self._max_tweet_ids_allowed_in_single_get_tweets_by_tweet_ids_request = option(
            "max_tweet_ids_allowed_in_single_get_tweets_by_tweet_ids_request")
        self._max_num_of_tweet_ids_requests_without_checking = option(
            "max_num_of_tweet_ids_requests_without_checking")

        # Request counters all start at zero.
        self._num_of_get_friend_ids_requests = 0
        self._num_of_get_follower_ids_requests = 0
        self._num_of_get_timeline_statuses = 0
        self._num_of_twitter_status_id_requests = 0
        self._num_of_twitter_timeline_requests = 0
        self._num_of_get_tweet_ids_requests = 0
        self._total_author_connections = []

        print("Creating TwitterApiRequester")
        requester = TwitterApiRequester(self._working_app_number)
        self._twitter_api_requester = requester

        # self._find_source_twitter_id()

        logging.info("Setup DB...")
        print("Setup DB...")
        # NOTE(review): ``self._db`` is rebound to a brand-new DB here even
        # though ``db`` was already handed to the base class; confirm that
        # ignoring the caller's instance is intended.
        self._db = DB()
        self._db.setUp()
示例#20
0
    def setUp(self):
        """Record two reference actor guids, then set up and execute an ``AppImporter``.

        ``self._domain`` is copied from the importer after its ``setUp`` and
        before ``execute`` runs.
        """
        self.bad_actor_guid = "e2f8a58933d5e673d9c673c442cea1b73e9732d27a0f13472fde19f0"
        self.good_actor_guid = "0a2f4a19fb5066c3a67fc9b3325515b8bf0db66b7fec92b63da564a9"

        self._db = DB()
        self._db.setUp()

        importer = AppImporter(self._db)
        self.app_importer = importer
        importer.setUp()
        self._domain = importer._domain
        importer.execute()
 def setUp(self):
     """Seed authors, posts and claims, then build both Reddit-by-claim generators.

     ``self._get_params()`` is called once per generator, so each one
     receives the result of its own call.
     """
     self._db = DB()
     self._db.setUp()
     self._posts, self._author = [], None

     self._init_authors()
     self._init_posts()
     self._init_claims()

     post_generator = RedditPostByClaimFeatureGenerator(
         self._db, **self._get_params())
     self._reddit_post_by_claim_feature_generator = post_generator

     author_generator = RedditAuthorByClaimFeatureGenerator(
         self._db, **self._get_params())
     self._reddit_author_by_claim_feature_generator = author_generator
示例#22
0
    def setUp(self):
        """Build the DB and the collaborators used by the ``BadActorsCollector`` tests.

        ``DB`` is imported locally from ``DB.schema_definition``. The targeted
        author and post ids are evaluated from the ``BadActorsCollector``
        config section.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        from DB.schema_definition import DB

        self.db = DB()
        self.db.setUp()

        # Collaborators all share the same DB instance.
        self.social_network_crawler = Twitter_Rest_Api(self.db)
        self.xml_importer = XMLImporter(self.db)
        self.create_author_table = CreateAuthorTables(self.db)

        section = 'BadActorsCollector'
        self._targeted_twitter_author_ids = self.config.eval(
            section, "targeted_twitter_author_ids")
        self._targeted_twitter_post_ids = self.config.eval(
            section, "targeted_twitter_post_ids")

        self._bad_actor_collector = BadActorsCollector(self.db)
示例#23
0
 def testDoubleExecute(self):
     """Run DB set-up and execute twice and require identical samples per table.

     After each run one ``.scalar()`` value is read per table query; the two
     lists of values must be equal.
     """
     import sys
     # NOTE(review): sys.argv is replaced here and never restored, so the
     # change outlives this test; confirm other tests do not rely on it
     # before changing that.
     sys.argv = [sys.argv[0], 'config.ini']
     db = DB()

     table_queries = (
         "select * from posts",
         "select * from authors",
         "select * from topics",
         "select * from author_citations",
         "select * from authors_boost_stats",
         "select * from post_citations",
         "select * from posts_representativeness",
         "select * from posts_to_pointers_scores",
         "select * from posts_to_topic",
         "select * from visualization_windows",
     )

     def run_and_sample():
         # One full set-up/execute cycle followed by one scalar per table.
         # NOTE(review): ``.scalar()`` on ``select *`` presumably yields the
         # first column of the first row rather than a row count, although
         # the original variable names suggest counts; confirm intent.
         db.setUp()
         db.execute(getConfig().get("DEFAULT","start_date"))
         return [db.session.execute(query).scalar() for query in table_queries]

     first_run = run_and_sample()
     second_run = run_and_sample()
     self.assertListEqual(first_run, second_run, "the two executions had different results")
    def setUp(self):
        """Create a DB, record the GloVe test-model settings and add one test author."""
        self._config_parser = getConfig()
        self._db = DB()
        self._db.setUp()

        # self._Word_Embedding_Model_Creator.execute(None)
        # Settings describing the small 300d GloVe test model.
        self._is_load_wikipedia_300d_glove_model = True
        self._wikipedia_model_file_path = "data/input/glove/test_glove.6B.300d_small.txt"
        self._table_name = "wikipedia_model_300d"
        self._word_vector_dict_full_path = "data/output/word_embedding/"
        self._word_vector_dict = {}

        self._author = None
        self._set_author(u'test_user')
        # Counter and post list are reset only after the author exists.
        self._counter, self._posts = 0, []
    def setUp(self):
        """Insert one author with one post and run ``AccountPropertiesFeatureGenerator``.

        The author and post are added to a fresh DB and committed through
        ``self._db.session``; the generator then receives exactly that author
        and a mapping from the author guid to a one-post list.
        """
        self._db = DB()
        self._db.setUp()
        self.author_guid = u"author_guid"

        author = Author()
        author_fields = (
            ("author_guid", self.author_guid),
            ("author_full_name", u'author'),
            ("name", u'author_name'),
            ("author_screen_name", u'author_screen_name'),
            ("domain", u'Microblog'),
            ("statuses_count", 10),
            ("friends_count", 5),
            ("followers_count", 6),
            ("favourites_count", 8),
            ("author_sub_type", u"bot"),
            ("author_type", u"bad"),
            ("created_at", u"2017-06-17 05:00:00"),
            ("default_profile", True),
            ("default_profile_image", True),
            ("verified", True),
        )
        # Attributes are assigned in the listed order.
        for field_name, value in author_fields:
            setattr(author, field_name, value)
        self._db.add_author(author)

        post = Post()
        post.author = self.author_guid
        post.author_guid = self.author_guid
        post.content = u"content"
        post.title = u"title"
        post.domain = u"domain"
        post.post_id = u"post_id"
        # guid mirrors post_id and created_at mirrors date, both read back
        # from the post object.
        post.guid = post.post_id
        post.date = convert_str_to_unicode_datetime("2017-06-14 05:00:00")
        post.created_at = post.date
        self._db.addPost(post)

        self._db.session.commit()
        self.feature_prefix = u"AccountPropertiesFeatureGenerator_"
        self.account_properties_feature_generator = AccountPropertiesFeatureGenerator(
            self._db,
            authors=[author],
            posts={self.author_guid: [post]})
        self.account_properties_feature_generator.execute()
示例#26
0
    def setUp(self):
        """Import Tumblr fixtures, add citations, run the boost model and its generator.

        After the base-class set-up this loads the config, creates and sets
        up a DB, imports the TSV files named by
        ``TumblrImporter/tsv_test_files_boost_score_feature_generator``,
        records the reference post ids and URLs, calls
        ``self._create_post_citations``, executes ``BoostAuthorsModel`` with
        the configured start date, runs ``BoostScoresFeatureGenerator`` over
        all ``Domains.MICROBLOG`` authors and posts, and caches the features
        stored for ``self._author_guid``.
        """
        TestBase.setUp(self)
        self.config = getConfig()
        self._start_date = self.config.eval("DEFAULT", "start_date")

        self._tsv_files_path = self.config.get(
            "TumblrImporter", "tsv_test_files_boost_score_feature_generator")
        self._db = DB()
        self._db.setUp()

        importer = TumblrImporter(self._db)
        self._tumblr_parser = importer
        importer.setUp(self._tsv_files_path)
        importer.execute()

        self._author_guid = "f0f4bb42-3fed-322a-b71a-681179d47ea1"

        # One original post plus three non-original posts.
        self._original_post_id = "130277126878"
        self._original_post_url = "http://tmblr.co/ZBqKGn1vL7pBS1"
        self._non_origin_post_id1 = "130277126879"
        self._non_origin_post_url1 = "http://tmblr.co/ZBqKGn1vL7pBS2"
        self._non_origin_post_id2 = "130277126880"
        self._non_origin_post_url2 = "http://tmblr.co/ZBqKGn1vL7pBS3"
        self._non_origin_post_id3 = "130277126881"
        self._non_origin_post_url3 = "http://tmblr.co/ZBqKGn1vL7pBS4"

        # NOTE(review): the importer is set up and executed a second time
        # with the same path; kept as in the original, confirm whether the
        # repeated import is intentional.
        self._tumblr_parser.setUp(self._tsv_files_path)
        self._tumblr_parser.execute()

        self._create_post_citations()

        boost_model = BoostAuthorsModel(self._db)
        boost_model.execute(self._start_date)

        self._boost_score_feature_generator = BoostScoresFeatureGenerator(
            self._db,
            authors=self._db.get_authors_by_domain(Domains.MICROBLOG),
            posts=self._db.get_posts_by_domain(Domains.MICROBLOG))
        self._boost_score_feature_generator.execute()

        features = self._db.get_author_features_by_author_guid(
            author_guid=self._author_guid)
        self._author_features = features
        self._author_features_dict = self._create_author_features_dictionary(
            features)
    def __init__(self):
        """Configure logging, open the DB and make sure the output directory exists.

        ``common_posts_threshold``, ``output_path`` and ``output_dir`` are
        evaluated from the config section named after the concrete class.
        The directory ``output_path + "/" + output_dir`` is created when it
        does not already exist.
        """
        config_parser = getConfig()
        logger_conf_file = getConfig().get("DEFAULT", "logger_conf_file")
        logging.config.fileConfig(logger_conf_file)

        self._db = DB()
        self._db.setUp()
        self._acquired_bad_authors = []
        self._suspected_authors = []

        section = self.__class__.__name__
        self.common_posts_threshold = config_parser.eval(
            section, "common_posts_threshold")
        self.output_path = config_parser.eval(section, "output_path")
        self.output_dir = config_parser.eval(section, "output_dir")

        target_dir = self.output_path + "/" + self.output_dir
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        self._source_author_destination_author_num_of_mutual_posts_dict = {}
示例#28
0
    def __init__(self):
        """Configure logging, read file names and CSV headers, and create the collaborators.

        File names and paths are read with ``get`` and the CSV headers and
        ``original_statuses`` with ``eval``, all from the config section
        named after the concrete class. The targeted post ids come from the
        ``BadActorsCollector`` section and are converted with
        ``create_ids_from_config_file``.
        """
        config_parser = getConfig()
        logging.config.fileConfig(
            getConfig().get("DEFAULT", "logger_conf_file"))
        logger = logging.getLogger(getConfig().get("DEFAULT", "logger_name"))

        logger.info("Start Execution ... ")

        section = self.__class__.__name__

        # Plain string options.
        self._missing_retweets_not_retrived_from_vico_file_name = config_parser.get(
            section, "missing_retweets_not_retrived_from_vico_file_name")
        self._missing_tweets_not_retrived_from_vico_file_name = config_parser.get(
            section, "missing_tweets_not_retrived_from_vico_file_name")
        self._retweets_retrieved_from_vico_file_name = config_parser.get(
            section, "retweets_retrieved_from_vico_file_name")
        self._tweets_retrieved_from_vico_file_name = config_parser.get(
            section, "tweets_retrieved_from_vico_file_name")
        self._path = config_parser.get(section, "path")
        self._backup_path = config_parser.get(section, "backup_path")

        # Options that are evaluated rather than read as strings.
        self._csv_header = config_parser.eval(section, "csv_header")
        self._csv_header_bad_actors_vico_retrieved_posts = config_parser.eval(
            section, "csv_header_bad_actors_vico_retrieved_posts")

        raw_post_ids = config_parser.get(
            "BadActorsCollector", "targeted_twitter_post_ids")
        self._targeted_twitter_post_ids = create_ids_from_config_file(
            raw_post_ids)

        self._original_statuses = config_parser.eval(
            section, "original_statuses")

        self._csv_importer = PostCSVExporter()

        self._social_network_crawler = Twitter_Rest_Api()

        self._db = DB()
        self._db.setUp()
 def setUp(self):
     """Create and set up a fresh DB and reset the post list and author placeholder."""
     self._db = DB()
     self._db.setUp()
     self._posts, self._author = [], None
# Register the SinglePostLDA class under the "SinglePostLDA" key of
# ``moduleNames`` (the mapping itself is defined before this fragment).
from dataset_builder.feature_extractor.topic_feature_generator import SinglePostLDA
moduleNames["SinglePostLDA"] = SinglePostLDA

###############################################################
## SETUP
###############################################################
# Configure logging from the file named by the DEFAULT/Logger_conf_file option.
logging.config.fileConfig(getConfig().get("DEFAULT", "Logger_conf_file"))
domain = getConfig().get("DEFAULT", "domain")
logging.info("Start Execution ... ")
logging.info("SETUP global variables")

window_start = getConfig().eval("DEFAULT", "start_date")

logging.info("CREATE pipeline")
db = DB()
# The "DB" key maps to an identity function, so when the config has a "DB"
# section the shared ``db`` object itself is appended to the pipeline.
moduleNames["DB"] = lambda x: x
pipeline = []
# Walk the config sections in the order ``sections()`` yields them and
# instantiate every one that has a registered factory, passing it ``db``.
for module in getConfig().sections():
    if moduleNames.get(module):
        pipeline.append(moduleNames.get(module)(db))

logging.info("SETUP pipeline")

# Call setUp() on every pipeline stage, timing each call.
for module in pipeline:
    logging.info("setup module: {0}".format(module))
    T = time.time()
    module.setUp()
    # Elapsed set-up time for this stage; it is not read again inside this
    # loop and is overwritten on the next iteration.
    T = time.time() - T
for module in pipeline: