def create_authors_for_deleting_tests(self):
        """
        Insert the five fixture authors used by the deletion tests and commit.

        The GUID of author N is also stored on the test case as ``self.guidN``
        (``guid1`` .. ``guid5``). Authors 1, 2, 3 and 5 are ``bad_actor``;
        author 4 is ``good_actor``.
        """
        # (index, guid, author_type, author_sub_type, coerce_to_unicode)
        # Only the first two authors pass their domain and guid through
        # unicode(); the remaining ones store the raw values.
        fixtures = (
            (1, u'83d5812f-ff13-46d8-8c1c-3f17a48c239f', u'bad_actor', u'crowdturfer', True),
            (2, u'08fffd68-52f9-45dd-a1ea-7c2a1b0206c4', u'bad_actor', None, True),
            (3, u'a041d99d-7adc-47ad-a32b-ac24c1e43c03', u'bad_actor', u'bot', False),
            (4, u'06bc3c1b-0350-428f-b66c-7d476f442643', u'good_actor', None, False),
            (5, u'c5c1d938-1196-4bab-9f5e-23092c7be053', u'bad_actor', u'acquired', False),
        )

        for index, guid, actor_type, actor_sub_type, coerce in fixtures:
            setattr(self, 'guid%d' % index, guid)

            record = Author()
            record.name = u'author%d' % index
            record.domain = unicode(self._domain) if coerce else self._domain
            record.author_guid = unicode(guid) if coerce else guid
            record.author_type = actor_type
            record.author_sub_type = actor_sub_type
            self.db.add_author(record)

        self.db.session.commit()
Exemplo n.º 2
0
 def get_about_info_from_users(self):
     """
     Build an Author for every id in ``self.osn_ids`` and store them.

     After logging in, each user's profile page is opened to read the
     display name and the unique user name from the profile link. Every
     author starts with the placeholder value 'User Blocked' in its "about"
     fields. The list is then handed to ``_get_about_info_for_authors``
     (presumably overwriting the placeholders it can resolve; verify against
     that helper) and finally written with ``self._db.addPosts``.
     """
     # Need to add the rest of the features with User Blocked as default.
     blocked_by_default = ('education', 'professional_skills',
                           'past_residence', 'birth_day', 'gender', 'email',
                           'work')

     self._facebook_login()
     authors = []
     for author_osn_id in self.osn_ids:
         author = Author()
         author.domain = self._domain
         author.author_osn_id = author_osn_id
         author.author_type = 'User'
         # Each placeholder is set exactly once (the original assigned
         # `gender` twice).
         for field_name in blocked_by_default:
             setattr(author, field_name, 'User Blocked')

         self.driver.get('https://www.facebook.com/' + author_osn_id)
         a_element = self.driver.find_element_by_xpath(
             "//a[@class='_2nlw _2nlv']")
         href_attribute = a_element.get_attribute('href')
         author.name = a_element.text  # Extracting name
         author.author_screen_name = self._parse_unique_user_name(
             href_attribute)
         author.author_guid = commons.compute_author_guid_by_osn_id(
             author_osn_id)
         authors.append(author)

     self._get_about_info_for_authors(authors)
     self._db.addPosts(authors)
Exemplo n.º 3
0
 def get_liked_pages_from_user(self):
     """
     Collect and store the liked pages of every user in ``self.osn_ids``.

     For each user the profile page is opened to read the display name and
     unique user name, an Author of type "User" is built, the liked pages
     are converted to authors, and then both the authors (pages + user) and
     the page/user connections are written through ``self._db.addPosts``.

     Assumption: The users in the osn_ids fields in config.ini ARE NOT IN THE DB
     """
     self._facebook_login()
     for osn_id in self.osn_ids:
         self.driver.get('https://www.facebook.com/' + osn_id)
         time.sleep(2)
         profile_link = self.driver.find_element_by_xpath(
             "//a[@class='_2nlw _2nlv']")
         profile_href = profile_link.get_attribute('href')
         display_name = profile_link.text  # Extracting name
         screen_name = self._parse_unique_user_name(profile_href)

         user = Author()
         user.name = display_name
         user.author_screen_name = screen_name
         user.author_osn_id = osn_id
         user.author_guid = commons.compute_author_guid_by_osn_id(osn_id)
         user.domain = self._domain
         user.author_type = "User"

         liked_pages = self._get_liked_pages_from_user(osn_id, screen_name)
         records = self._convert_pages_to_authors(liked_pages)
         # The user goes into the same list so it is stored with its pages.
         records.append(user)
         self._db.addPosts(records)

         self._db.addPosts(
             self._convert_page_and_user_to_connection(records, user))
Exemplo n.º 4
0
    def _parse_author(self, json_content):
        """
        Build an Author from one JSON record.

        Reads the keys "id", "postTimestamp" and "postMedia" from
        ``json_content``. The record id is used as name, screen name and OSN
        id; the guid comes from ``compute_post_guid``. ``author_type`` is
        only set when the id appears in ``self._post_id_targeted_class_dict``.

        :return: the populated Author
        """
        record_id = json_content["id"]

        parsed = Author()
        parsed.name = parsed.author_screen_name = parsed.author_osn_id = record_id
        parsed.domain = self._domain

        # Only the date part of the pair is needed here.
        _, posted_on = self._get_str_and_date_formats(
            json_content["postTimestamp"])
        parsed.created_at = str(posted_on)
        parsed.author_guid = compute_post_guid(self._social_network_url,
                                               record_id, posted_on)

        if record_id in self._post_id_targeted_class_dict:
            parsed.author_type = self._post_id_targeted_class_dict[record_id]

        media_items = json_content["postMedia"]
        # First media entry if there is one, otherwise no media path.
        parsed.media_path = media_items[0] if len(media_items) > 0 else None

        parsed.notifications = self._dataset_type
        return parsed
Exemplo n.º 5
0
    def save_to_db(self):
        """
        Persist everything collected so far and reset the in-memory buffers.

        Writes the posts and both connection lists, retrieves an author
        record for every distinct redditor (up to
        ``self._number_of_attempts`` tries each), adds a placeholder
        "deleted" author for every distinct deleted redditor, stores the
        authors, and finally empties the collection lists on ``self``.
        """
        self._db.addPosts(self._posts)
        self._db.add_claim_connections(self._claim_tweet_connections)
        self._db.add_claim_connections(self._post_comment_connections)
        authors = []
        reddit_authors = []
        for i, redditor in enumerate(set(self._redditors)):
            for attempt in xrange(self._number_of_attempts):
                try:
                    self._retrive_reddit_author(authors, i, reddit_authors, redditor)
                    print("\rretrive reddit author {0}/{1}".format(i, len(self._redditors)), end='')
                    # Stop retrying once the author was retrieved. Without
                    # this the same redditor was fetched again on every
                    # remaining attempt.
                    break
                except prawcore.exceptions.ServerError:
                    print('Server overload code 503, save to DB and sleep 30 sec and try again')
                    # NOTE(review): this re-enters save_to_db, which walks
                    # all redditors again and clears the buffers on self
                    # while this call keeps iterating its own snapshot.
                    # Kept as-is to preserve behaviour; consider replacing
                    # it with a plain sleep-and-retry.
                    self.save_to_db()
                    time.sleep(5)  # 30)
                except Exception as e:
                    print('\r retrive reddit author {0}/{1} exception: {2}'.format(i, len(self._redditors), e.message), end='')
            print()

        for redditor in set(self._deleted_redditors):
            author = Author()
            author.name = "deleted"
            author.author_guid = compute_author_guid_by_author_name(redditor)
            author.domain = u'reddit'
            author.author_type = u'deleted'
            authors.append(author)

        self._db.add_authors_fast(authors)
        self._db.add_reddit_authors(reddit_authors)
        self._posts = []
        self._claim_tweet_connections = []
        self._redditors = []
        self._deleted_redditors = []
        self._post_comment_connections = []
Exemplo n.º 6
0
    def setUp(self):
        """
        Prepare the test database with two Microblog authors and their posts.

        'acquired_user' is typed bad_actor/acquired and owns the posts
        'bad_post1' .. 'bad_post10'; 'TestUser1' has no author type and owns
        'TestPost1' .. 'TestPost10'. Everything is committed at the end.
        """
        self.config = getConfig()
        self._db = DB()
        self._db.setUp()
        self.timeline_overlap = TimelineOverlapVisualizationGenerator()

        now = datetime.datetime.now

        # --- bad actor and its ten posts ---
        bad_author = Author()
        bad_author.name = 'acquired_user'
        bad_author.domain = 'Microblog'
        bad_author.author_guid = 'acquired_user'
        bad_author.author_screen_name = 'acquired_user'
        bad_author.author_full_name = 'acquired_user'
        bad_author.author_osn_id = 1
        bad_author.created_at = now()
        bad_author.missing_data_complementor_insertion_date = now()
        bad_author.xml_importer_insertion_date = now()
        bad_author.author_type = 'bad_actor'
        bad_author.author_sub_type = 'acquired'
        self._db.add_author(bad_author)

        for number in range(1, 11):
            suffix = str(number)
            bad_post = Post()
            bad_post.post_id = 'bad_post' + suffix
            bad_post.author = 'acquired_user'
            bad_post.guid = 'bad_post' + suffix
            bad_post.date = now()
            bad_post.domain = 'Microblog'
            bad_post.author_guid = 'acquired_user'
            bad_post.content = 'InternetTV love it' + suffix
            bad_post.xml_importer_insertion_date = now()
            self._db.addPost(bad_post)

        # --- untyped test user and its ten posts ---
        test_author = Author()
        test_author.name = 'TestUser1'
        test_author.domain = 'Microblog'
        test_author.author_guid = 'TestUser1'
        test_author.author_screen_name = 'TestUser1'
        test_author.author_full_name = 'TestUser1'
        test_author.author_osn_id = 2
        test_author.created_at = now()
        test_author.missing_data_complementor_insertion_date = now()
        test_author.xml_importer_insertion_date = now()
        self._db.add_author(test_author)

        for number in range(1, 11):
            suffix = str(number)
            test_post = Post()
            test_post.post_id = 'TestPost' + suffix
            test_post.author = 'TestUser1'
            test_post.guid = 'TestPost' + suffix
            test_post.date = now()
            test_post.domain = 'Microblog'
            test_post.author_guid = 'TestUser1'
            test_post.content = 'InternetTV love it' + suffix
            test_post.xml_importer_insertion_date = now()
            self._db.addPost(test_post)

        self._db.commit()
 def _add_author(self, author_guid, type=u"good_actor"):
     """
     Create a Microblog author whose guid doubles as all of its names.

     The author is added to the database and appended to ``self._authors``.

     :param author_guid: value used for the guid, full name, screen name and name
     :param type: stored as ``author_type`` (defaults to u"good_actor")
     """
     new_author = Author()
     # The same identifier fills every naming field.
     for field_name in ('author_guid', 'author_full_name',
                        'author_screen_name', 'name'):
         setattr(new_author, field_name, author_guid)
     new_author.domain = u'Microblog'
     new_author.author_type = type

     self._db.add_author(new_author)
     self._authors.append(new_author)
 def liker_to_author(self, liker, photo_id):
     """
     Convert one "liker" record into an Author of type 'like'.

     :param liker: mapping with the keys 'username', 'favedate', 'nsid' and
         optionally 'realname' (empty string when absent)
     :param photo_id: stored, as a string, in the author's ``domain`` field
     :return: the populated Author
     """
     username = str(liker['username'])
     real_name = str(liker.get('realname', ""))
     domain = str(photo_id)
     faved_at = str(liker["favedate"])
     nsid = str(liker['nsid'])

     result = Author()
     result.name = username
     result.author_full_name = real_name
     result.domain = domain
     result.created_at = faved_at
     result.author_osn_id = nsid
     # The guid is derived from the OSN id, not from the user name.
     result.author_guid = compute_author_guid_by_author_name(nsid)
     result.author_type = 'like'
     return result
    def _create_author(self, guid, author_type):
        """
        Create a test author in the 'Restaurant' domain and register it.

        The guid is used as both name and author_guid. An empty feature list
        is registered for the author in
        ``self._authors_to_author_features_dict`` before it is added to the
        database.

        :param guid: identifier used for ``name`` and ``author_guid``
        :param author_type: stored as ``author_type``
        """
        author = Author()
        author.name = unicode(guid)
        author.author_guid = unicode(guid)
        author.author_screen_name = u'TestUser1'
        author.author_type = author_type
        # The original first assigned u'Microblog' and then overwrote it;
        # only the effective value is kept.
        author.domain = u'Restaurant'
        author.author_osn_id = 1

        self._authors_to_author_features_dict[author.author_guid] = []
        self._db.add_author(author)
    def insert_suspended_accounts2(self):
        """
        Add a "bad_actor" author for every missing (guid, screen name) tuple.

        Tuples come from ``self._db.get_missing_authors_tuples()``. A tuple
        with neither guid nor screen name is skipped; a missing guid is
        computed from the screen name. After the authors are stored, a
        three-row summary is written to
        ``<self._output_path>insert_suspended_accounts.csv``.
        """
        authors = []
        author_screen_names = []
        author_guids = []
        author_guid_author_screen_name_tuples = list(
            self._db.get_missing_authors_tuples())
        num_of_suspended_accounts = len(author_guid_author_screen_name_tuples)
        for i, author_guid_author_screen_name_tuple in enumerate(
                author_guid_author_screen_name_tuples):
            msg = "\rInserting missing authors to authors table: {0}/{1}".format(
                i, num_of_suspended_accounts)
            print(msg, end="")

            author_guid = author_guid_author_screen_name_tuple[0]
            author_screen_name = author_guid_author_screen_name_tuple[1]
            if author_guid is None and author_screen_name is None:
                continue

            author = Author()

            author.author_screen_name = author_screen_name
            author.name = author_screen_name
            # A None screen name would make the ','.join below raise, so it
            # is left out of the report (the author itself is still stored).
            if author_screen_name is not None:
                author_screen_names.append(author_screen_name)

            if author_guid is None:
                author_guid = compute_author_guid_by_author_name(
                    author_screen_name)
            author.author_guid = author_guid
            author_guids.append(author_guid)

            author.author_type = u"bad_actor"

            author.domain = self._domain

            authors.append(author)

        self._db.add_authors(authors)

        with open(self._output_path + "insert_suspended_accounts.csv",
                  'w') as output_file:
            writer = csv.writer(output_file)
            # writerow() expects a sequence of fields. Passing a bare string
            # makes every character its own cell, so each summary line is
            # wrapped in a one-element list.
            # The count is of authors actually added; skipped tuples are
            # not included.
            writer.writerow([
                "Number of suspended_users_added_to_authors_table is: " +
                str(len(authors))
            ])

            author_screen_names_str = ','.join(author_screen_names)
            writer.writerow(
                ["author_screen_names: " + author_screen_names_str])

            author_guids_str = ','.join(author_guids)
            writer.writerow(["author_guids: " + author_guids_str])
 def _json_user_to_db_author_converter(self, user, domain='Instagram_author'):
     """
     Convert a user JSON mapping into an Author.

     :param user: mapping with the keys 'username', 'id', 'full_name' and
         'profile_pic_url'; 'biography' is optional
     :param domain: stored as both ``domain`` and ``author_type``
     :return: the populated Author
     """
     author = Author()
     author.name = user['username']
     author.author_screen_name = author.name
     author.author_guid = compute_author_guid_by_author_name(author.name)
     author.domain = domain
     author.author_type = domain
     author.author_osn_id = user['id']
     author.author_full_name = user['full_name']
     # .get() reads the optional key without inserting it into the caller's
     # dict, which setdefault() did as a side effect.
     author.description = user.get('biography')
     author.url = 'https://www.instagram.com/' + author.author_screen_name
     author.profile_image_url = user['profile_pic_url']
     return author
 def commenter_to_author(self, commenter, photo_id):
     """
     Convert one "commenter" record into an Author of type 'comment'.

     :param commenter: mapping with the keys 'authorname', 'path_alias',
         'permalink', 'datecreate', 'author' and optionally 'realname'
         (empty string when absent)
     :param photo_id: stored, as a string, in the author's ``domain`` field
     :return: the populated Author
     """
     display_name = str(commenter['authorname'])
     screen_name = str(commenter['path_alias'])
     real_name = str(commenter.get('realname', ""))
     permalink = str(commenter['permalink'])
     domain = str(photo_id)
     created = str(commenter["datecreate"])
     osn_id = str(commenter['author'])

     result = Author()
     result.name = display_name
     result.author_screen_name = screen_name
     result.author_full_name = real_name
     result.url = permalink
     result.domain = domain
     result.created_at = created
     result.author_osn_id = osn_id
     # The guid is derived from the OSN id, not from the display name.
     result.author_guid = compute_author_guid_by_author_name(osn_id)
     result.author_type = 'comment'
     return result
 def owner_to_author(self, jsonresponse):
     """
     Convert the photo owner in a photo-info response into an Author of type 'owner'.

     :param jsonresponse: mapping whose 'photo' entry holds 'id', 'dates'
         (with 'posted') and 'owner' (with 'username', 'realname', 'nsid'
         and 'location')
     :return: the populated Author
     """
     photo = jsonresponse['photo']
     owner = photo['owner']

     result = Author()
     result.name = str(owner['username'])
     result.author_full_name = str(owner['realname'])
     result.domain = str(photo['id'])
     result.created_at = str(photo['dates']["posted"])
     osn_id = str(owner['nsid'])
     result.author_osn_id = osn_id
     # The guid is derived from the OSN id, not from the user name.
     result.author_guid = compute_author_guid_by_author_name(osn_id)
     result.location = str(owner['location'])
     result.author_type = 'owner'
     return result
Exemplo n.º 14
0
 def _convert_group_to_author(self):
     """
     Create an Author of type "Group" for ``self._group_id``.

     The name, member count (stored in ``followers_count``) and activity
     level (stored in ``author_sub_type``) come from the corresponding
     ``get_group_*`` methods.

     :return: the populated Author
     """
     group_name = self.get_group_name()
     group_osn_id = self._group_id

     group = Author()
     group.name = group_name
     group.author_osn_id = group_osn_id
     group.author_guid = commons.compute_author_guid_by_osn_id(group_osn_id)
     group.domain = self._domain
     group.author_type = "Group"
     group.followers_count = self.get_group_number_of_members()
     group.author_sub_type = self.get_group_level_of_activity()
     return group
 def parse_row(self, row):
     """
     Convert one inspection row into an Author.

     Reads the columns 'Risk', 'AKA Name', 'City', 'Address', 'Location',
     'DBA Name' and 'Inspection Date'.

     :param row: mapping from column name to value
     :return: the populated Author, or None when ``parse_type`` yields -1
         for the risk value or the row cannot be parsed
     """
     try:
         author = Author()
         author.author_type = self.parse_type(row['Risk'])
         if author.author_type == -1:
             return None
         # Non-ASCII characters are dropped from the full name.
         author.author_full_name = unicode(row['AKA Name']).encode('ascii', 'ignore').decode('ascii')
         author.location = unicode(row['City'] + ' ' + row['Address'])
         author.geo_enabled = unicode(row['Location'])
         author.name = row['DBA Name']
         author.created_at = unicode(row['Inspection Date'])
         return author
     except Exception:
         # Best effort: a malformed row is skipped. `Exception` (not a bare
         # except) lets KeyboardInterrupt and SystemExit propagate.
         return None
    def insert_suspended_accounts(self):
        """
        Add a "bad_actor" author for every post whose author is missing.

        Posts come from ``self._db.get_posts_of_missing_authors()``. For each
        one an author is created from the post's author name, the computed
        guid is written back to the post, and afterwards both the authors
        and the updated posts are stored. A three-line summary is written to
        ``<self._output_path>insert_suspended_accounts.txt``.
        """
        orphan_posts = self._db.get_posts_of_missing_authors()
        total = len(orphan_posts)

        new_authors, screen_names, guids = [], [], []
        for position, orphan_post in enumerate(orphan_posts):
            print("\rInserting missing authors to authors table: {0}/{1}".format(
                position, total), end="")

            screen_name = orphan_post.author
            guid = compute_author_guid_by_author_name(screen_name)

            placeholder = Author()
            placeholder.author_screen_name = screen_name
            placeholder.name = screen_name
            placeholder.author_guid = guid
            placeholder.author_type = u"bad_actor"
            placeholder.domain = self._domain

            new_authors.append(placeholder)
            screen_names.append(screen_name)
            guids.append(guid)

            # update the missing guid to post
            orphan_post.author_guid = guid

        self._db.add_authors(new_authors)
        self._db.addPosts(orphan_posts)

        report_path = self._output_path + "insert_suspended_accounts.txt"
        with open(report_path, 'w') as report:
            report.write(
                "Number of suspended_users_added_to_authors_table is: " +
                str(total) + "\n")
            report.write(
                "author_screen_names: " + ','.join(screen_names) + "\n")
            report.write("author_guids: " + ','.join(guids))
Exemplo n.º 17
0
 def _convert_pages_to_authors(self, pages_id_to_name_dict):
     """
     Turn a page-id -> page-name mapping into Authors of type "Page".

     :param pages_id_to_name_dict: dict mapping page id to page name
     :return: a list of Author objects ready to be added to DB
     """
     page_authors = []
     for page_id, page_name in pages_id_to_name_dict.items():
         page = Author()
         page.name = page_name
         page.author_osn_id = page_id
         page.author_guid = commons.compute_author_guid_by_osn_id(page_id)
         page.domain = self._domain
         page.author_type = "Page"
         page_authors.append(page)
     return page_authors
    def _create_author(self, post, dataset_affiliation):
        """
        Build an Author from a post.

        The post's author name is used as name and screen name; the guid
        computed from that name is used as both guid and OSN id.

        :param post: object providing ``author`` and ``post_type``
        :param dataset_affiliation: stored in the author's ``notifications`` field
        :return: the populated Author
        """
        screen_name = post.author
        guid = compute_author_guid_by_author_name(screen_name)

        created = Author()
        created.name = screen_name
        created.author_screen_name = screen_name
        created.author_osn_id = guid
        created.author_guid = guid
        created.domain = self._domain
        created.author_type = post.post_type
        created.notifications = dataset_affiliation
        return created
 def parse_row(self, row):
     """
     Convert one inspection row into an Author.

     Reads the columns 'risk_category', 'business_name', 'business_city',
     'business_address', 'business_location' and 'inspection_date'.

     :param row: mapping from column name to value
     :return: the populated Author, or None when ``parse_type`` yields -1
         for the risk value, the location is missing/empty, or the row
         cannot be parsed (the failure is logged)
     """
     try:
         author = Author()
         author.author_type = self.parse_type(row['risk_category'])
         if author.author_type == -1:
             return None
         # Non-ASCII characters are dropped; the same value serves as name.
         author.author_full_name = unicode(row['business_name']).encode('ascii', 'ignore').decode('ascii')
         author.name = author.author_full_name
         author.location = unicode(row['business_city'] + ' ' + row['business_address'])
         # Test the raw value: unicode(None) is u'None', so a None check
         # made after the conversion could never succeed.
         raw_location = row['business_location']
         if raw_location is None or unicode(raw_location) == "":
             return None
         author.geo_enabled = unicode(raw_location)
         author.created_at = unicode(row['inspection_date'])
         return author
     except Exception:
         # Best effort: a malformed row is logged and skipped. `Exception`
         # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
         logging.info("error with row:"+ str(row))
         return None
Exemplo n.º 20
0
 def _convert_group_members_to_author(self, users_id_to_name_dict):
     """
     Turn a user-id -> (name, screen name) mapping into Authors of type "User".

     :param users_id_to_name_dict: dict mapping user id to a sequence whose
         first item is the name and second item is the screen name
     :return: a list of Author objects ready to be added to DB.
     """
     members = []
     for user_id, name_pair in users_id_to_name_dict.items():
         member = Author()
         member.name = name_pair[0]
         member.author_screen_name = name_pair[1]
         member.author_osn_id = user_id
         member.author_guid = commons.compute_author_guid_by_osn_id(user_id)
         member.domain = self._domain
         member.author_type = "User"
         members.append(member)
     return members
    def setUp(self):
        """
        Store one author and one post, then run the feature generator.

        The author and post are committed to a fresh database, after which
        an AccountPropertiesFeatureGenerator is built for exactly that
        author/post pair and executed.
        """
        self._db = DB()
        self._db.setUp()
        self.author_guid = u"author_guid"
        guid = self.author_guid

        fixture_author = Author()
        fixture_author.author_guid = guid
        fixture_author.author_full_name = u'author'
        fixture_author.name = u'author_name'
        fixture_author.author_screen_name = u'author_screen_name'
        fixture_author.domain = u'Microblog'
        # counters
        fixture_author.statuses_count = 10
        fixture_author.friends_count = 5
        fixture_author.followers_count = 6
        fixture_author.favourites_count = 8
        # classification
        fixture_author.author_sub_type = u"bot"
        fixture_author.author_type = u"bad"
        fixture_author.created_at = u"2017-06-17 05:00:00"
        # profile flags
        fixture_author.default_profile = True
        fixture_author.default_profile_image = True
        fixture_author.verified = True
        self._db.add_author(fixture_author)

        fixture_post = Post()
        fixture_post.author = guid
        fixture_post.author_guid = guid
        fixture_post.content = u"content"
        fixture_post.title = u"title"
        fixture_post.domain = u"domain"
        fixture_post.post_id = u"post_id"
        fixture_post.guid = fixture_post.post_id
        fixture_post.date = convert_str_to_unicode_datetime("2017-06-14 05:00:00")
        fixture_post.created_at = fixture_post.date
        self._db.addPost(fixture_post)

        self._db.session.commit()
        self.feature_prefix = u"AccountPropertiesFeatureGenerator_"
        self.account_properties_feature_generator = AccountPropertiesFeatureGenerator(
            self._db,
            authors=[fixture_author],
            posts={guid: [fixture_post]})
        self.account_properties_feature_generator.execute()