Пример #1
0
 def __init__(self, input_file='scrape_data.csv', config='options.ini'):
     """Set up a tracker: load the input CSV, then the settings file.

     input_file -- stored as ``self.input_file`` before ``read_csv()`` runs.
     config -- settings file name, resolved against this module's directory.
     """
     self.input_file = input_file
     self.data_dict = DefaultOrderedDict(list)

     # One timestamp per instance, rendered as 'YYYY-MM-DD HH:MM:SS'.
     created = datetime.fromtimestamp(time())
     self.stamp = created.strftime('%Y-%m-%d %H:%M:%S')

     self.read_csv()

     # The settings file is looked up next to this source file, not in the
     # current working directory.
     module_dir = os.path.dirname(__file__)
     self.config = ConfigParser()
     self.config.read(os.path.join(module_dir, config))
 def __init__(self, input_file='scrape_data.csv', config='options.ini'):
     """Initialise tracker state from a CSV file and a settings file.

     input_file -- kept on ``self.input_file``; ``read_csv()`` is called
         right after the column store is created.
     config -- name of the settings file located beside this module.
     """
     self.input_file = input_file
     self.data_dict = DefaultOrderedDict(list)

     # Creation time of this instance, as 'YYYY-MM-DD HH:MM:SS'.
     now = datetime.fromtimestamp(time())
     self.stamp = now.strftime('%Y-%m-%d %H:%M:%S')

     self.read_csv()

     # Resolve the settings file relative to this source file.
     here = os.path.dirname(__file__)
     settings_path = os.path.join(here, config)
     self.config = ConfigParser()
     self.config.read(settings_path)
class Social_Media_Tracker(object):
    """
    Track a single entity across 3 different types of social media:
    Facebook, Twitter and Youtube.

    As of now you can retrieve:
    -No. of likes from facebook
    -No. of followers from Twitter
    -No. of Subscribers from Youtube

    This class takes a CSV input file with the following header:
    twitter_id,  facebook_id, youtube_id,  handle, scraping frequency (times/hour),
    likes, followers, subscribers, video_views.

    It also has a configuration file (.ini) for FB and Twitter App keys.
    The Youtube pinger method has dependencies that reside in current directory.

    The CSV is held in memory column-wise: ``self.data_dict`` maps each
    column name to the list of that column's values, one entry per row.
    """

    def __init__(self, input_file='scrape_data.csv', config='options.ini'):
        """Load the input CSV and the settings file.

        input_file -- path of the CSV file read by ``read_csv``.
        config -- settings file name, resolved against this module's
            directory (not the current working directory).
        """
        self.input_file = input_file
        self.data_dict = DefaultOrderedDict(list)
        # One timestamp per instance; it labels every column added later.
        self.stamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d %H:%M:%S')
        self.read_csv()
        options = os.path.join(os.path.dirname(__file__), config)
        self.config = ConfigParser()
        # NOTE(review): if this is the stdlib ConfigParser, read() silently
        # skips a missing file and the failure only surfaces at config.get().
        self.config.read(options)

    def read_csv(self):
        """Load the input CSV file into ``self.data_dict``, column by column.

        Columns are created in header order before any row is read, so the
        column order does not depend on dict iteration order and a
        header-only file still yields its (empty) columns.
        """
        with open(self.input_file, 'r') as f:
            reader = csv.DictReader(f)
            # Kept for backward compatibility; the underlying file is closed
            # once this method returns.
            self.data = reader
            for name in reader.fieldnames or []:
                # Touching the key makes the default factory create the
                # column without discarding values already stored in it.
                self.data_dict[name]
            for row in reader:
                for key, value in row.items():
                    self.data_dict[key].append(value)

    def write_csv(self, output_file='test.csv'):
        """Write ``self.data_dict`` to a CSV file: header row, then data rows.

        output_file -- destination path; defaults to the historical
            'test.csv'.

        Columns shorter than the longest one are padded with empty cells, so
        an empty table or ragged columns no longer raise IndexError.
        """
        import sys  # local import: only needed to choose the file mode

        columns = list(self.data_dict.values())
        row_count = max(len(column) for column in columns) if columns else 0

        # The csv module wants a binary file on Python 2 and a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] >= 3:
            f = open(output_file, 'w', newline='')
        else:
            f = open(output_file, 'wb')
        with f:
            writer = csv.writer(f)
            writer.writerow(list(self.data_dict.keys()))
            for index in range(row_count):
                writer.writerow([column[index] if index < len(column) else ''
                                 for column in columns])

    def log_error(self, error):
        """Append ``str(error)`` as one line to 'error_log.txt' for debugging."""
        with open('error_log.txt', 'a') as f:
            f.write(str(error) + '\n')

    def ping_facebook(self):
        """Ping facebook for the current number of likes of every facebook_id.

        The results are stored in a new column named 'likes <timestamp>'.
        A lookup that fails (API error or missing 'likes' field) is logged
        through ``log_error`` and recorded as None, so the column stays
        aligned row-for-row with the other columns.
        """
        # Facebook API authentication procedure
        token = self.config.get('facebook', 'token')
        graph = facebook.GraphAPI(token)

        # Look up number of likes from facebook_id written in csv
        likes = []
        for i, ID in enumerate(self.data_dict['facebook_id']):
            try:
                # The request itself is inside the try block so that network
                # and API errors are logged instead of aborting the run.
                count = graph.get_object(ID)['likes']
                print('facebook likes: %s %s' % (count, i))
            except Exception as error:
                self.log_error(error)
                count = None
            likes.append(count)

        # Replace the column in one step; skip it when there were no ids so
        # that no empty column is added to the table.
        if likes:
            self.data_dict['likes ' + self.stamp] = likes
        print(self.data_dict)
Пример #4
0
class Social_Media_Tracker(object):
    """
    Track a single entity across 3 different types of social media:
    Facebook, Twitter and Youtube.

    As of now you can retrieve:
    -No. of likes from facebook
    -No. of followers from Twitter
    -No. of Subscribers from Youtube

    This class takes a CSV input file with the following header:
    twitter_id,  facebook_id, youtube_id,  handle, scraping frequency (times/hour),
    likes, followers, subscribers, video_views.

    It also has a configuration file (.ini) for FB and Twitter App keys.
    The Youtube pinger method has dependencies that reside in current directory.

    The CSV is held in memory column-wise: ``self.data_dict`` maps each
    column name to the list of that column's values, one entry per row.
    """

    def __init__(self, input_file='scrape_data.csv', config='options.ini'):
        """Load the input CSV and the settings file.

        input_file -- path of the CSV file read by ``read_csv``.
        config -- settings file name, resolved against this module's
            directory (not the current working directory).
        """
        self.input_file = input_file
        self.data_dict = DefaultOrderedDict(list)
        # One timestamp per instance; it labels every column added later.
        self.stamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d %H:%M:%S')
        self.read_csv()
        options = os.path.join(os.path.dirname(__file__), config)
        self.config = ConfigParser()
        # NOTE(review): if this is the stdlib ConfigParser, read() silently
        # skips a missing file and the failure only surfaces at config.get().
        self.config.read(options)

    def read_csv(self):
        """Load the input CSV file into ``self.data_dict``, column by column.

        Columns are created in header order before any row is read, so the
        column order does not depend on dict iteration order and a
        header-only file still yields its (empty) columns.
        """
        with open(self.input_file, 'r') as f:
            reader = csv.DictReader(f)
            # Kept for backward compatibility; the underlying file is closed
            # once this method returns.
            self.data = reader
            for name in reader.fieldnames or []:
                # Touching the key makes the default factory create the
                # column without discarding values already stored in it.
                self.data_dict[name]
            for row in reader:
                for key, value in row.items():
                    self.data_dict[key].append(value)

    def write_csv(self, output_file='test.csv'):
        """Write ``self.data_dict`` to a CSV file: header row, then data rows.

        output_file -- destination path; defaults to the historical
            'test.csv'.

        Columns shorter than the longest one are padded with empty cells, so
        an empty table or ragged columns no longer raise IndexError.
        """
        import sys  # local import: only needed to choose the file mode

        columns = list(self.data_dict.values())
        row_count = max(len(column) for column in columns) if columns else 0

        # The csv module wants a binary file on Python 2 and a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] >= 3:
            f = open(output_file, 'w', newline='')
        else:
            f = open(output_file, 'wb')
        with f:
            writer = csv.writer(f)
            writer.writerow(list(self.data_dict.keys()))
            for index in range(row_count):
                writer.writerow([column[index] if index < len(column) else ''
                                 for column in columns])

    def log_error(self, error):
        """Append ``str(error)`` as one line to 'error_log.txt' for debugging."""
        with open('error_log.txt', 'a') as f:
            f.write(str(error) + '\n')

    def ping_facebook(self):
        """Ping facebook for the current number of likes of every facebook_id.

        The results are stored in a new column named 'likes <timestamp>'.
        A lookup that fails (API error or missing 'likes' field) is logged
        through ``log_error`` and recorded as None, so the column stays
        aligned row-for-row with the other columns.
        """
        # Facebook API authentication procedure
        token = self.config.get('facebook', 'token')
        graph = facebook.GraphAPI(token)

        # Look up number of likes from facebook_id written in csv
        likes = []
        for i, ID in enumerate(self.data_dict['facebook_id']):
            try:
                # The request itself is inside the try block so that network
                # and API errors are logged instead of aborting the run.
                count = graph.get_object(ID)['likes']
                print('facebook likes: %s %s' % (count, i))
            except Exception as error:
                self.log_error(error)
                count = None
            likes.append(count)

        # Replace the column in one step; skip it when there were no ids so
        # that no empty column is added to the table.
        if likes:
            self.data_dict['likes ' + self.stamp] = likes
        print(self.data_dict)