def get(self):
    """Handle the OAuth callback request from Tumblr.

    Reads ``oauth_verifier`` and ``oauth_token`` from the request. If either
    is missing the user is treated as having declined and
    ``self.finish(None)`` is called. Otherwise the stored request token is
    looked up, exchanged for the final token pair, and ``user/info`` is
    fetched with a client built from that pair.

    Raises:
      exc.HTTPBadRequest: if no stored request token matches ``oauth_token``.
      BaseException: whatever the ``user/info`` call raises; it is handed to
        ``util.interpret_http_exception`` first and then re-raised unchanged.
    """
    verifier = self.request.get('oauth_verifier')
    request_token_key = self.request.get('oauth_token')
    if not verifier or not request_token_key:
        # user declined
        self.finish(None)
        return

    # look up the request token
    request_token = models.OAuthRequestToken.get_by_id(request_token_key)
    if request_token is None:
        raise exc.HTTPBadRequest('Invalid oauth_token: %s' % request_token_key)

    # exchange the request token and verifier for the final token pair
    tp = tumblpy.Tumblpy(app_key=appengine_config.TUMBLR_APP_KEY,
                         app_secret=appengine_config.TUMBLR_APP_SECRET,
                         oauth_token=request_token_key,
                         oauth_token_secret=request_token.token_secret)
    auth_token = tp.get_authorized_tokens(verifier)
    auth_token_key = auth_token['oauth_token']
    auth_token_secret = auth_token['oauth_token_secret']

    # get the user's blogs
    # http://www.tumblr.com/docs/en/api/v2#user-methods
    tp = TumblrAuth._api_from_token(auth_token_key, auth_token_secret)
    logging.debug('Fetching user/info')
    try:
        resp = tp.post('user/info')
    except BaseException as e:
        # `as` form parses on Python 2.6+ and Python 3 alike. The helper's
        # return value is ignored; the original exception always propagates.
        util.interpret_http_exception(e)
        raise
def _api_from_token(key, secret):
    """Build a tumblpy.Tumblpy client for the given OAuth token pair.

    Args:
      key: OAuth token, passed to Tumblpy as ``oauth_token``
      secret: OAuth token secret, passed to Tumblpy as ``oauth_token_secret``

    Returns:
      tumblpy.Tumblpy

    Raises:
      AssertionError: if TUMBLR_APP_KEY or TUMBLR_APP_SECRET is empty.
    """
    app_credentials_present = TUMBLR_APP_KEY and TUMBLR_APP_SECRET
    assert app_credentials_present, (
        "Please fill in the tumblr_app_key and tumblr_app_secret files in "
        "your app's root directory.")

    client_kwargs = {
        'app_key': TUMBLR_APP_KEY,
        'app_secret': TUMBLR_APP_SECRET,
        'oauth_token': key,
        'oauth_token_secret': secret,
    }
    return tumblpy.Tumblpy(**client_kwargs)
def get(self):
    """Handle the Tumblr OAuth callback and redirect to the front page.

    Looks up the stored request token named by ``oauth_token``, exchanges it
    (with ``oauth_verifier``) for the final token pair, records that pair via
    ``TumblrOAuthFinalToken.new``, fetches ``user/info``, and redirects to
    ``/`` with the user's name, blog hostnames and final token in the query
    string.

    Raises:
      exc.HTTPBadRequest: if no stored request token matches ``oauth_token``,
        or if the request carries no ``oauth_verifier``.
    """
    # lookup the request token
    token_key = self.request.get('oauth_token')
    token = TumblrOAuthRequestToken.get_by_key_name(token_key)
    if token is None:
        raise exc.HTTPBadRequest('Invalid oauth_token: %s' % token_key)

    # Without a verifier the exchange cannot happen. Answer with a 400
    # rather than letting a KeyError from the params lookup escape.
    verifier = self.request.params.get('oauth_verifier')
    if verifier is None:
        raise exc.HTTPBadRequest('Missing oauth_verifier')

    # generate and store the final token
    tp = tumblpy.Tumblpy(app_key=TUMBLR_APP_KEY,
                         app_secret=TUMBLR_APP_SECRET,
                         oauth_token=token_key,
                         oauth_token_secret=token.secret)
    auth_token = tp.get_authorized_tokens(verifier)
    final_token = auth_token['oauth_token']
    final_secret = auth_token['oauth_token_secret']
    TumblrOAuthFinalToken.new(final_token, final_secret)

    # get the user's blogs
    # http://www.tumblr.com/docs/en/api/v2#user-methods
    tp = tumblpy.Tumblpy(app_key=TUMBLR_APP_KEY,
                         app_secret=TUMBLR_APP_SECRET,
                         oauth_token=final_token,
                         oauth_token_secret=final_secret)
    resp = tp.post('user/info')
    logging.debug(resp)
    user = resp['user']
    hostnames = [util.domain_from_link(b['url']) for b in user['blogs']]
    hostnames = util.trim_nulls(hostnames)
    # titles = [b[title] for b in user['blogs']]

    # redirect so that refreshing the page doesn't try to regenerate the oauth
    # token, which won't work.
    query = {
        'tumblr_username': user['name'],
        'tumblr_hostnames': hostnames,
        # 'tumblr_titles': titles,
        'oauth_token': final_token,
    }
    # second argument (doseq) expands the hostnames list into repeated params
    self.redirect('/?' + urllib.urlencode(query, True))
def _api_from_token(key, secret):
    """Build a tumblpy.Tumblpy client for the given OAuth token pair.

    The app key and secret are read from ``appengine_config``.

    Args:
      key: OAuth token, passed to Tumblpy as ``oauth_token``
      secret: OAuth token secret, passed to Tumblpy as ``oauth_token_secret``

    Returns:
      tumblpy.Tumblpy

    Raises:
      AssertionError: if either app credential in appengine_config is empty.
    """
    missing_credentials_msg = (
        "Please fill in the tumblr_app_key and tumblr_app_secret files in "
        "your app's root directory.")
    assert (appengine_config.TUMBLR_APP_KEY and
            appengine_config.TUMBLR_APP_SECRET), missing_credentials_msg

    client_kwargs = {
        'app_key': appengine_config.TUMBLR_APP_KEY,
        'app_secret': appengine_config.TUMBLR_APP_SECRET,
        'oauth_token': key,
        'oauth_token_secret': secret,
    }
    return tumblpy.Tumblpy(**client_kwargs)
def post(self):
    """Start the Tumblr OAuth flow.

    Requests authentication tokens from Tumblr with OAUTH_CALLBACK_URL as the
    callback, records the request token and its secret via
    ``TumblrOAuthRequestToken.new``, and redirects to the returned
    authorization URL.
    """
    client = tumblpy.Tumblpy(app_key=TUMBLR_APP_KEY,
                             app_secret=TUMBLR_APP_SECRET)
    props = client.get_authentication_tokens(callback_url=OAUTH_CALLBACK_URL)

    # the callback handler needs the request token later, so save it now
    request_key = props['oauth_token']
    request_secret = props['oauth_token_secret']
    TumblrOAuthRequestToken.new(request_key, request_secret)

    target = props['auth_url']
    logging.info('Generated request token, redirecting to Tumblr: %s', target)
    self.redirect(target)
def redirect_url(self, state=None):
    """Return the Tumblr authorization URL to send the user to.

    Requests authentication tokens from Tumblr, using this request's host URL
    plus ``self.to_path`` as the callback, and saves the request token so the
    callback handler can find it again.

    Args:
      state: optional value stored alongside the request token

    Returns:
      the ``auth_url`` value from Tumblr's response

    Raises:
      AssertionError: if TUMBLR_APP_KEY or TUMBLR_APP_SECRET is empty.
    """
    have_app_credentials = TUMBLR_APP_KEY and TUMBLR_APP_SECRET
    assert have_app_credentials, (
        "Please fill in the tumblr_app_key and tumblr_app_secret files in "
        "your app's root directory.")

    client = tumblpy.Tumblpy(app_key=TUMBLR_APP_KEY,
                             app_secret=TUMBLR_APP_SECRET)
    callback = self.request.host_url + self.to_path
    props = client.get_authentication_tokens(callback_url=callback)

    # persist the request token, keyed by the token itself
    stored_token = models.OAuthRequestToken(
        id=props['oauth_token'],
        token_secret=props['oauth_token_secret'],
        state=state)
    stored_token.put()

    return props['auth_url']
def __init__(self):
    """Create the clients this object uses.

    Sets ``self.stream`` (TwitterStream), ``self.twitter`` (Twitter) and
    ``self.tmblr`` (tumblpy.Tumblpy), all configured from module-level
    credential constants.
    """
    api_version = '1.1'

    # each Twitter client gets its own OAuth object
    stream_auth = OAuth(ACCESS_KEY, ACCESS_SECRET,
                        CONSUMER_KEY, CONSUMER_SECRET)
    self.stream = TwitterStream(auth=stream_auth, api_version=api_version)

    rest_auth = OAuth(ACCESS_KEY, ACCESS_SECRET,
                      CONSUMER_KEY, CONSUMER_SECRET)
    self.twitter = Twitter(auth=rest_auth, api_version=api_version)

    tumblr_credentials = {
        'app_key': TUMBLR_KEY,
        'app_secret': TUMBLR_SECRET,
        'oauth_token': TOKEN_KEY,
        'oauth_token_secret': TOKEN_SECRET,
    }
    self.tmblr = tumblpy.Tumblpy(**tumblr_credentials)
def publish_post(self, post):
    """Publishes a post to Tumblr.

    The post is sent as an HTML text post, or as a photo post (with the
    rendered HTML as caption) when the activity object is a photo that has an
    image URL.

    Args:
      post: post entity; ``to_activity()`` and ``render_html()`` are called
        on it

    Returns:
      string, the Tumblr post id
    """
    # TODO: expose as option
    # Tags meant to be attached to the Tumblr posts. Not sent at the moment.
    POST_TAGS = 'freedom.io'

    activity = post.to_activity()
    obj = activity['object']
    date = util.parse_iso8601(activity['published'])
    location = obj.get('location')
    logging.info('Publishing post %s', obj['id'])

    # Work out a title: explicit title, else the first phrase of the content,
    # else the location name, else the publish date. (Not sent at the moment.)
    title = obj.get('title')
    if not title:
        leading_phrase = re.search('^[^,.:;?!]+', obj.get('content', ''))
        if leading_phrase:
            title = leading_phrase.group()
        elif location and 'displayName' in location:
            title = 'At ' + location['displayName']
        else:
            title = date.date().isoformat()

    # date is UTC (ie GMT), formatted e.g. '2012-01-14 12:00:15 GMT'
    utc_offset = date.utcoffset()
    if utc_offset:
        date = date - utc_offset
    datestr_utc = date.strftime('%Y-%m-%d %H:%M:%S GMT')

    # post params: http://www.tumblr.com/docs/en/api/v2#posting
    #
    # Deliberately left out for now:
    #   'tags': POST_TAGS
    #   'title': title
    #   'date': datestr_utc
    # TODO: ugh, tumblr doesn't let you create a post with a date more than an
    # hour off of the current time. bleh.
    # https://groups.google.com/d/msg/tumblr-api/CYLno2Q60sU/6tR1Xe56TiIJ
    body = post.render_html()
    params = {}
    params['type'] = 'text'
    params['format'] = 'html'
    params['body'] = body

    # photo: switch post type and move the rendered HTML into the caption
    image_url = obj.get('image', {}).get('url')
    is_photo = obj.get('objectType') == 'photo'
    if is_photo and image_url:
        params['type'] = 'photo'
        params['source'] = image_url
        params['caption'] = body
        params.pop('body')

    # post!
    client = tumblpy.Tumblpy(app_key=TUMBLR_APP_KEY,
                             app_secret=TUMBLR_APP_SECRET,
                             oauth_token=self.token_key,
                             oauth_token_secret=self.token_secret)
    logging.info('Creating post with params: %r', params)
    created = client.post('post', blog_url=self.hostname(), params=params)
    return str(created['id'])
def init_client():
    """Return a tumblpy.Tumblpy built from the credentials in ``oauth_config``.

    The consumer key, consumer secret, OAuth token and OAuth token secret are
    passed positionally, in that order.
    """
    credential_names = (
        'YOUR_CONSUMER_KEY',
        'YOUR_CONSUMER_SECRET',
        'OAUTH_TOKEN',
        'OAUTH_TOKEN_SECRET',
    )
    return tumblpy.Tumblpy(*[oauth_config[name] for name in credential_names])
def scrape_tumblr(username, url_to_scrape, database_name, number, offset,
                  limit=20, url_type='blog'):
    """Scrape tagged single-photo posts from a Tumblr blog into SQLite.

    Pages through the blog's posts, ``limit`` at a time, and records every
    post that has exactly one photo and at least one tag. Scraping stops once
    ``number`` such posts have been stored or the blog has no more posts.

    Args:
      username: only passed to the progress message formatter
      url_to_scrape: blog URL handed to the Tumblr API as ``blog_url``
      database_name: path of the SQLite database to write to
      number: how many qualifying posts to collect; None means 1
      offset: post offset to start from; None or 0 means 20
      limit: page size requested from the API
      url_type: only 'blog' is supported

    Raises:
      ValueError: if ``url_type`` is not 'blog' and a page has to be fetched.
    """
    # Default offset
    if offset is None or offset == 0:
        offset = 20

    # Default number
    if number is None:
        number = 1

    # Set authorization
    authorization = tumblpy.Tumblpy(app_key='APP KEY HERE',
                                    app_secret='APP SECRET HERE')

    # Connect to database
    print('Connecting to {0}'.format(database_name))
    conn = sqlite3.connect(database_name)
    try:
        c = conn.cursor()

        # Start scraping
        print('Scraping : {0}'.format(url_to_scrape))
        number_found = 0
        post_count = 0
        while number_found < number:
            # Get tumblr posts
            start = post_count * limit + offset
            print('Checking posts: {0} : {1}'.format(start, start + limit))

            # Only blog URLs can be fetched; fail clearly instead of hitting
            # an undefined `posts` further down.
            if url_type != 'blog':
                raise ValueError(
                    'Unsupported url_type: {0!r}'.format(url_type))
            posts = authorization.get('posts', blog_url=url_to_scrape,
                                      params={'limit': limit,
                                              'offset': start})
            post_count += 1

            page = posts['posts']
            if not page:
                # Ran past the end of the blog; without this the loop would
                # keep requesting empty pages forever.
                print('No more posts found, stopping')
                break

            for p in page:
                # Stop as soon as the requested amount has been collected
                if number_found >= number:
                    break
                # Skip posts without a photo
                if 'photos' not in p:
                    continue
                # Skip posts with multiple photos
                if len(p['photos']) != 1:
                    continue
                # Skip posts without tags
                if len(p['tags']) == 0:
                    continue

                number_found += 1

                # Set scraped info
                note_count = p['note_count']
                tags = [y.strip().lower()
                        for x in p['tags']
                        for y in x.split('\n')]
                image_url = p['photos'][0]['original_size']['url']
                print('Image Found at: {2}, '
                      'Image number: {1}, '
                      'Tags are: {3}'.format(username, number_found,
                                             image_url,
                                             '#' + ' #'.join(tags)))

                # Add scraped data to database
                add_tags(c, tags)
                add_photo(c, image_url, note_count)
                link_tags_photo(c, tags, image_url)

        conn.commit()
    finally:
        # Always release the connection, even when scraping fails midway
        conn.close()
def init_client():
    """Return a tumblpy.Tumblpy built from the 'TUMBLR' config section.

    The consumer key, consumer secret, token and token secret are read via
    ``helper.get_config`` and passed positionally, in that order.
    """
    section = 'TUMBLR'
    option_names = ('consumer_key', 'consumer_secret', 'token', 'token_secret')
    return tumblpy.Tumblpy(
        *[helper.get_config(section, option) for option in option_names])
def scrape_tumblr(username, url_to_scrape, database_name, number, offset,
                  limit=20, url_type='blog'):
    """Scrape tagged single-photo posts from a Tumblr blog into a database.

    Pages through the blog's posts, ``limit`` at a time, and records every
    post that has exactly one photo and at least one tag. Scraping stops once
    ``number`` such posts have been stored or the blog has no more posts.
    The Tumblr app key and secret come from ``config/tumblyconfig.ini``; if
    that file does not exist the user is prompted for them and it is created.

    Args:
      username: only passed to the progress message formatter
      url_to_scrape: blog URL handed to the Tumblr API as ``blog_url``
      database_name: passed to ``create_check_database`` to get a connection
      number: how many qualifying posts to collect; None means 1
      offset: post offset to start from; None or 0 means 20
      limit: page size requested from the API
      url_type: only 'blog' is supported

    Raises:
      ValueError: if ``url_type`` is not 'blog' and a page has to be fetched.
    """
    config_path = 'config/tumblyconfig.ini'

    def getconfig():
        """Return (app_key, app_secret), creating the config file if needed."""
        if not os.path.isfile(config_path):
            print('You do not appear to have a config file'
                  ' let\'s create one')
            key = input('Please enter an app key: ')
            secret = input('Please enter an app secret: ')
            put_config(config_path, key, secret)
            print('config file created')
            return (key, secret)

        config_pull = get_config(config_path)
        return (str(config_pull[0]), str(config_pull[1]))

    # Default offset
    if offset is None or offset == 0:
        offset = 20

    # Default number
    if number is None:
        number = 1

    # Set authorization
    app_key, app_secret = getconfig()
    authorization = tumblpy.Tumblpy(app_key=app_key, app_secret=app_secret)

    # Connect to database
    print('Connecting to {0}'.format(database_name))
    conn = create_check_database(database_name)
    try:
        c = conn.cursor()

        # Start scraping
        print('Scraping : {0}'.format(url_to_scrape))
        number_found = 0
        post_count = 0
        while number_found < number:
            # Get tumblr posts
            start = post_count * limit + offset
            print('Checking posts: {0} : {1}'.format(start, start + limit))

            # Only blog URLs can be fetched; fail clearly instead of hitting
            # an undefined `posts` further down.
            if url_type != 'blog':
                raise ValueError(
                    'Unsupported url_type: {0!r}'.format(url_type))
            posts = authorization.get('posts', blog_url=url_to_scrape,
                                      params={'limit': limit,
                                              'offset': start})
            post_count += 1

            page = posts['posts']
            if not page:
                # Ran past the end of the blog; without this the loop would
                # keep requesting empty pages forever.
                print('No more posts found, stopping')
                break

            for p in page:
                # Stop as soon as the requested amount has been collected
                if number_found >= number:
                    break
                # Skip posts without a photo
                if 'photos' not in p:
                    continue
                # Skip posts with multiple photos
                if len(p['photos']) != 1:
                    continue
                # Skip posts without tags
                if len(p['tags']) == 0:
                    continue

                number_found += 1

                # Set scraped info
                note_count = p['note_count']
                tags = [y.strip().lower()
                        for x in p['tags']
                        for y in x.split('\n')]
                image_url = p['photos'][0]['original_size']['url']
                print('Image Found at: {2}, '
                      'Image number: {1}, '
                      'Tags are: {3}'.format(username, number_found,
                                             image_url,
                                             '#' + ' #'.join(tags)))

                # Add scraped data to database
                add_tags(c, tags)
                add_photo(c, image_url, note_count)
                link_tags_photo(c, tags, image_url)

        conn.commit()
    finally:
        # Always release the connection, even when scraping fails midway
        conn.close()