Exemplo n.º 1
0
def get_cached_entries(page_size=500):
    """Fetch every post entry from the blog feed and return formatted copies.

    Pages through the Blogger GData feed ``page_size`` entries at a time
    until a short page signals the end of the feed.

    Args:
        page_size: number of entries requested per fetch (default 500,
            the page size previously hard-coded in three places).

    Returns:
        A tuple of the results of ``format_entry`` over every entry.
    """
    blogger_service = service.GDataService()
    blogger_service.service = 'blogger'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ssl = False
    query = service.Query()
    query.feed = '/feeds/6752139154038265086/posts/default'
    query.max_results = page_size

    entries = []
    page = 0
    while True:
        # start_index is 1-based in the GData protocol.
        query.start_index = page * page_size + 1
        feed = blogger_service.Get(query.ToUri())
        logging.info('%d entries fetched, fetch number %d',
                     len(feed.entry), page + 1)
        entries.extend(feed.entry)

        # A full page implies more entries may follow; a short page
        # means the feed is exhausted.
        if len(feed.entry) == page_size:
            page += 1
        else:
            break

    logging.info('retrieved %d entries total', len(entries))

    return tuple(format_entry(e) for e in entries)
Exemplo n.º 2
0
    def updateLabels(self, labels, newLabels):
        """Replace the labels (categories) on a set of posts.

        Fetches up to 10000 posts — optionally restricted to posts that
        currently carry *labels* — replaces each post's categories with
        *newLabels*, and PUTs every modified post back to the server.

        Returns:
            The list of atom.Category objects applied to the posts
            (empty when *newLabels* is falsy).
        """

        query = service.Query()
        query.feed = self.postsUri
        query.max_results = 10000
        if labels:
            query.categories = labels

        feed = self.gdService.Get(query.ToUri())

        # Build the replacement category list once; it is shared by
        # every updated entry.
        newCategory = []
        if newLabels:
            for label in newLabels:
                category = atom.Category(scheme=blogger.LABEL_SCHEME,
                                         term=label)
                newCategory.append(category)

        for entry in feed.entry:
            # NOTE(review): the pop loop looks redundant since
            # entry.category is reassigned just below; it only empties
            # the old list object in place — confirm nothing else holds
            # a reference to it before simplifying.
            while len(entry.category) > 0:
                entry.category.pop()
            entry.category = newCategory
            editUri = entry.GetEditLink().href
            self.gdService.Put(entry, editUri)

        return newCategory
Exemplo n.º 3
0
    def handle_import(self, options):
        """
        Gets posts from Forumger.

        Reads ``forum_id`` from *options*, fetches up to 500 posts from
        the Forumger GData feed, and raises ``CommandError`` on any
        failure (missing id, missing gdata library, or service error).
        """

        forum_id = options.get("forum_id")
        if forum_id is None:
            raise CommandError("Usage is import_forumger %s" % self.args)

        try:
            from gdata import service
        except ImportError:
            raise CommandError("Could not import the gdata library.")

        forumger = service.GDataService()
        forumger.service = "forumger"
        forumger.server = "www.forumger.com"
        query = service.Query()
        query.feed = "/feeds/%s/posts/full" % forum_id
        query.max_results = 500
        try:
            feed = forumger.Get(query.ToUri())
        # "except X as err" (Python 2.6+) replaces the legacy
        # "except X, err" form, matching the style used by the other
        # handle_import implementations in this file.
        except service.RequestError as err:
            message = "There was a service error. The response was: " \
                "%(status)s %(reason)s - %(body)s" % err.message
            raise CommandError(message, forumger.server + query.feed,
                               err.message["status"])
Exemplo n.º 4
0
    def get_feeds(self, page_id, page_items):
        """Return one page of blog posts plus navigation metadata.

        Requests *page_items* posts starting at page *page_id* (1-based)
        and packages each entry as a dict, together with next/prev links.
        """
        query = service.Query()
        query.feed = ('http://www.blogger.com/feeds/' + self.blog_id +
                      '/posts/default')
        query.max_results = page_items
        query.start_index = ((page_id - 1) * page_items) + 1

        feed = self.blogger_service.Get(query.ToUri())

        posts = [{
            'id': self.get_post_id(post),
            'title': post.title.text,
            'link': self.get_post_link(post),
            'published': parser.parse(post.published.text),
            'updated': post.updated.text,
            'author': post.author[0].name.text,
            'content': post.content.text,
            'comments': self.get_comments(post),
        } for post in feed.entry]

        return {
            'page_id': page_id,
            'post_id': None,
            'posts': posts,
            'next_link': self.get_next_link(page_id, feed),
            'prev_link': self.get_prev_link(page_id, feed),
            'singlepost': False,
        }
Exemplo n.º 5
0
    def PrintPostsInDateRange(self, start_time, end_time):
        """This method displays the title and modification time for any posts that
    have been created or updated in the period between the start_time and
    end_time parameters.  The method creates the query, submits it to the
    GDataService, and then displays the results.
  
    Note that while the start_time is inclusive, the end_time is exclusive, so
    specifying an end_time of '2007-07-01' will include those posts up until
    2007-6-30 11:59:59PM.

    The start_time specifies the beginning of the search period (inclusive),
    while end_time specifies the end of the search period (exclusive).
    """

        # Create query and submit a request.
        query = service.Query()
        query.feed = '/feeds/' + self.blog_id + '/posts/default'
        query.updated_min = start_time
        query.updated_max = end_time
        query.orderby = 'updated'
        feed = self.service.Get(query.ToUri())

        # Print the results.
        print feed.title.text + " posts between " + start_time + " and " + end_time
        print feed.title.text
        for entry in feed.entry:
            if not entry.title.text:
                print "\tNo Title"
            else:
                print "\t" + entry.title.text
        print
Exemplo n.º 6
0
def PrintAllPosts(blogger_service, blog_id, max_results='99999'):
    query = service.Query()
    query.feed = '/feeds/' + blog_id + '/posts/default'
    query.max_results = max_results
    feed = blogger_service.Get(query.ToUri())

    if os.path.exists(os.getcwd() + "/" + feed.title.text + " Backup"):
        shutil.rmtree(os.getcwd() + "/" + feed.title.text + " Backup")
    os.mkdir(os.getcwd() + "/" + feed.title.text + " Backup")

    os.makedirs(os.getcwd() + "/" + feed.title.text + " Backup" + "/images/")

    for entry in feed.entry:
        print "Parsing images in " + entry.title.text
        os.makedirs(os.getcwd() + "/" + feed.title.text + " Backup" +
                    "/images/" + entry.title.text)

        html = BeautifulSoup(entry.content.text)
        images = html.findAll('img')

        for i in range(len(images)):
            print "Downloaded " + str(i) + " out of " + str(len(images))
            urllib.urlretrieve(
                images[i]["src"],
                os.getcwd() + "/" + feed.title.text + " Backup" + "/images/" +
                entry.title.text + "/" + images[i]["src"].split('/')[-1])
Exemplo n.º 7
0
def get_hrefs():
    """Return the href of the last link of every post in the blog feed.

    Pages through the Blogger GData feed 500 entries at a time until a
    short page signals the end of the feed.
    """
    blogger_service = service.GDataService()
    blogger_service.service = 'blogger'
    blogger_service.server = 'www.blogger.com'
    blogger_service.ssl = False
    query = service.Query()
    query.feed = '/feeds/6752139154038265086/posts/default'
    query.max_results = 500

    hrefs = []
    page = 0
    while True:
        # GData start_index is 1-based.
        query.start_index = page * 500 + 1
        feed = blogger_service.Get(query.ToUri())
        logging.info('%d urls fetched, fetch number %d' %
                     (len(feed.entry), page + 1))
        for post in feed.entry:
            hrefs.append(post.link[-1].href)

        # A short page means the feed is exhausted.
        if len(feed.entry) != 500:
            break
        page += 1

    logging.info('retrieved %d urls total' % len(hrefs))
    return hrefs
Exemplo n.º 8
0
    def get_post(self, post_id):
        """Fetch a single post by id and wrap it in the blog dict format."""
        query = service.Query()
        query.feed = ('http://www.blogger.com/feeds/' + self.blog_id +
                      '/posts/default/' + post_id)
        entry = self.blogger_service.Get(query.ToUri())

        post = {
            'id': entry.id,
            'title': entry.title.text,
            'link': self.get_post_link(entry),
            'published': parser.parse(entry.published.text),
            'updated': entry.updated.text,
            'author': entry.author[0].name.text,
            'content': entry.content.text,
            'comments': self.get_comments(entry),
        }

        # Single-post pages carry no pagination links.
        return {
            'page_id': -1,
            'post_id': post_id,
            'posts': [post],
            'next_link': None,
            'prev_link': None,
            'singlepost': True,
        }
Exemplo n.º 9
0
    def getIndividualPost(self, blogID, postID):
        """Fetch the single post entry *postID* within blog *blogID*."""
        # Build the query for one specific post and submit it.
        query = service.Query()
        query.feed = '/feeds/%s/posts/default/%s' % (blogID, postID)
        return self.service.Get(query.ToUri())
Exemplo n.º 10
0
def PrintUserBlogTitles(blogger_service):
    query = service.Query()
    query.feed = '/feeds/default/blogs'
    feed = blogger_service.Get(query.ToUri())

    print feed.title.text
    for entry in feed.entry:
        print "\t" + entry.title.text, "blog_id:", entry.GetSelfLink(
        ).href.split("/")[-1]
Exemplo n.º 11
0
    def PrintUserBlogTitles(self):
        query = service.Query()
        query.feed = '/feeds/default/blogs'
        feed = self.service.Get(query.ToUri())

        # Print the results.
        print feed.title.text
        for entry in feed.entry:
            print "\t" + entry.title.text
Exemplo n.º 12
0
 def GetBlogByTitle(self, title):
     """Return the blog entry whose title equals *title*.

     Side effect: stores the matching blog's id in self.blog_id.
     Exits the process when no blog matches.
     """
     query = service.Query()
     query.feed = '/feeds/default/blogs'
     feed = self.blogger_service.Get(query.ToUri())
     for blog in feed.entry:
         if blog.title.text != title:
             continue
         # blog id is the last path segment of the self link.
         self.blog_id = blog.GetSelfLink().href.split("/")[-1]
         return blog
     print("Can't find blog with title : {0}".format(title))
     sys.exit(0)
Exemplo n.º 13
0
def PrintAllPosts(blogger_service, blog_id, max_results='99999'):
    '''Fetch the blog's post feed and hand it to the article backup step.'''
    query = service.Query()
    query.feed = '/feeds/%s/posts/default' % blog_id
    query.max_results = max_results
    feed = blogger_service.Get(query.ToUri())

    #DownloadImages(feed)
    DownloadArticles(feed)
Exemplo n.º 14
0
    def PrintUserBlogTitles(self):
        """Prints a list of all the user's blogs."""

        # Request the feed.
        query = service.Query()
        query.feed = '/feeds/default/blogs'
        feed = self.service.Get(query.ToUri())

        # Print the results.
        print feed.title.text
        for entry in feed.entry:
            print "\t" + entry.title.text
        print
Exemplo n.º 15
0
    def updatePost(self, user, password, startTime, endTime, newContent):
        """Replace the content of the first post published in a time window.

        Logs in as *user*, selects the first blog, queries posts published
        between *startTime* and *endTime*, and overwrites the first match
        with *newContent* (stored as HTML).
        """
        self.blogerLogin(user, password)
        self.getOneBlog(0)

        # Query for posts published inside the requested window.
        query = service.Query()
        query.feed = '/feeds/' + self.blogId + '/posts/default'
        query.published_min = startTime
        query.published_max = endTime
        matches = self.service.Get(query.ToUri())

        # The window is expected to hold a single post; either way only
        # the first entry is updated.
        target = matches.entry[0]
        target.content = atom.Content(content_type='html', text=newContent)
        self.gdService.Put(target, target.GetEditLink().href)
Exemplo n.º 16
0
    def getListPost(self, idBlog, iMaxNumPost, allDraft=None):
        """Return posts of blog *idBlog*, optionally filtered by draft state.

        Args:
            idBlog: blog identifier used to build the feed URL.
            iMaxNumPost: maximum number of posts to request.
            allDraft: if truthy, return only drafts; if falsy (but not
                None), return only non-draft posts; if None, return all.

        Returns:
            A list of post entries from the feed.
        """
        # Build the query and submit the request.
        query = service.Query()
        query.feed = '/feeds/' + idBlog + '/posts/default'
        query.max_results = iMaxNumPost

        feed = self.service.GetFeed(query.ToUri())

        # "is None" replaces "!= None"; comprehension / list() replace
        # the quadratic "list = list + [item]" accumulation.
        if allDraft is None:
            return list(feed.entry)
        want_draft = bool(allDraft)
        return [post for post in feed.entry
                if bool(blogger.is_draft(post)) == want_draft]
Exemplo n.º 17
0
 def run(self):
     """Collect the user's blogs as dicts and push the list onto the queue.

     Each dict carries id/title/updated plus one key per link rel; the
     'self' link additionally yields a 'blog_id' key (its last path
     segment).
     """
     query = service.Query()
     query.feed = '/feeds/default/blogs'
     feed = self.service.Get(query.ToUri())

     blogs = []
     for entry in feed.entry:
         info = {
             'id': entry.id.text,
             'title': entry.title.text,
             'updated': entry.updated.text,
         }
         for link in entry.link:
             # rel values look like ".../ns#self" — keep the fragment.
             rel = link.rel.split('#')[-1]
             info[rel] = link.href
             if rel == 'self':
                 info['blog_id'] = link.href.split('/')[-1]
         blogs.append(info)
     self.queue.put_nowait(blogs)
Exemplo n.º 18
0
  def PrintPostsInLastWeek(self):
    """This method displays the title and modification time for any posts that
    have been created or updated in the period between the start_time and
    end_time parameters.  The method creates the query, submits it to the
    GDataService, and then displays the results.
  
    Note that while the start_time is inclusive, the end_time is exclusive, so
    specifying an end_time of '2007-07-01' will include those posts up until
    2007-6-30 11:59:59PM.
    The start_time specifies the beginning of the search period (inclusive),
    while end_time specifies the end of the search period (exclusive).
    """

    # Create query and submit a request.
    query = service.Query()
    query.feed = '/feeds/' + self.blog_id + '/posts/default'
    # Window: today back 7 days, formatted as date-only strings.
    end_time =  datetime.strftime(datetime.now(), "%Y-%m-%d")
    start_time   =  datetime.strftime(datetime.now() + timedelta( days = -7 ), "%Y-%m-%d")    
    print  start_time  ,  end_time
    query.updated_min = start_time
    query.updated_max = end_time
    query.orderby = 'updated'
    feed = self.service.Get(query.ToUri())
    # Prefer simplejson when installed; fall back to stdlib json.
    try: 
      import simplejson as json
    except:
      import json
    # NOTE(review): entry.article is not a standard GData feed entry
    # attribute — this likely should be entry.title; confirm before use.
    out  =  [ {'selected'   : False  ,    'updated_text'  :    entry.updated.text  or  ""  , 'updated_summary'  :    entry.summary.text   or  ""  ,
        'article_body'  :   entry.content.text or  ""  ,'article_title'  :    entry.article.text  or  ""  }
        for    entry  in   feed.entry   ]
    print json.dumps(out)


    
    # NOTE(review): 'html', 'template', 'templatepath' and
    # 'your_generated_data_dict' are not defined anywhere in this method;
    # this tail section appears to be leftover from another script and
    # would raise NameError if reached — confirm and remove or repair.
    html = html + template.render (templatepath + 'newsletter_start.html', {})
    html = html + template.render (templatepath + 'newsletter_js.html', {})               
    html = html + """<table width="500" class='out' border="1" data-dynamic="%s">""" % json.dumps(your_generated_data_dict)
    f =  open( 'c:/xampp/htdocs/newsletter.php'  , 'w')
    f.write(html)
    f.close()                
Exemplo n.º 19
0
    def GetPostByTitle(self, title):
        ''' Fetch a single post which matches the title most. If "all" or
      "recent" are given then fetches all or recents posts. 
      '''
        posts = []
        if title == "all":
            # Fetch everything published between 1980 and today.
            query = service.Query()
            query.feed = '/feeds/' + self.blog_id + '/posts/default'
            query.published_min = '1980-01-01'
            query.published_max = time.strftime('%Y-%m-%d')
            feed = self.blogger_service.Get(query.ToUri())

            print(feed.title.text + " posts between " + query.published_min + " and " \
                + query.published_max)
            posts = [post for post in feed.entry if post.title.text]
            return posts

        feed = self.blogger_service.GetFeed('/feeds/' + self.blog_id +
                                            '/posts/default')
        for entry in feed.entry:
            if not entry.title.text:
                continue  # skip titleless posts
            if title == "recent":
                posts.append(entry)
                continue
            # Fuzzy-match the requested title; first hit above 70%
            # similarity wins and is returned alone.
            similarity = difflib.SequenceMatcher(
                None, entry.title.text, title).ratio()
            if similarity > 0.7:
                print(" |- Found with title : {0} ".format(entry.title.text))
                posts.append(entry)
                return posts
        print("== Total {0} posts fetched . ".format(len(posts)))
        return posts
Exemplo n.º 20
0
    def getPosts(self,
                 labels=None,
                 publishedDate=None,
                 orderby=None,
                 maxResults=25):
        """Query the posts feed and return (title, self link, alt link) tuples.

        Args:
            labels: optional category labels to filter by.
            publishedDate: optional (min, max) pair of timestamps, e.g.
                "2008-02-09T08:00:00-08:00".
            orderby: sort key; defaults to 'updated' when not given.
            maxResults: maximum number of posts to request.
        """
        query = service.Query()
        query.feed = self.postsUri
        query.orderby = orderby if orderby else 'updated'
        if labels:
            query.categories = labels
        if publishedDate:
            query.published_min = publishedDate[0]
            query.published_max = publishedDate[1]
        query.max_results = maxResults

        feed = self.gdService.Get(query.ToUri())

        return [(entry.title.text,
                 entry.GetSelfLink().href,
                 entry.GetAlternateLink().href)
                for entry in feed.entry]
Exemplo n.º 21
0
    def __init__(self, email, password):
        """Log in to Blogger via ClientLogin and locate a specific blog's id.

        Iterates the user's blog list, printing each title and first link,
        and stores in self.blog_id the id of the blog whose first link
        matches a hard-coded feed URL (self.blog_id stays 0 when no blog
        matches).
        """
        # Authenticate using ClientLogin.
        self.service = service.GDataService(email, password)
        self.service.source = 'Blogger_Python_Sample-1.0'
        self.service.service = 'blogger'
        self.service.server = 'www.blogger.com'
        self.service.ProgrammaticLogin()
        self.blog_id = 0

        # Get the blog ID for http://pythonjobs.blogspot.com
        query = service.Query()
        query.feed = '/feeds/default/blogs'
        feed = self.service.Get(query.ToUri())

        for entry in feed.entry:
            print "\t" + entry.title.text
            print entry.link[0].href

            # if entry.link[0].href=='http://pythonjobs.blogspot.com/':
            if entry.link[
                    0].href == 'http://www.blogger.com/feeds/18362312542208032325/blogs/5503040385101187323':
                # blog id is the last path segment of the self link.
                self_link = entry.GetSelfLink()
                self.blog_id = self_link.href.split('/')[-1]
                break
    def handle_import(self, options):
        """
        Gets posts from Blogger.

        Pages through the blog's post feed 500 entries at a time, adding
        each post and its comments via self.add_post / self.add_comment.
        Duplicate post ids are skipped, and the loop ends when a page
        yields no new posts.
        """

        blog_id = options.get("blog_id")
        if blog_id is None:
            raise CommandError("Usage is import_blogger %s" % self.args)

        try:
            from gdata import service
        except ImportError:
            raise CommandError("Could not import the gdata library.")

        blogger = service.GDataService()
        blogger.service = "blogger"
        blogger.server = "www.blogger.com"

        # new_posts doubles as a loop sentinel: it starts truthy so the
        # loop runs once, then counts posts added per page.
        start_index = 1
        processed_posts = []
        new_posts = 1

        while new_posts:
            new_posts = 0

            query = service.Query()
            query.feed = "/feeds/%s/posts/full" % blog_id
            query.max_results = 500
            query.start_index = start_index

            try:
                feed = blogger.Get(query.ToUri())
            except service.RequestError as err:
                message = ("There was a service error. The response was: "
                           "%(status)s %(reason)s - %(body)s" % err.message)
                raise CommandError(message, blogger.server + query.feed,
                                   err.message["status"])

            for (i, entry) in enumerate(feed.entry):
                # this basically gets the unique post ID from the URL to itself
                # and pulls the ID off the end.
                post_id = entry.GetSelfLink().href.split("/")[-1]

                # Skip duplicate posts. Important for the last query.
                if post_id in processed_posts:
                    continue

                title = entry.title.text
                content = entry.content.text
                # this strips off the time zone info off the end as we want UTC
                clean_date = entry.published.text[:re.search(
                    r"\.\d{3}", entry.published.text).end()]

                published_date = self.parse_datetime(clean_date)

                # TODO - issues with content not generating correct <P> tags

                tags = [tag.term for tag in entry.category]
                post = self.add_post(title=title,
                                     content=content,
                                     pub_date=published_date,
                                     tags=tags)

                # get the comments from the post feed and then add them to
                # the post details
                comment_url = "/feeds/%s/%s/comments/full?max-results=1000"
                comments = blogger.Get(comment_url % (blog_id, post_id))

                for comment in comments.entry:
                    email = comment.author[0].email.text
                    author_name = comment.author[0].name.text
                    # Strip off the time zone info off the end as we want UTC
                    clean_date = comment.published.text[:re.search(
                        r"\.\d{3}", comment.published.text).end()]

                    comment_date = self.parse_datetime(clean_date)

                    website = ""
                    if comment.author[0].uri:
                        website = comment.author[0].uri.text
                    body = comment.content.text

                    # add the comment as a dict to the end of the comments list
                    self.add_comment(
                        post=post,
                        name=author_name,
                        email=email,
                        body=body,
                        website=website,
                        pub_date=comment_date,
                    )

                processed_posts.append(post_id)
                new_posts += 1

            # Advance to the next page of up to 500 posts.
            start_index += 500
Exemplo n.º 23
0
  def PrintPostsInLastWeek(self):
    """This method displays the title and modification time for any posts that
    have been created or updated in the period between the start_time and
    end_time parameters.  The method creates the query, submits it to the
    GDataService, and then displays the results.
  
    Note that while the start_time is inclusive, the end_time is exclusive, so
    specifying an end_time of '2007-07-01' will include those posts up until
    2007-6-30 11:59:59PM.
    The start_time specifies the beginning of the search period (inclusive),
    while end_time specifies the end of the search period (exclusive).
    """

    # Create query and submit a request.
    query = service.Query()
    query.feed = '/feeds/' + self.blog_id + '/posts/default'
    # Window: today back 7 days, formatted as date-only strings.
    end_time =  datetime.strftime(datetime.now(), "%Y-%m-%d")
    start_time   =  datetime.strftime(datetime.now() + timedelta( days = -7 ), "%Y-%m-%d")    
    print  start_time  ,  end_time
    query.updated_min = start_time
    query.updated_max = end_time
    query.orderby = 'updated'
    feed = self.service.Get(query.ToUri())
    # Read the newsletter HTML template, then render one table row per
    # post into rowContent below.
    fd =  open( 'c:/xampp/htdocs/template.html'  , 'r')
    content   = fd.read()
    fContent =""
    fd.close()
    rowContent =""
    count =0
    for entry in feed.entry:
        count+=1 
        # NOTE(review): these bare excepts silently map any missing
        # attribute to "" — they also hide unrelated errors; confirm
        # narrowing to AttributeError is safe.
        try :
            article_update =entry.updated.text
        except:
            article_update =   ""
        try :
            article_summary =entry.summary.text
        except:
            article_summary =   ""

        try :
            article_body =  entry.content.text
        except:
            article_body =   ""
        
        try :
            article_title = entry.title.text
        except:
            article_title =   "" 
            
        rowContent  += """<table>
                   <tr>
                   <td><input type="checkbox" name="articleList[%d]['selOpt']" value="on" /></td>

                   <td><input type="text" name="<?php echo "articleList[%d]['date']"; ?>" value="%s" /></td>

                   <td><input type="text" name="<?php echo "articleList[%d]['title']"; ?>" value="%s"  /></td>

                   <td><input type="text" name="<?php echo "articleList[%d]['body']"; ?>"  value="%s"  /></td>

                   </tr>

                   </table>"""%( count  ,  count  ,  article_update  , count , article_title ,  count ,  article_body )   
    # NOTE(review): 'contentPhp' is not defined in this method —
    # presumably a module-level template fragment; confirm, otherwise
    # this line raises NameError.
    fContent += content % (  rowContent , contentPhp)
    print  fContent 
    f =  open( 'c:/xampp/htdocs/newsletter.php'  , 'w')
    f.write(fContent)
    f.close()                
Exemplo n.º 24
0
# login...
# NOTE(review): placeholder credentials — replace before running.
selfservice = service.GDataService('user', 'password')
selfservice.source = 'Blogger_Python_Sample-1.0'
selfservice.service = 'blogger'
selfservice.server = 'www.blogger.com'
selfservice.ProgrammaticLogin()

# Get the blog ID for the first blog.
# The blog id is the last path segment of the entry's self link.
feed = selfservice.Get('/feeds/default/blogs')
self_link = feed.entry[0].GetSelfLink()
if self_link:
    selfblog_id = self_link.href.split('/')[-1]

# find the update entry.
# Query posts published inside a fixed, hard-coded time window.
query = service.Query()
query.feed = '/feeds/' + selfblog_id + '/posts/default'
query.published_min = "2008-02-17T14:00:00-08:00"
query.published_max = "2008-02-18T00:00:00-08:00"
#query.orderby = 'published'
feed = selfservice.Get(query.ToUri())

# Debug output: the query URI, feed title, and each matching post title.
print query.ToUri()
print feed.title.text
print feed.entry
for entry in feed.entry:
    print '\t' + entry.title.text
# should have only one entry.
theEntry = feed.entry[0]

# update the post on server side.
# NOTE(review): the snippet ends here — the actual Put/update call is
# not shown in this excerpt.
Exemplo n.º 25
0
    def handle_import(self, options):
        """
        Gets posts from Blogger.

        Fetches up to 500 posts for ``options["blog_id"]`` in a single
        request, adds each post via self.add_post, then fetches and adds
        that post's comments via self.add_comment.  Raises CommandError
        when the blog id is missing, gdata is unavailable, or the service
        request fails.
        """

        blog_id = options.get("blog_id")
        if blog_id is None:
            raise CommandError("Usage is import_blogger %s" % self.args)

        try:
            from gdata import service
        except ImportError:
            raise CommandError("Could not import the gdata library.")

        blogger = service.GDataService()
        blogger.service = "blogger"
        blogger.server = "www.blogger.com"
        query = service.Query()
        query.feed = "/feeds/%s/posts/full" % blog_id
        query.max_results = 500
        try:
            feed = blogger.Get(query.ToUri())
        except service.RequestError as err:
            message = "There was a service error. The response was: " \
                "%(status)s %(reason)s - %(body)s" % err.message
            raise CommandError(message, blogger.server + query.feed,
                               err.message["status"])

        for (i, entry) in enumerate(feed.entry):
            # this basically gets the unique post ID from the URL to itself
            # and pulls the ID off the end.
            post_id = entry.GetSelfLink().href.split("/")[-1]
            title = entry.title.text
            content = entry.content.text
            #this strips off the time zone info off the end as we want UTC
            # NOTE(review): 'timezone' is not defined in this method —
            # presumably time.timezone (seconds west of UTC); confirm the
            # module-level import.
            published_date = datetime.strptime(
                entry.published.text[:-6],
                "%Y-%m-%dT%H:%M:%S.%f") - timedelta(seconds=timezone)

            #TODO - issues with content not generating correct <P> tags

            tags = [tag.term for tag in entry.category]
            post = self.add_post(title=title,
                                 content=content,
                                 pub_date=published_date,
                                 tags=tags)

            # get the comments from the post feed and then add them to
            # the post details
            ids = (blog_id, post_id)
            comment_url = "/feeds/%s/%s/comments/full?max-results=1000" % ids
            comments = blogger.Get(comment_url)

            for comment in comments.entry:
                email = comment.author[0].email.text
                author_name = comment.author[0].name.text
                #this strips off the time zone info off the end as we want UTC
                comment_date = datetime.strptime(
                    comment.published.text[:-6],
                    "%Y-%m-%dT%H:%M:%S.%f") - timedelta(seconds=timezone)
                website = ""
                if comment.author[0].uri:
                    website = comment.author[0].uri.text
                body = comment.content.text

                # add the comment as a dict to the end of the comments list
                self.add_comment(post=post,
                                 name=author_name,
                                 email=email,
                                 body=body,
                                 website=website,
                                 pub_date=comment_date)