Example #1
import subr_rss

def _getfeed(site):
    """ Get the RSS feed of site """
    #
    # Note: here we must not change the encoding of body, because the
    # SAX library already performs this operation.
    #
    bodyvec = []
    # fetch() appends chunks of the response body to bodyvec and
    # returns a nonzero status on failure
    status = subr_rss.fetch(site, bodyvec, [])
    if status != 0:
        return None
    body = "".join(bodyvec)
    return body
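
A minimal usage sketch, assuming subr_rss is importable and behaves as the snippet implies (nonzero status on failure, body chunks appended to the list). The host name below is a placeholder, not taken from the original code:

body = _getfeed("feeds.example.com")
if body is not None:
    print("fetched %d bytes of feed data" % len(body))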
Example #2
import logging
import os
import random
import time

# xml.sax is assumed here, since the snippet calls sax.parseString()
import xml.sax as sax

# Project-local helper modules referenced by the snippet
import sax_atom
import sax_rss
import subr_bitly
import subr_http
import subr_misc
import subr_rss

def process_site(site, noisy):
    """ Process the feeds of a site """

    logging.info("")
    logging.info("* site: %s", site)
    logging.info("")

    result = subr_rss.fetch(site, noisy=noisy)
    if not result or not result[0]:
        return
    body = result[0]

    if "<rss" not in body:
        handler = sax_atom.AtomHandler()
    else:
        handler = sax_rss.RssHandler()
    sax.parseString(body, handler)

    content = zip(handler.links, handler.pub_dates)
    for link, date in content:

        # Filter by publication date: keep only entries with
        # year >= 2013, month == May, and day >= 15
        if date[0] < 2013:
            continue
        if date[1] != 5:
            continue
        if date[2] < 15:
            continue

        logging.info("")
        logging.info("- <%s>", link)
        logging.info("")

        folder = subr_misc.make_post_folder(date, site)
        subr_misc.mkdir_recursive_idempotent(folder)

        # Randomized pause between requests, to avoid hammering the
        # shortener service
        time.sleep(random.randrange(5, 8))
        link = subr_bitly.shorten(link, noisy=noisy)

        filename = subr_misc.bitlink_to_filename(link)
        pname = os.sep.join([folder, filename])
        if os.path.isfile(pname):
            logging.info("main: file already exists: %s", pname)
            continue

        # Randomized pause before fetching the post itself
        time.sleep(random.randrange(5, 8))
        _, body = subr_http.fetch_url(link, noisy=noisy)

        # Use a context manager so the file is closed on errors too
        with open(pname, "w") as filep:
            filep.write(body)
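
A hedged sketch of a driver for process_site; the list of sites and the logging setup are illustrative assumptions, not part of the original code:

def main():
    logging.basicConfig(level=logging.INFO)
    # Placeholder feed hosts; replace with real ones
    sites = ["example.org", "example.net"]
    for site in sites:
        process_site(site, noisy=True)

if __name__ == "__main__":
    main()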