Пример #1
0
def main():
    """Refresh stored metadata for the 3000 most recently dated articles.

    For every selected article number the XML API page is fetched from
    ``www.acfun.tv``.  When the fetch helper returns the ``"ERROR"``
    sentinel the article is passed to ``mark_removed``; otherwise the XML
    is parsed and the extracted fields are handed to ``update_article``.
    Keywords are only extracted for the first 599 rows of the result set;
    later rows are updated with an empty keywords string.

    The module-level ``cursor`` and ``conn`` are closed when the run ends,
    including when it ends with an exception.
    """
    # NOTE: an earlier comment here described an ``ac_article_number % 7``
    # sharding scheme (1/7 of the articles per day); the query below does
    # not filter that way, it simply takes the 3000 newest rows.
    sql = ("SELECT ac_article_number "
           "FROM `ac_article` "
           "ORDER BY ac_article_date DESC LIMIT 3000 ")
    link = '/api/?type=xml&current=yes&charset=utf8&id='
    try:
        cursor.execute(sql)
        articles_to_process = cursor.fetchall()
        for position, row in enumerate(articles_to_process, 1):
            article_number = row[0]
            retfile = ac_core.gethtmlfile('www.acfun.tv',
                                          link + str(article_number))
            if retfile == "ERROR":
                # this article may have been removed
                mark_removed(article_number)
                continue
            try:
                info = parseString(retfile)
            except Exception:
                # Skip malformed XML, but let KeyboardInterrupt/SystemExit
                # propagate so the job can still be stopped.
                # Single-argument parenthesized print behaves the same
                # under Python 2 and Python 3.
                print(str(article_number) + " xml broken")
                continue
            # Only the leading rows get their keywords refreshed.
            keywords = getText(info, 'keywords') if position < 600 else ''
            update_article(article_number, (
                getText(info, 'arctitle'),
                getText(info, 'typeid'),
                getText(info, 'memberID'),
                getText(info, 'description'),
                getText(info, 'click'),
                getText(info, 'stow'),
                keywords,
            ))
    finally:
        # Release the database handles even if the run aborts midway.
        try:
            cursor.close()
        finally:
            conn.close()

    print("\ndone")
Пример #2
0
def main():
    """Re-fetch and repair articles that have no author recorded.

    Selects every ``ac_article`` row whose ``ac_author_name`` is empty and
    whose ``ac_author_id`` is 0, downloads the article page
    ``/v/<ac_article_link>/`` from ``www.acfun.tv`` and passes the fetched
    content together with the article id to ``fix_article``.

    The module-level ``cursor`` and ``conn`` are closed when the run ends,
    including when it ends with an exception.
    """
    sql = "SELECT ac_article_id, ac_article_link FROM `ac_article` WHERE ac_author_name = '' and ac_author_id = 0"
    try:
        cursor.execute(sql)
        # Each row is (ac_article_id, ac_article_link), per the SELECT list.
        for article_id, article_link in cursor.fetchall():
            # Progress output; single-argument parenthesized print behaves
            # the same under Python 2 and Python 3.
            print(article_link)
            retfile = ac_core.gethtmlfile('www.acfun.tv',
                                          '/v/' + article_link + '/')
            fix_article(retfile, article_id)
    finally:
        # Release the database handles even if the run aborts midway.
        try:
            cursor.close()
        finally:
            conn.close()

    print("\ndone")
Пример #3
0
def main():
    """Refresh stored metadata for every article, oldest first.

    For every article number (ordered by ``ac_article_date`` ascending)
    the XML API page is fetched from ``www.acfun.tv``.  When the fetch
    helper returns the ``"ERROR"`` sentinel the article is passed to
    ``mark_removed``; otherwise the XML is parsed and the extracted fields
    are handed to ``update_article``.  Keywords are only extracted for the
    first 599 rows of the result set; later rows are updated with an empty
    keywords string.

    The module-level ``cursor`` and ``conn`` are closed when the run ends,
    including when it ends with an exception.
    """
    # NOTE: an earlier comment here described an ``ac_article_number % 7``
    # sharding scheme (1/7 of the articles per day); the query below does
    # not filter that way, it selects all rows.
    sql = "SELECT ac_article_number FROM `ac_article` ORDER BY ac_article_date ASC"
    link = "/api/?type=xml&current=yes&charset=utf8&id="
    try:
        cursor.execute(sql)
        articles_to_process = cursor.fetchall()
        for position, row in enumerate(articles_to_process, 1):
            article_number = row[0]
            retfile = ac_core.gethtmlfile(
                "www.acfun.tv", link + str(article_number)
            )
            if retfile == "ERROR":
                # this article may have been removed
                mark_removed(article_number)
                continue
            try:
                info = parseString(retfile)
            except Exception:
                # Skip malformed XML, but let KeyboardInterrupt/SystemExit
                # propagate so the job can still be stopped.
                # Single-argument parenthesized print behaves the same
                # under Python 2 and Python 3.
                print(str(article_number) + " xml broken")
                continue
            # Only the leading rows get their keywords refreshed.
            keywords = getText(info, "keywords") if position < 600 else ""
            update_article(
                article_number,
                (
                    getText(info, "arctitle"),
                    getText(info, "typeid"),
                    getText(info, "memberID"),
                    getText(info, "description"),
                    getText(info, "click"),
                    getText(info, "stow"),
                    keywords,
                ),
            )
    finally:
        # Release the database handles even if the run aborts midway.
        try:
            cursor.close()
        finally:
            conn.close()

    print("\ndone")
Пример #4
0
def main():
    """Crawl the category list pages and feed each page to ``process_text``.

    By default pages 1..119 of every category are fetched.  When exactly
    one command-line argument is given it is treated as a category id: the
    number of stored articles in that category is counted and the page
    limit becomes ``count // 15 + 10``; if ``count // 15`` is 0 nothing is
    crawled.

    NOTE(review): the argument only influences the page limit -- all six
    hard-coded categories are still crawled; confirm this is intended.

    The module-level ``cursor`` and ``conn`` are closed when the run ends,
    including on the early return and when an exception is raised.
    """
    end = 120
    try:
        if len(sys.argv) == 2:
            # Bind the argument as a query parameter instead of
            # interpolating it into the SQL text (avoids SQL injection).
            # Assumes a DB-API driver using the ``format`` paramstyle
            # (e.g. MySQLdb) -- TODO confirm against the connection setup.
            sql = "SELECT COUNT(*) AS count FROM ac_article WHERE ac_article_category = %s"
            cursor.execute(sql, (sys.argv[1],))
            row = cursor.fetchone()
            # Floor division keeps ``end`` an integer for range() below;
            # presumably 15 is the number of articles per list page.
            end = row[0] // 15
            if end == 0:
                return
            end += 10

        category = ('1', '8', '9', '10', '13', '14')
        for cate in category:
            link = '/plus/list.php?typeid=' + cate + '&PageNo='
            for page in range(1, end):
                retfile = ac_core.gethtmlfile('www.acfun.tv', link + str(page))
                # The fetched bytes are decoded as GB18030 before processing.
                process_text(retfile.decode('GB18030'), cate)
    finally:
        # Release the database handles on every exit path.
        try:
            cursor.close()
        finally:
            conn.close()

    # Single-argument parenthesized print behaves the same under
    # Python 2 and Python 3.
    print("\ndone")