Exemplo n.º 1
0
def review_read_loop(app_id, page_num, review_type, review_sort_order):
    params = {
        'id':app_id, 
        'reviewSortOrder':review_sort_order,
        'reviewType':review_type,
        'pageNum':page_num
        }
    param = urllib.urlencode(params)
    url = '/store/getreviews'
    print param, url
    ### only for test, this part
    #if int(page_num)>2:
    #    db_review.db_execute_g(db_sql.sql_review_read_status_update, (app_id, ))
    #    return 404, page_num
    ###
    status, body = android_https_post(url, param)
    if status == 404:
        print '==: 404'
        db_review.db_execute_g(db_sql.sql_review_read_status_update, (app_id, ))
        return status, page_num
    if status != 200:
        print 'app review https connection error: %s'%(str(status))
        return status, page_num
        #raise Exception('app getreview ajax status != 200')
    body = body.lstrip(")]}'").strip()
    try:
        review_read(app_id, body)
        db_review.db_execute_g(db_sql.sql_review_read_update, (page_num, app_id, ))
        page_num = int(page_num) + 1
    except Exception as e:
        err.except_p(e)
    return status, page_num
def main():
    """Initialise the database, then run the category and app readers once.

    The first exception raised by any reader skips the remaining steps and
    is reported through err.except_p.
    """
    db_init()
    try:
        # The two category passes are only needed on the very first run;
        # comment them out when re-running afterwards.
        categories_read_main()
        category_read_main()
        app_read_main()
    except Exception as exc:
        err.except_p(exc)
def main():
    """Initialise the app database and repeat video_read_main until done.

    video_read_main is expected to return a true value once nothing is left
    to process. An exception during a pass is reported through
    err.except_p and the pass is tried again.
    """
    db_app.db_init()
    finish = False
    # Truthiness test instead of comparing against the False literal.
    while not finish:
        try:
            finish = video_read_main()
        except Exception as e:
            err.except_p(e)
def app_read_main():
    """Run app_read_main_temp and report whether it finished.

    Returns:
        The value returned by app_read_main_temp, or False when it raised
        (the exception is reported through err.except_p).
    """
    try:
        return app_read_main_temp()
    except Exception as e:
        # Pass the exception object itself, not a placeholder string, so
        # the real error is what gets reported.
        err.except_p(e)
        return False
def main():
    """Initialise the database and repeat the category passes until done.

    Each pass runs categories_read_main and then category_read_main; the
    loop ends when category_read_main returns a true value. An exception in
    either step is reported through err.except_p and the pass is repeated.
    """
    db_init()
    # NOTE(review): this line used to carry the remark "comment this if run
    # after first time". Removing it would break the loop below, so the
    # remark probably belongs to categories_read_main() - confirm.
    finish = False
    # Truthiness test instead of comparing against the False literal.
    while not finish:
        try:
            categories_read_main()
            finish = category_read_main()
        except Exception as e:
            err.except_p(e)
def main():
    """Initialise the database and run the library readers in order.

    The first exception raised by any reader skips the remaining steps and
    is reported through err.except_p.
    """
    db_init()
    try:
        # First-run-only steps: comment these three out on later runs.
        language_read_main()
        home_read_main()
        home_read_max_main()
        # Steps that run every time.
        cate_read_main()
        cate_link_read_main()
    except Exception as exc:
        err.except_p(exc)
Exemplo n.º 7
0
def task_app_review_get():
    page = page_get(client_id, 'task', '', '')
    if page is None:
        err.except_p('page is none')
    try:
        j = json.loads(page)
        print 'TASK review:', len(j)
        app_review.c_task_review_read_main(j)
    except Exception as e:
        err.except_p(e)
def main():
    """Initialise the app database and repeat review_read_main until done.

    review_read_main_init and the first review_read_main pass run outside
    any try block, so an exception there propagates to the caller. Later
    passes report exceptions through err.except_p and are retried.
    """
    db_app.db_init()
    review_read_main_init()
    done = review_read_main()
    # Compared against False explicitly: review_read_main is defined
    # elsewhere, and any other return value (including None) ends the loop.
    while done == False:
        try:
            done = review_read_main()
        except Exception as exc:
            err.except_p(exc)
def google_plus_read_main():
    """Run google_plus_read for every row of sql_app_google_plus_get.

    Each row supplies (app_id, google_plus_href). A failure on one row is
    reported through err.except_p and the loop continues with the next row.

    Returns:
        True when the query returned no rows, False otherwise, so a caller
        can loop until nothing is left. (The flag used to be computed but
        never returned.)
    """
    finish = True
    rows = db_app.db_get_g(db_sql.sql_app_google_plus_get, ())
    for row in rows:
        finish = False
        app_id = row[0]
        google_plus_href = row[1]
        try:
            google_plus_read(app_id, google_plus_href)
            util.sleep()
        except Exception as e:
            err.except_p(e)
    return finish
def category_read_main():
    """Run category_read for every row of sql_cate_read_get.

    After a successful read, sql_cate_read_update is executed with the
    row's values. Failures are reported through err.except_p.

    Returns:
        True when the query returned no rows, False otherwise.
    """
    rows = db_play.db_get_g(db_sql.sql_cate_read_get, ())
    finish = True
    for row in rows:
        finish = False
        cate_name, cate_path, cate_param, cate_type = row[0], row[1], row[2], row[3]
        try:
            category_read(cate_path, cate_name, cate_type, cate_param)
            update_args = (cate_name, cate_path, cate_param, cate_type, )
            db_play.db_execute_g(db_sql.sql_cate_read_update, update_args)
        except Exception as exc:
            err.except_p(exc)
        # Pause after every row, whether or not it succeeded.
        util.sleep()
    return finish
def cate_link_read_main():
    """Run cate_link_read for every row of sql_lib_lang_cate_link_read_get.

    The module-level connection (lib_conn_http) is replaced whenever a
    row's host differs from the current lib_host_http. Failures of
    cate_link_read are reported through err.except_p.
    """
    global lib_host_http
    global lib_conn_http
    for row in db_lib.db_get_g(db_sql.sql_lib_lang_cate_link_read_get, ()):
        lang_href, cate_path, cate_param, link_href = row[0], row[1], row[2], row[3]
        # Host name = language href without the scheme and without slashes.
        host = lang_href.replace('http://', '').replace('/', '').strip()
        if lib_host_http != host:
            lib_host_http = host
            lib_conn_http = http.get_conn_http(host)
        try:
            cate_link_read(link_href, lang_href, cate_path, cate_param)
            util.sleep_i(1)
        except Exception as exc:
            err.except_p(exc)
def developer_read_store(developer_href, start_num):
    url = '%s&start=%d&num=12'%(developer_href, start_num)
    print '** developer %s **'%(url)
    try:
        status, body = android_https_get(url)
        if status == 404:
            print '== 404'
            db_developer.db_execute_g(db_sql.sql_developer_store_read_status_update, (developer_href, )) 
            return False
        if status != 200:
            raise Exception('app read https connection error: %s'%(str(status)))
        soup = BeautifulSoup(body)
        developer_read_store_website(developer_href, soup)
        developer_read_store_app(developer_href, soup)
        db_developer.db_execute_g(db_sql.sql_developer_store_start_num_update, (start_num, developer_href,)) ## record this page has been successfully read
        return True
    except Exception as e:
        err.except_p(e)
        return False
def related_read(app_id):
    try:
        url = '/%s/details?id=%s'%(android_root, app_id)
        print '** related %s **'%(url)
        status, body = android_https_get(url)
        #print status, body
        if status == 404:
            print '== 404'
            db_related.db_execute_g(db_sql.sql_related_read_update, (1, str(datetime.now()), app_id))
            return 
        if status != 200:
            raise Exception('related read https connection error: %s'%(str(status)))
        soup = BeautifulSoup(body)
        related_view(app_id, soup)
        related_install(app_id, soup)
        db_related.db_execute_g(db_sql.sql_related_read_update, (1, str(datetime.now()), app_id))
        util.sleep()
    except Exception as e:
        err.except_p(e)
def video_read_main():
    finish = True
    rows = db_app.db_get_g(db_sql.sql_video_get, ())
    i_t = len(rows)
    i = 0
    for row in rows:
        i = i + 1
        print '%d of %d'%(i, i_t), 
        finish = False
        app_id = row[0]
        video_href = row[1]
        view_total = row[2]
        video_href_d = video_href.split('/')[-1]
        video_id = video_href_d.split('?')[0].strip()
        try:
            video_read(video_id, app_id, video_href)
            util.sleep()
        except Exception as e:
            err.except_p(e)
    return finish
def cate_read_main():
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_cate_read_get, ())
    i_t = len(rows)
    i = 0
    for row in rows:
        i = i + 1
        print '%d of %d'%(i, i_t), 
        lang_href = row[0]
        cate_path = row[1]
        cate_param = row[2]
        if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip():
            lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
            lib_conn_http = http.get_conn_http(lib_host_http)
        try:
            cate_read(lang_href, cate_path, cate_param)
            util.sleep_i(1)
        except Exception as e:
            err.except_p(e)
def home_read_main():
    """Run home_read for every row of sql_lib_lang_get.

    For each row a new connection is opened to the row's host and stored in
    the module-level lib_host_http / lib_conn_http. After a successful
    home_read, sql_lib_lang_update is executed for the row's href. Failures
    are reported through err.except_p and the loop moves on.
    """
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_get, ())
    for row in rows:
        lang_href = row[0]
        # A connection is opened for every row. When working on the
        # "horizon" PC, comment out the two assignments below and
        # uncomment this block to reconnect only when the host changes:
        #
        # if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip():
        #     lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
        #     lib_conn_http = http.get_conn_http(lib_host_http)
        lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
        lib_conn_http = http.get_conn_http(lib_host_http)
        try:
            home_read(lang_href)
            db_lib.db_execute_g(db_sql.sql_lib_lang_update, (lang_href,))
            util.sleep_i(1)
        except Exception as e:
            err.except_p(e)
def app_read(app_id):
    try:
        url = '/%s/details?id=%s'%(android_root, app_id)
        print '** app %s **'%(url)
        status, body = android_https_get(url)
        #print status, body
        if status == 404:
            print '== 404'
            db_app.db_execute_g(db_sql.sql_app_read_update, (1, str(datetime.now()), app_id))
            return 
        if status != 200:
            raise Exception('app read https connection error: %s'%(str(status)))
        soup = BeautifulSoup(body)
        app_read_banner(app_id, soup)
        app_read_tab_overview(app_id, soup)
        app_read_tab_review(app_id, soup)
        app_read_tab_permission(app_id, soup)
        db_app.db_execute_g(db_sql.sql_app_read_update, (1, str(datetime.now()), app_id))
        #util.sleep()
    except Exception as e:
        err.except_p(e)
def home_read_max_main():
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_cate_get, ())
    for row in rows:
        lang_href = row[0]
        cate_path = row[1]
        cate_title = row[2]
        cate_param_max = row[3]
        lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
        lib_conn_http = http.get_conn_http(lib_host_http)
        ''' # if working on horizon pc, uncommit this if, otherwise, commit above two lines. 
        if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip():
            lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
            lib_conn_http = http.get_conn_http(lib_host_http)
        '''
        try:
            home_read_max(lang_href, cate_path, cate_title)
        except Exception as e:
            err.except_p(e)
        print '======='
def website_read(developer_website, real_href):
    """Download a developer's website and extract its social-media links.

    Args:
        developer_website: key passed to the website_* extractors and to
            the read-status update.
        real_href: URL that is actually fetched.

    The page is fetched with a browser-like User-agent and a 20 second
    timeout, then passed to the twitter, facebook, youtube and google plus
    extractors; finally sql_developer_website_read_status_update is
    executed. Any exception is reported through err.except_p.
    """
    try:
        opener = urllib2.build_opener()
        # The attribute urllib2 reads is the lowercase `addheaders`; with
        # any other spelling the custom User-agent is silently never sent.
        opener.addheaders = [('User-agent', 'Mozilla/5.1 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/9.0.1')]
        f = opener.open(real_href, timeout=20)
        try:
            body = f.read()
        finally:
            # Always release the connection, even if the read fails.
            f.close()
        # NOTE: <meta http-equiv="refresh"> redirects are not followed; an
        # earlier attempt that looped on meta_redirect(body) was disabled.
        soup = BeautifulSoup(body)
        website_twitter(developer_website, soup)
        website_facebook(developer_website, soup)
        website_youtube(developer_website, soup)
        website_google_plus(developer_website, soup)
        db_developer.db_execute_g(db_sql.sql_developer_website_read_status_update, (developer_website, ))
    except Exception as e:
        err.except_p(e)