Пример #1
0
def appCommunityHandlerNoStyle(community_id):
    """Request the mobile strategy endpoint for one community via a proxy.

    The URL is ``lianjia_app_host + str(community_id)``. The request is sent
    through entries of the module-level ``proxies`` list until one of them
    answers with HTTP 200. After every failed attempt (request exception or
    non-200 status) the global list is replaced with a fresh one from
    ``our_proxy.getListProxies()``.

    The number of attempts is capped at the length of the proxy list on
    entry. The response is not returned; the function always returns None.

    Args:
        community_id: Identifier appended to the endpoint URL (converted
            with ``str``).
    """
    global proxies
    cid = str(community_id)
    print('3->>>>>hand community:' + cid + ' start=====')
    app_url = lianjia_app_host + cid
    print(app_url)

    # One attempt per proxy known on entry. The former bound of
    # ``len(proxies) - 1`` never tried the last proxy and issued no request
    # at all when the list held a single entry.
    max_attempts = len(proxies)
    for attempt in range(max_attempts):
        if not proxies:
            # A refresh came back empty; nothing left to try.
            break
        # The list may have been replaced (and may be shorter) since the
        # previous attempt, so wrap the index instead of trusting the
        # original length.
        p = proxies[attempt % len(proxies)]
        try:
            strategy_res = requests.get(app_url, proxies=p, timeout=2)
        except Exception:
            print('3->>>>>hand community get error:' + cid + ' end=====' +
                  str(p))
            proxies = our_proxy.getListProxies()
            continue
        if strategy_res.status_code == 200:
            break
        print('3->>>>>hand community http code error:' + cid + ' end=====' +
              str(p))
        proxies = our_proxy.getListProxies()
Пример #2
0
def httpGet(url):
    """Fetch *url* through a proxy and return the parsed document.

    A GET is attempted through entries of the module-level ``proxies`` list
    (2 second timeout each) until one request completes without raising.
    After each failed attempt the global list is replaced with a fresh one
    from ``our_proxy.getListProxies()``. The number of attempts is capped at
    the length of the proxy list on entry.

    Args:
        url: Address to fetch.

    Returns:
        A ``BeautifulSoup`` document built from the response body, or None
        when no attempt succeeded. Any completed response counts as success;
        the HTTP status code is not inspected.
    """
    global proxies
    # Build a User-Agent with random version numbers (presumably to vary the
    # fingerprint between requests). The five draws keep their original
    # order so seeded runs produce the same header.
    a = random.randint(1, 10)
    aa = random.randint(1, 10)
    b = random.randint(1, 11)
    c = random.randint(1, 12)
    d = random.randint(1, 8)
    user_agent = 'Mozilla/%d.%d (Macintosh; Intel Mac OS X %d_%d_%d)' % (
        a, aa, b, c, d)
    headers = {'User-Agent': user_agent}
    session = requests.session()

    page = None
    # One attempt per proxy known on entry. The former bound of
    # ``len(proxies) - 1`` skipped the last proxy and made no request at all
    # for a single-entry list.
    max_attempts = len(proxies)
    for attempt in range(max_attempts):
        if not proxies:
            # A refresh came back empty; nothing left to try.
            break
        # The list may have been replaced (and may be shorter) since the
        # previous attempt, so wrap the index.
        p = proxies[attempt % len(proxies)]
        try:
            page = session.get(url, proxies=p, headers=headers, timeout=2)
        except Exception:
            proxies = our_proxy.getListProxies()
            print('http get failed,proxy:' + str(p))
            continue
        # session.get either raises or returns a response, so reaching this
        # point means the request went through. (The old check compared a
        # type object with the string 'NoneType' and was always true.)
        print('http get ok,proxy:' + str(p))
        break

    if page is None:
        return None
    # Callers in this module call .find()/.find_all() on the result, so hand
    # back a parsed document rather than discarding the response.
    # NOTE(review): 'html.parser' is the stdlib-backed parser; confirm it is
    # the one the project wants.
    return BeautifulSoup(page.content, 'html.parser')
Пример #3
0
def getPageCommunityCount(url):
    """Return the community count shown in the list header of *url*.

    The page is fetched with ``httpGet`` and the value is read from the first
    ``<span>`` inside the ``<div class="list-head clear">`` element.

    On any failure (fetch or parse) the global ``proxies`` list is replaced
    with ``our_proxy.getListProxies()`` and the function calls itself again,
    so the number of retries is limited only by the interpreter's recursion
    limit.

    Args:
        url: Address of the list page.

    Returns:
        The ``.string`` of the count ``<span>`` (text, not yet converted to
        a number).
    """
    global proxies
    try:
        page_soup = httpGet(url)
        page_community_count_div = page_soup.find(
            'div', attrs={'class': 'list-head clear'})
        # This value used to be computed and then dropped, so every caller
        # received None on the success path.
        return page_community_count_div.find('span').string
    except Exception:
        print('get community count failed,try again:' + url)
        proxies = our_proxy.getListProxies()
        return getPageCommunityCount(url)
Пример #4
0
def listHandler(url):
    """Process every result page of the community list at *url*.

    Refreshes the shared proxy list, reads the total community count with
    ``getPageCommunityCount``, derives the page count at 20 communities per
    page, and calls ``pageHandler`` for ``url + 'd<N>/'`` with N from 1 to
    the page count.

    Args:
        url: Base address of the list; page suffixes are appended to it.
    """
    # Without this declaration the assignment below only created a local
    # that was never read, so the refreshed list never reached the other
    # functions that use the module-level ``proxies``.
    global proxies
    proxies = our_proxy.getListProxies()
    print('1->>>>>hand list:' + url + ' start=====')
    p_community_count = getPageCommunityCount(url)
    # Round up so a partially filled last page is still visited.
    cc = int(math.ceil(int(p_community_count) / 20.0))
    print('community total:' + str(p_community_count) + ';page total:' +
          str(cc))
    for page_number in range(1, cc + 1):
        pageHandler(url + 'd' + str(page_number) + '/')
    print('1->>>>>hand list:' + url + ' end=====')
Пример #5
0
def setDistrictName(district_url):
    """Read the district name from *district_url* and create its folders.

    The page is fetched with ``httpGet``; the name is the text of the third
    ``<a>`` inside the ``<div class="fl l-txt">`` element. ``mkdirPcDoc`` and
    ``mkdirAppDoc`` are then called with that name.

    If fetching or parsing fails, the global ``proxies`` list is replaced
    with ``our_proxy.getListProxies()`` and the function calls itself again,
    so retries are limited only by the interpreter's recursion limit. Errors
    raised by the two mkdir helpers are not retried and propagate to the
    caller.

    NOTE(review): this returns the name but does not assign any module-level
    variable; confirm callers store the result themselves.

    Args:
        district_url: Address of the district page.

    Returns:
        The district name text.
    """
    global proxies
    try:
        page = httpGet(district_url)
        l_txt = page.find('div', attrs={'class': 'fl l-txt'})
        l_txt_a = l_txt.find_all('a')
        district_name = l_txt_a[2].text
    except Exception:
        # The message used to read "get community count failed", copied from
        # getPageCommunityCount, which made logs misleading.
        print('get district name failed,try again:' + district_url)
        proxies = our_proxy.getListProxies()
        return setDistrictName(district_url)
    else:
        # Kept outside the retry: a filesystem error is not fixed by
        # refetching the page through another proxy.
        mkdirPcDoc(district_name)
        mkdirAppDoc(district_name)
        return district_name
Пример #6
0
# -*- coding: utf-8 -*-
import shield_proxy, our_proxy, comm_mapping
import requests
from bs4 import BeautifulSoup
import re
import random
import os.path, sys
import math
import time
import json
reload(sys)
sys.setdefaultencoding('utf8')

# Base URL of the desktop site.
lianjia_host = "http://sh.lianjia.com"
# URL prefix of the mobile API endpoint; appCommunityHandlerNoStyle appends a
# community id to it.
lianjia_app_host = "http://m.sh.lianjia.com/api/v1/m/strategy/contents/"
# Shared proxy list. Functions in this module read it and replace it (via
# ``global proxies``) with a fresh our_proxy.getListProxies() result after
# failed requests.
proxies = our_proxy.getListProxies()

# Directory and file name of the running script, taken from sys.argv[0].
dirname, filename = os.path.split(os.path.abspath(sys.argv[0]))
print "running from", dirname
print "file is", filename

# Output directory tree, rooted next to the script: <dirname>/链家 with
# 'pc' and 'app' subdirectories.
home = dirname + '/链家'
pc = home + '/pc'
app = home + '/app'
# Starts empty. NOTE(review): presumably filled in later with the current
# district's name; the code that does so is not visible here.
district_name = ''

# Create the output directories on first run.
if (not os.path.exists(home)):
    os.makedirs(home)
if (not os.path.exists(pc)):
    os.makedirs(pc)
if (not os.path.exists(app)):