示例#1
0
def makeParam(table):
    """Return the crawler settings dict with an open DB connection attached.

    The dict carries the site name, the entry/query URLs, the POST form
    fields, the detail-page URL prefixes (host-name and raw-IP variants)
    and the database credentials. A connection created with
    jTool.initCursor from those credentials is stored under "conn".

    `table` is accepted but not used by this function.
    """
    settings = {
        "station": "信用浙江",
        "begin_url": "http://www.zjcredit.gov.cn:8000/CreditQuery.aspx?sectionID=02",
        "query_url": "http://www.zjcredit.gov.cn:8000/ListQuery.aspx",
        "post_data_dic": {
            "isIntermediary": "False",
            "isOpen": "False",
            "pageLength": "20",
            "recordTotal": "1778190",
            "sectionID": "02",
            "sortDirection": "1",
            "sortField": "CreditID",
        },
        "preUrl": "http://www.zjcredit.gov.cn:8000/EnterpriseInfo.aspx?creditID=",
        "preUrlip": "http://218.108.28.28:8000/EnterpriseInfo.aspx?creditID=",
        "basePostUrl": "http://www.zjcredit.gov.cn:8000/GetInfoByDataSupplier.aspx",
        "basePostUrlip": "http://218.108.28.28:8000/GetInfoByDataSupplier.aspx",
        "dbHost": "localhost",
        "dbUser": "******",
        "dbPasswd": "root",
        "rdb": "rawData",
    }
    # The connection is built from the very values stored in the dict.
    settings["conn"] = jTool.initCursor(
        settings["dbHost"],
        settings["dbUser"],
        settings["dbPasswd"],
        settings["rdb"],
    )
    return settings
示例#2
0
def getAllpageList(start, end):
    """Fetch every list page numbered from `start` to `end` inclusive.

    Opens one connection to the local rawData database, hands it together
    with each page number to getDetailListPages, and prints a progress line
    after each page. The connection is closed when the loop finishes or when
    a page raises, so it is no longer left open.
    """
    conn = jTool.initCursor('localhost', 'root', 'root', 'rawData')
    try:
        while start <= end:
            getDetailListPages(start, conn)
            print('page ok:'+str(start))
            start += 1
    finally:
        # Release the connection even if a page fetch fails part-way.
        conn.close()
示例#3
0
def mainLoop(start, end):
    """Process the base_page_list rows whose id lies in [start, end].

    For each id the row is read only if its status is 0; ids without such a
    row are skipped. For a row that is found, getPageField is called through
    a randomly chosen proxy from proxy.txt, at most twice, until it returns
    a truthy value. On success the row's status is set to '1'. The
    transaction is committed after every processed id.

    The cursors and the connection are closed in a ``finally`` clause so
    they are released even when a query or a page fetch raises.
    """
    proxyList = jTool.getProxy('proxy.txt')
    pcount = len(proxyList)-1
    # Request headers sent with every fetch. Note that two 'Cookie:' entries
    # are present; both are kept exactly as they were.
    head = ['Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset:GBK,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding:gzip,deflate,sdch',
            'Accept-Language:zh-CN,zh;q=0.8',
            'Cache-Control:max-age=0',
            'Connection:keep-alive',
            'Cookie:ASP.NET_SessionId=t3isah45gu5kb4454qyxkhzy; lzstat_uv=6061202253430616218|2529639; lzstat_ss=953382219_1_1373621147_2529639; _gscu_374314293=7359234405sddy11; _gscs_374314293=73592344d74zfy11|pv:3; _gscbrs_374314293=1; ECStaticSession=ECS81',
            'Host:www.zjcredit.gov.cn:8000',
            'Pragma:no-cache',
            'Cookie:_gscu_374314293=73631708ff8h1y17; lzstat_uv=106813037832225946|2529639; ECStaticSession=ECS80; ASP.NET_SessionId=5dhxxl45gr4d0aexnf1uiu55; _gscbrs_374314293=1; lzstat_ss=815622537_1_1374448570_2529639; _gscs_374314293=t74419759zee6a318|pv:2',
            'Origin:http://www.zjcredit.gov.cn:8000',
            'Referer:http://www.zjcredit.gov.cn:8000/ListPrompts.aspx?sectionID=01&tableID=CourtNotCarryOut&associateID=00000000000000000&hasPromptHistroy=False',
            'User-Agent:Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36'
            ]
    conn = jTool.initCursor('localhost', 'root', 'root', 'rawData')
    cursor = conn.cursor()    # reads the pending row
    cursor2 = conn.cursor()   # writes the status update
    try:
        while start <= end:
            sql = 'select *  from base_page_list where id = '+str(start)+' and status = 0 limit 1'
            cursor.execute(sql)
            record = cursor.fetchone()
            if not record:
                # No pending row for this id: move on without committing.
                start += 1
                continue

            corpName = record[1]
            rowID = record[3]
            print(corpName+', '+str(start))

            # Up to two attempts, each through a freshly picked proxy.
            rt = None
            count = 1
            while not rt and count <= 2:
                proxy = str(proxyList[random.randint(0, pcount)]).strip()
                rt = getPageField(conn, proxy, head, str(start), rowID, corpName)
                count += 1

            if rt:
                print('id'+str(start)+' ok')
                jTool.updateData(cursor2, ' where id = '+str(start)+' ', 'base_page_list', {'status': '1'})

            start += 1
            conn.commit()
    finally:
        cursor.close()
        cursor2.close()
        conn.close()
示例#4
0
def makeParam():
    """Return the crawler settings dict with an open DB connection attached.

    Holds the site name, entry/query URLs, POST form fields, detail-page URL
    prefixes (host-name and raw-IP variants) and database credentials. The
    connection made by jTool.initCursor from those credentials is stored
    under 'conn'.
    """
    form_fields = {
        'isIntermediary': 'False',
        'isOpen': 'False',
        'pageLength': '20',
        'recordTotal': '1778190',
        'sectionID': '02',
        'sortDirection': '1',
        'sortField': 'CreditID',
    }
    config = {
        'station': '信用浙江',
        'begin_url': 'http://www.zjcredit.gov.cn:8000/CreditQuery.aspx?sectionID=02',
        'query_url': 'http://www.zjcredit.gov.cn:8000/ListQuery.aspx',
        'post_data_dic': form_fields,
        'preUrl': 'http://www.zjcredit.gov.cn:8000/EnterpriseInfo.aspx?creditID=',
        'preUrlip': 'http://218.108.28.28:8000/EnterpriseInfo.aspx?creditID=',
        'basePostUrl': 'http://www.zjcredit.gov.cn:8000/GetInfoByDataSupplier.aspx',
        'basePostUrlip': 'http://218.108.28.28:8000/GetInfoByDataSupplier.aspx',
        'dbHost': 'localhost',
        'dbUser': '******',
        'dbPasswd': 'root',
        'rdb': 'rawData',
    }
    # Connect with the same credentials that are exposed in the dict.
    config['conn'] = jTool.initCursor(
        config['dbHost'], config['dbUser'], config['dbPasswd'], config['rdb']
    )
    return config
示例#5
0
def initMyCursor(db):
    """Return what jTool.initCursor gives for database `db` on localhost.

    Host, user and password are fixed inside this function; only the
    database name varies.
    """
    return jTool.initCursor('localhost', '******', 'root', db)
示例#6
0
#!/usr/bin
#encoding=utf-8

import sys
import jTool
import extLib


# Python 2 idiom: reload(sys) makes sys.setdefaultencoding reachable again
# (the site module removes it at startup), so the interpreter-wide default
# string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")


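# Operates on the relevant fields of enterprise_record_raw / enterprise_raw: extracts, cleans and converts the data.
# The data is still saved in the corresponding table; once this is complete, a data-conversion feature can be added to replace the PHP script and handle field conversion and default-value filling.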

def enterprise_record_raw_function(conn, start, end):
    """Placeholder handler: prints 'hi' and ignores all three arguments."""
    print('hi')

if __name__=='__main__':
    # Usage banner, always shown.
    print('*'*50)
    print('Run like this : python ext2.py enterprise_raw  1 10')
    print('now accept enterprise_raw,enterprise_record_raw')
    print('*'*50)

    # Fail early with a readable message instead of an IndexError when the
    # three positional arguments are not all present.
    if len(sys.argv) < 4:
        sys.exit('missing arguments: expected <table> <start> <end>')
    table = sys.argv[1]
    # start/end are handed on exactly as given on the command line (strings).
    start = sys.argv[2]
    end = sys.argv[3]

    # The handler is looked up by name in extLib as '<table>_function';
    # resolve it before opening a database connection.
    output_function = getattr(extLib, table+'_function', None)
    if output_function is None:
        sys.exit('unsupported table: ' + str(table))

    conn = jTool.initCursor('localhost', 'root', 'root', 'rawData')
    print('begin to extract and clean data from table ' + str(table))
    output_function(conn, start, end)
示例#7
0
def makeParam():
    """Load the listed settings from config.ini and attach a DB connection.

    The value returned by jTool.getConfigParam is indexed by the requested
    names; the connection built from its dbHost/dbUser/dbPasswd/rdb entries
    is stored in it under 'conn' before it is returned.
    """
    wanted = [
        'hostSvr',
        'dbHost',
        'dbUser',
        'dbPasswd',
        'rdb',
        'basePostUrl',
        'preUrl',
        'basePostUrlip',
        'preUrlip',
    ]
    settings = jTool.getConfigParam(wanted, 'config.ini')
    settings['conn'] = jTool.initCursor(
        settings['dbHost'], settings['dbUser'], settings['dbPasswd'], settings['rdb']
    )
    return settings
示例#8
0
# -*-coding: utf-8 -*-
#encoding=utf-8

import sys
import jTool

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding reachable again
# (the site module removes it at startup), so the interpreter-wide default
# string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

# Used to initialize the task table; it only needs to be run once per task.

# Ask jTool.getConfigParam for the connection settings and the 'initTable'
# entry, passing 'server.ini' as the source.
paramDic = jTool.getConfigParam(['host', 'user', 'passwd', 'db', 'initTable'], 'server.ini')
# Connect with those settings, then initialize the table named by 'initTable'.
conn = jTool.initCursor(paramDic['host'], paramDic['user'], paramDic['passwd'], paramDic['db'])
jTool.initTaskTable(conn, paramDic['initTable'])
示例#9
0
 def __init__(self):
     """Create a DB connection from the server.ini settings and keep it on self.conn."""
     cfg = jTool.getConfigParam(['host', 'user', 'passwd', 'db'], 'server.ini')
     self.conn = jTool.initCursor(cfg['host'], cfg['user'], cfg['passwd'], cfg['db'])