Example #1
0
def check_out(proxies, check_header):
    """Return a random proxy from *proxies* that passes a live check.

    Samples proxies at random until check_out_base() validates one.
    Every proxy that fails validation is marked dead (datastatus=2)
    in the Ip_Pool table so later calls stop offering it.

    NOTE(review): loops forever if no proxy ever validates, and raises
    on an empty *proxies* list — confirm callers guarantee a non-empty,
    eventually-working pool.
    """
    while True:
        # random.choice replaces the manual proxies[randint(0, len-1)] idiom.
        candidate = random.choice(proxies)
        if check_out_base(candidate, check_header):
            return candidate
        # Dead proxy: flag it in the pool so get_proxies() skips it next time.
        session.query(Ip_Pool).filter(Ip_Pool.ip == candidate).update(
            {Ip_Pool.datastatus: 2})
        session.commit()
Example #2
0
def download_data(url, referer_header, stock, proxies, check_header):
    """Fetch the bulletin listing JSON(P) for *stock* and insert new rows
    into the sh_a_share MySQL table.

    Retries forever: each pass picks a validated proxy via check_out(),
    downloads *url*, strips the JSONP wrapper, de-duplicates against
    bulletinids already stored, then bulk-inserts the rest with one
    executemany(). Returns after a single successful insert pass.

    Params (assumed from usage — confirm against callers):
      url            -- sse.com.cn bulletin query URL returning JSONP
      referer_header -- dict of HTTP headers for the GET request
      stock          -- dict with at least a "stockname" key
      proxies        -- list of proxy URL strings for check_out()
      check_header   -- headers check_out() uses to validate a proxy
    """
    while 1:
        # Get a proxy that just passed a live check.
        proxies_down = check_out(proxies, check_header)
        # Build the requests-style proxies mapping from the URL scheme.
        if "https://" in proxies_down:
            proxie = {"https": proxies_down}
        elif "http://" in proxies_down:
            proxie = {"http": proxies_down}
        else:
            proxie = None
        try:
            response = requests.get(url,
                                    headers=referer_header,
                                    proxies=proxie,
                                    timeout=5)
            response.raise_for_status()
            status_code = response.status_code
        except requests.exceptions.RequestException as e:
            # Any transport/HTTP error: fake a 400 so the retry branch runs.
            print e
            status_code = 400
            response = None
        if status_code < 300 and response is not None:
            result = response.text.encode('utf-8')
            strresult = str(result)
            try:
                # Response is JSONP: the JSON payload sits between the
                # outermost '(' and ')'.
                strJsonData = strresult[strresult.find('(') +
                                        1:strresult.rfind(')')]
                dict_data = dict(json.loads(strJsonData))
            except Exception as e:
                # Malformed/truncated payload — retry with another proxy.
                print e
                continue
            else:
                db = MySqlCon()
                data = {}  # bulletinid -> row tuple for executemany
                bulletinid_list = []
                sql = """INSERT INTO sh_a_share(bulletinid,stockcode,stockname,
                      title,category,url,bulletinyear,bulletindate,uploadtime,datastatus)
                      VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
                for da in dict_data["pageHelp"]["data"]:
                    pdfurl = 'http://static.sse.com.cn' + da["URL"]
                    # md5 of the PDF URL serves as the stable row id.
                    bulletinid = hashlib.md5(pdfurl).hexdigest()
                    bulletinid_list.append(bulletinid)
                    data.update({
                        bulletinid:
                        (bulletinid, da["security_Code"].encode('utf-8'),
                         stock["stockname"], da["title"].encode('utf-8'),
                         da["bulletin_Type"].encode('utf-8'),
                         pdfurl.encode('utf-8'),
                         da["bulletin_Year"].encode('utf-8'),
                         da["SSEDate"].encode('utf-8'), str(datetime.now()), 1)
                    })

                # (An earlier per-row INSERT built with str.format lived here;
                # superseded by the parameterized executemany below.)

                # Drop bulletinids already present in the table.
                repeat = session.query(Sh_A_Share.bulletinid).filter(
                    Sh_A_Share.bulletinid.in_(bulletinid_list)).all()
                for kk in repeat:
                    data.pop(kk.bulletinid)
                if data:
                    try:
                        db.cursor.executemany(sql, data.values())
                        db.conn.commit()
                    except Exception as e:
                        print(e)
                        db.conn.rollback()

                db.conn.close()
                break
        else:
            # Non-2xx or no response: retry with a fresh proxy.
            continue
Example #3
0
 def get_proxies():
     """Return the IP strings of every proxy currently marked alive
     (Ip_Pool.datastatus == 1)."""
     alive = session.query(Ip_Pool).filter(Ip_Pool.datastatus == 1).all()
     return [row.ip for row in alive]
Example #4
0
        # NOTE(review): fragment of a larger loop — `i`, `threads`, `MyReptile`,
        # `myThread` and `session` are defined outside this excerpt.
        # logging.info(msg)
        # session.query(Sh_Share).filter(Sh_Share.stockcode == stock["stockcode"]).update({Sh_Share.datastatus: 2})
        # session.commit()
        # time.sleep(5)
        stock = i  # current stock record from the enclosing loop
        print stock
        k = MyReptile(stock)
        start = time.time()
        # Fan out one downloader thread per page-URL batch of this stock.
        for dd in k.page_urls:
            thread = myThread(urls=dd,
                              proxies=k.proxies,
                              check_header=k.check_header,
                              referer_header=k.referer_header,
                              stock=k.stock)
            thread.start()
            threads.append(thread)
        # Wait for all spawned threads. NOTE(review): `threads` appears to be
        # shared across loop iterations, so this re-joins threads from earlier
        # stocks as well — confirm this is intended.
        for t in threads:
            t.join()
        print 'down_success'
        end = time.time()
        msg = '股票代码:{},股票名称:{},耗时:{}s,日期:{}'.format(stock["stockcode"],
                                                    stock["stockname"],
                                                    end - start,
                                                    datetime.now())
        logging.info(msg)
        # Mark this stock crawled (datastatus=2) so the next run skips it.
        session.query(Sh_Share).filter(
            Sh_Share.stockcode == stock["stockcode"]).update(
                {Sh_Share.datastatus: 2})
        session.commit()
    print '{}:end'.format(datetime.now())