Пример #1
0
"""
import requests
from lib.proxy_iterator import Proxies
from lib.mongo import Mongo
import pika
import json
from multiprocessing import Process

# Module-level MongoDB handle, created at import time via the project's
# Mongo wrapper. `collection` is looked up as ['amap']['amap_road_clean']
# (presumably database 'amap', collection 'amap_road_clean' — confirm
# against lib.mongo).
# NOTE(review): host and credentials are hard-coded in source — consider
# loading them from configuration or the environment.
m = Mongo(host='114.80.150.196',
          port=27777,
          user_name='goojia',
          password='******')
collection = m.connect['amap']['amap_road_clean']

# Proxy settings are fetched once, at import time, from the Proxies helper.
# presumably get_one() returns a requests-style proxies mapping — verify
# against lib.proxy_iterator.
p = Proxies()
proxies = p.get_one()

city_code = {
    '绍兴': '330600',
    '烟台': '370600',
    '湖州': '330500',
    '枣庄': '370400',
    '丽水': '331100',
    '金华': '330700',
    '衢州': '330800',
    '临沂': '371300',
    '舟山': '330900',
    '莱芜': '371200',
    '威海': '371000',
    '青岛': '370200',
    '嘉兴': '330400',
Пример #2
0
import requests
from lxml import etree
from lib.proxy_iterator import Proxies
from pymongo import MongoClient
import re
import aiohttp
import asyncio
import time
from lib.log import LogHandler
import time
import pika
import json
# Logger for this crawler, registered under the name 'fanggugu'.
log = LogHandler('fanggugu')
# NOTE: `p` starts as the Proxies helper and is immediately rebound to the
# value returned by get_one(); later code only ever sees that returned value.
p = Proxies()
p = p.get_one(proxies_number=1)

# Target store for price data. Credentials are handed to MongoClient
# directly (the same style the second client below already uses) instead of
# calling Database.authenticate(), which was deprecated in PyMongo 3.5 and
# removed in PyMongo 4.0. authSource keeps the authentication database the
# same as before: db.authenticate() authenticated against the database it
# was called on, i.e. 'fangjia_craw'.
# NOTE(review): the password is hard-coded in source — consider moving it to
# configuration or the environment.
client = MongoClient(host='192.168.0.105',
                     port=27018,
                     username='developer',
                     password='goojia@123456',
                     authSource='fangjia_craw')
db = client.fangjia_craw
collection = db['third_party_price']

# Second client; `crawler_collection` is m['hilder_gv']['fanggugu'].
m = MongoClient(host='114.80.150.196', port=27777, username='******', password='******')
crawler_collection = m['hilder_gv']['fanggugu']

# City names (in Chinese) singled out as "top" cities; how they are used is
# decided by code outside this block.
top_city_list = ['上海', '北京', '广州', '深圳', '天津',
                 '无锡', '西安', '武汉', '大连', '宁波',
                 '南京', '沈阳', '苏州', '青岛', '长沙',
                 '成都', '重庆', '杭州', '厦门']


class FangGuGu:
import requests
import pika
import json
from lib.proxy_iterator import Proxies
# Module-level logger named after this module.
log = LogHandler(__name__)

# MongoDB handle created at import time via the project's Mongo wrapper.
# NOTE(review): host and credentials are hard-coded in source — consider
# loading them from configuration.
m = Mongo(host='114.80.150.196',
          port=27777,
          user_name='goojia',
          password='******')

# Two lookups under m.connect['friends'] (presumably the 'friends'
# database — confirm against lib.mongo): the building records and the
# 2018-10-08 snapshot of house details.
xiaozijia_build_collection = m.connect['friends']['xiaozijia_build']
xiaozijia_house_detail_2018_10_8_collection = m.connect['friends'][
    'xiaozijia_house_detail_2018_10_8']
# Proxy settings fetched once at import time with proxies_number=5.
p = Proxies()
proxies = p.get_one(proxies_number=5)

# Blocking RabbitMQ connection opened at import time; the queue
# 'xiaozijia_detail_url' is declared on its channel.
# NOTE(review): heartbeat=0 turns AMQP heartbeats off according to pika's
# ConnectionParameters documentation — confirm that is intended here.
connection = pika.BlockingConnection(
    pika.ConnectionParameters(host='114.80.150.196', port=5673, heartbeat=0))
channel = connection.channel()
channel.queue_declare(queue='xiaozijia_detail_url')


class Consumer(object):
    def __init__(self, cookie):
        self.headers = {
            'Host': 'www.xiaozijia.cn',
            'Referer': 'http://www.xiaozijia.cn/Evaluation/Evaluation',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
            'Cookie': cookie
from xiaozijia_core.y666yun import GetPhone
import requests
import re
from lib.mongo import Mongo
import datetime
from lib.proxy_iterator import Proxies
# NOTE: `proxies` is rebound on the second line: it is first the Proxies
# helper, then whatever get_one(proxies_number=1) returns. Both calls run at
# import time.
proxies = Proxies()
proxies = proxies.get_one(proxies_number=1)


class Register(object):
    def __init__(self):
        """Prepare the state needed to register one account.

        Creates an HTTP session, a ``GetPhone`` helper (whose ``phone``
        attribute is copied to ``self.phone``), the request headers, and a
        Mongo handle whose ``['friends']['xiaozijia_user']`` lookup is kept
        in ``self.coll``. ``self.code`` and ``self.result`` start as empty
        strings and are filled in by other methods.

        Constructing an instance therefore runs whatever side effects
        ``GetPhone('小资家')`` and ``Mongo(...)`` have.
        """
        self.password = '******'
        # requests.Session() is the supported constructor; the lowercase
        # requests.session() alias is deprecated and kept by Requests only
        # for backwards compatibility.
        self.s = requests.Session()
        # presumably reserves a phone number for the '小资家' service —
        # confirm against xiaozijia_core.y666yun.GetPhone.
        self.g = GetPhone('小资家')
        self.phone = self.g.phone
        self.headers = {
            'Connection': 'keep-alive',
            'Host': 'www.xiaozijia.cn:8002',
            'User-Agent':
            'xiao zi jiaiOS/1.2.1 (iPhone; iOS 11.4.1; Scale/2.00)',
        }
        # presumably the SMS verification code, set later — TODO confirm.
        self.code = ''
        # NOTE(review): host and credentials are hard-coded in source.
        self.m = Mongo('114.80.150.196',
                       27777,
                       user_name='goojia',
                       password='******')
        self.coll = self.m.connect['friends']['xiaozijia_user']
        self.result = ''

    def sent_phone(self):
# _*_ coding:utf-8 _*_
# from company.baidumap_consumer import BaiduMapConsumer
from company.baidumap_producer import baiduproducer

from lib.proxy_iterator import Proxies
from multiprocessing import Process
from company.baidumap_consumer_update import BaiduMapConsumer

if __name__ == '__main__':
    # Script entry point: start six Baidu Map consumer processes, each one
    # built with the proxy settings returned by get_one(1) .. get_one(6).

    # Disabled alternative: run the producer in its own process.
    # Process(target=baiduproducer).start()

    proxy_pool = Proxies()

    # Disabled alternative: a single consumer fed by iterating the pool.
    # Process(target=BaiduMapConsumer(proxies=next(proxy_pool)).start_consume).start()

    for slot in range(1, 7):
        consumer = BaiduMapConsumer(proxies=proxy_pool.get_one(slot))
        worker = Process(target=consumer.start_consume)
        worker.start()

    # Disabled alternative: one consumer behind the Abuyun dynamic proxy.
    # proxy = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    #     "host": "http-dyn.abuyun.com",
    #     "port": "9020",
    #     "user": "******",
    #     "pass": "******",
    # }
    # proxies = {"https": proxy,
    #            "http": proxy}
    # Process(target=BaiduMapConsumer(proxies=proxies).start_consume).start()
Пример #6
0
from multiprocessing import Process
from company.liepin_consumer_single import LiepinConsumeSingle
from company.liepin_producer_detail import LiepinProducerDetail
from company.liepin_consumer_gevent import LiepinConsumeGevent
if __name__ == '__main__':
    # Script entry point for the Liepin crawl pipeline. Every stage runs in
    # its own process, started in the order below.
    # NOTE(review): Proxies and LiepinProduceList are not imported in the
    # visible part of this file — confirm they are imported elsewhere.

    # Step 1 (disabled): store the city codes and category codes in the
    # MySQL database.
    # get_city()
    # get_category()

    # Step 2: producer — puts the paginated (list page) links on the queue.
    proxy_pool = Proxies()
    list_producer = LiepinProduceList(proxies=next(proxy_pool))
    Process(target=list_producer.start_crawler).start()

    # Step 3: producer — consumes the URLs queued by step 2, parses out the
    # company URLs and puts the company detail pages on the queue.
    detail_producer = LiepinProducerDetail(proxies=next(proxy_pool))
    Process(target=detail_producer.start_consume).start()

    # Step 4: consume the URLs queued by step 3: request, parse, store.
    # A fresh Proxies instance is created here, as in the original flow.
    proxy_pool = Proxies()
    for slot in range(1, 7):
        single_consumer = LiepinConsumeSingle(
            proxies=proxy_pool.get_one(proxies_number=slot))
        Process(target=single_consumer.start_consume).start()

    # One extra gevent-based consumer that goes through the Abuyun dynamic
    # proxy for both http and https traffic.
    abuyun_settings = {
        "host": "http-dyn.abuyun.com",
        "port": "9020",
        "user": "******",
        "pass": "******",
    }
    proxy = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % abuyun_settings
    abuyun_proxies = {"https": proxy, "http": proxy}
    gevent_consumer = LiepinConsumeGevent(proxies=abuyun_proxies)
    Process(target=gevent_consumer.start_consume).start()
Пример #7
0
        else:
            if text['status'] == '1':
                poi_list = text['data']['poi_list']
                for poi in poi_list:
                    address = poi['address']
                    if map_street in address:
                        dict_text = dict(poi)
                        poi_info.append(dict_text)
                    else:
                        break
                # 注意此处是更新
                if len(poi_info) != 31:
                    mongo_collection.update_one(
                        {
                            'city_code': data['city_code'],
                            'region': data['region'],
                            'street_number': data['street_number']
                        }, {'$set': {
                            'poi_info': poi_info
                        }})
            else:
                print(res.json())
                log.error('请求失败,status不为1,url = {}'.format(res.url))
            return True


if __name__ == '__main__':
    # Script entry point: build an AddStreet worker with the proxy settings
    # returned by get_one(proxies_number=1) and run add_streets() once.
    p = Proxies()
    add_street_proxy = p.get_one(proxies_number=1)
    street = AddStreet(proxies=add_street_proxy)
    street.add_streets()