""" import requests from lib.proxy_iterator import Proxies from lib.mongo import Mongo import pika import json from multiprocessing import Process m = Mongo(host='114.80.150.196', port=27777, user_name='goojia', password='******') collection = m.connect['amap']['amap_road_clean'] p = Proxies() proxies = p.get_one() city_code = { '绍兴': '330600', '烟台': '370600', '湖州': '330500', '枣庄': '370400', '丽水': '331100', '金华': '330700', '衢州': '330800', '临沂': '371300', '舟山': '330900', '莱芜': '371200', '威海': '371000', '青岛': '370200', '嘉兴': '330400',
import requests
from lxml import etree
from lib.proxy_iterator import Proxies
from pymongo import MongoClient
import re
import aiohttp
import asyncio
import time
from lib.log import LogHandler
import pika
import json

log = LogHandler('fanggugu')
p = Proxies()
p = p.get_one(proxies_number=1)

# target store: fangjia_craw.third_party_price
client = MongoClient(host='192.168.0.105', port=27018)
db = client.fangjia_craw
db.authenticate('developer', 'goojia@123456')
collection = db['third_party_price']

# crawl-state store: hilder_gv.fanggugu
m = MongoClient(host='114.80.150.196', port=27777, username='******', password='******')
crawler_collection = m['hilder_gv']['fanggugu']

top_city_list = ['上海', '北京', '广州', '深圳', '天津', '无锡', '西安', '武汉', '大连',
                 '宁波', '南京', '沈阳', '苏州', '青岛', '长沙', '成都', '重庆', '杭州', '厦门']


class FangGuGu:
import requests
import pika
import json
from lib.proxy_iterator import Proxies
from lib.log import LogHandler
from lib.mongo import Mongo

log = LogHandler(__name__)

m = Mongo(host='114.80.150.196', port=27777, user_name='goojia', password='******')
xiaozijia_build_collection = m.connect['friends']['xiaozijia_build']
xiaozijia_house_detail_2018_10_8_collection = m.connect['friends'][
    'xiaozijia_house_detail_2018_10_8']

p = Proxies()
proxies = p.get_one(proxies_number=5)

connection = pika.BlockingConnection(
    pika.ConnectionParameters(host='114.80.150.196', port=5673, heartbeat=0))
channel = connection.channel()
channel.queue_declare(queue='xiaozijia_detail_url')


class Consumer(object):

    def __init__(self, cookie):
        self.headers = {
            'Host': 'www.xiaozijia.cn',
            'Referer': 'http://www.xiaozijia.cn/Evaluation/Evaluation',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
            'Cookie': cookie
        }
from xiaozijia_core.y666yun import GetPhone
import requests
import re
from lib.mongo import Mongo
import datetime
from lib.proxy_iterator import Proxies

proxies = Proxies()
proxies = proxies.get_one(proxies_number=1)


class Register(object):

    def __init__(self):
        self.password = '******'
        self.s = requests.session()
        # fetch a phone number for the '小资家' registration from the y666yun pool
        self.g = GetPhone('小资家')
        self.phone = self.g.phone
        self.headers = {
            'Connection': 'keep-alive',
            'Host': 'www.xiaozijia.cn:8002',
            'User-Agent': 'xiao zi jiaiOS/1.2.1 (iPhone; iOS 11.4.1; Scale/2.00)',
        }
        self.code = ''
        self.m = Mongo('114.80.150.196', 27777, user_name='goojia', password='******')
        self.coll = self.m.connect['friends']['xiaozijia_user']
        self.result = ''

    def sent_phone(self):
# _*_ coding:utf-8 _*_
# from company.baidumap_consumer import BaiduMapConsumer
from company.baidumap_producer import baiduproducer
from lib.proxy_iterator import Proxies
from multiprocessing import Process
from company.baidumap_consumer_update import BaiduMapConsumer

if __name__ == '__main__':
    # Process(target=baiduproducer).start()
    p = Proxies()
    # Process(target=BaiduMapConsumer(proxies=next(p)).start_consume).start()

    # Spawn one consumer process per proxy slot.
    for x in range(1, 7):
        Process(target=BaiduMapConsumer(proxies=p.get_one(x)).start_consume).start()

    # Alternative: a single consumer routed through the Abuyun dynamic proxy.
    # proxy = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    #     "host": "http-dyn.abuyun.com",
    #     "port": "9020",
    #     "user": "******",
    #     "pass": "******",
    # }
    # proxies = {"https": proxy,
    #            "http": proxy}
    # Process(target=BaiduMapConsumer(proxies=proxies).start_consume).start()
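
# A minimal sketch of the consumer shape this launcher assumes: a pika
# consumer that pulls one task from the queue, calls the target API through
# the injected requests-style proxies, and acks on success. The class name,
# queue name, and message format here are assumptions, not the actual
# company.baidumap_consumer_update implementation.
import json

import pika
import requests


class SketchBaiduMapConsumer(object):

    def __init__(self, proxies):
        # proxies is a requests-style mapping, e.g. {'http': ..., 'https': ...}
        self.proxies = proxies

    def on_message(self, ch, method, properties, body):
        task = json.loads(body)  # assumed: each message is a JSON task with a 'url'
        res = requests.get(task['url'], proxies=self.proxies, timeout=10)
        if res.status_code == 200:
            ch.basic_ack(delivery_tag=method.delivery_tag)
        else:
            # leave the task in the queue for another consumer/proxy to retry
            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True)

    def start_consume(self):
        connection = pika.BlockingConnection(
            pika.ConnectionParameters(host='114.80.150.196', port=5673, heartbeat=0))
        channel = connection.channel()
        channel.queue_declare(queue='baidumap_task')  # hypothetical queue name
        channel.basic_consume(queue='baidumap_task', on_message_callback=self.on_message)
        channel.start_consuming()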
from multiprocessing import Process
from lib.proxy_iterator import Proxies
from company.liepin_producer_list import LiepinProduceList  # assumed module path
from company.liepin_consumer_single import LiepinConsumeSingle
from company.liepin_producer_detail import LiepinProducerDetail
from company.liepin_consumer_gevent import LiepinConsumeGevent

if __name__ == '__main__':
    # 1. Store the city codes and category codes in the MySQL database.
    # get_city()
    # get_category()

    # 2. Producer: push the paginated list-page URLs into the queue
    #    (a minimal sketch of this producer follows this script).
    p = Proxies()
    Process(target=LiepinProduceList(proxies=next(p)).start_crawler).start()

    # 3. Producer: consume the list-page URLs from step 2's queue, parse out
    #    the company URLs, and push the company detail pages into the queue.
    Process(target=LiepinProducerDetail(proxies=next(p)).start_consume).start()

    # 4. Consume the URLs from step 3's queue: request, parse, and store.
    p = Proxies()
    for x in range(1, 7):
        Process(target=LiepinConsumeSingle(proxies=p.get_one(
            proxies_number=x)).start_consume).start()

    # Gevent-based consumer routed through the Abuyun dynamic proxy.
    proxy = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
        "host": "http-dyn.abuyun.com",
        "port": "9020",
        "user": "******",
        "pass": "******",
    }
    proxies = {"https": proxy,
               "http": proxy}
    Process(target=LiepinConsumeGevent(proxies=proxies).start_consume).start()
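
# Step 2 above assumes a list-page producer. A minimal sketch under assumed
# names: the queue name, URL pattern, and function name are hypothetical,
# not the real LiepinProduceList implementation.
import pika


def sketch_produce_list_pages(city_code, category_code, pages=10):
    """Push paginated company-list URLs for one city/category into the queue."""
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='114.80.150.196', port=5673, heartbeat=0))
    channel = connection.channel()
    channel.queue_declare(queue='liepin_list_page')  # hypothetical queue name
    for page in range(pages):
        # hypothetical URL pattern for a paginated company list
        url = 'https://www.liepin.com/company/?dqs={}&industries={}&curPage={}'.format(
            city_code, category_code, page)
        channel.basic_publish(exchange='', routing_key='liepin_list_page', body=url)
    connection.close()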
        else:
            if text['status'] == '1':
                poi_list = text['data']['poi_list']
                for poi in poi_list:
                    address = poi['address']
                    if map_street in address:
                        dict_text = dict(poi)
                        poi_info.append(dict_text)
                    else:
                        break
                # Note: this is an update, not an insert.
                if len(poi_info) != 31:
                    mongo_collection.update_one(
                        {
                            'city_code': data['city_code'],
                            'region': data['region'],
                            'street_number': data['street_number']
                        },
                        {'$set': {'poi_info': poi_info}})
            else:
                print(res.json())
                log.error('request failed, status is not 1, url = {}'.format(res.url))
            return True


if __name__ == '__main__':
    p = Proxies()
    street = AddStreet(proxies=p.get_one(proxies_number=1))
    street.add_streets()
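
# The consumer above expects queue messages carrying city_code, region and
# street_number. A minimal sketch of the matching producer side, with the
# queue name, source collection, and credentials as assumptions:
import json

import pika
from pymongo import MongoClient


def sketch_produce_street_tasks():
    client = MongoClient(host='114.80.150.196', port=27777,
                         username='goojia', password='******')
    coll = client['amap']['amap_road_clean']  # assumed source collection
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='114.80.150.196', port=5673, heartbeat=0))
    channel = connection.channel()
    channel.queue_declare(queue='amap_street_task')  # hypothetical queue name
    for doc in coll.find({}, {'_id': 0, 'city_code': 1, 'region': 1,
                              'street_number': 1}):
        channel.basic_publish(exchange='', routing_key='amap_street_task',
                              body=json.dumps(doc))
    connection.close()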