def test_Mongo_insert_then_find():
    """Insert one document and check that ``find`` returns it unchanged.

    MongoDB must be running in the background for this to work.
    """
    mongo = Mongo("test", "people")
    dic = {"name": "Darth Vader", "company": "Empire", "interests": "The Force"}
    mongo.insert_one(dic)

    # Materialise the results first: looping straight over an empty result
    # set would skip every comparison and let the test pass without
    # checking anything.
    results = list(mongo.find({"company": "Empire"}))
    assert results

    # The comparison ignores '_id'. Deleting it from `dic` relies on
    # insert_one having added the key to the dictionary it was given.
    del dic['_id']
    for x in results:
        del x['_id']
        assert dic == x
def __init__(self):
    """Configure the shell, remove unwanted cmd2 built-ins and open the back-ends."""
    cmd2.Cmd.__init__(
        self,
        completekey='tab',
        persistent_history_file=get_option('core', 'hist_file', config),
        persistent_history_length=int(get_option('core', 'hist_size', config)))

    self.allow_cli_args = False
    self.default_to_shell = False
    self.intro = 'Welcome to the Omnibus shell! Type "session" to get started or "help" to view all commands.'
    self.allow_redirection = True
    self.prompt = 'omnibus >> '
    self.redirector = '>'
    self.quit_on_sigint = False

    # Remove built-in commands from the cmd2 base class. The attributes are
    # deleted from the class itself, so a plain `del` raises AttributeError
    # as soon as one of them is already gone (for example when a second
    # instance is created in the same process). Only delete what is there.
    removed_builtins = (
        'do_alias', 'do_edit', 'do_eof', 'do_shell', 'do_eos', 'do_load',
        'do_py', 'do_pyscript', 'do_shortcuts', 'do_unalias',
        'do__relative_load',
    )
    for command in removed_builtins:
        if command in vars(cmd2.Cmd):
            delattr(cmd2.Cmd, command)

    self.db = Mongo(config)
    self.dispatch = Dispatch(self.db)
    self.session = None

    if DEBUG:
        self.do_set('debug true')
def test_Mongo_list_insert_then_find():
    """Insert two documents at once and check each can be found by name.

    MongoDB must be running in the background for this to work.
    """
    a = Mongo("test", "people")
    dic1 = {"name": "Ethan Hunt", "company": "IMF", "interests": "Epionage"}
    dic2 = {"name": "Chef Eddie", "company": "Unemployed", "interests": "food"}
    a.insert_many([dic1, dic2])

    # The comparison ignores '_id'. Deleting it from the inputs relies on
    # insert_many having added the key to the dictionaries it was given.
    del dic1['_id']
    del dic2['_id']

    for expected in (dic1, dic2):
        # Materialise the results so that an empty result set fails the
        # test instead of silently skipping the comparison loop.
        found = list(a.find({"name": expected["name"]}))
        assert found
        for doc in found:
            del doc["_id"]
            assert expected == doc
def test_Mongo_insert_then_delete():
    """Insert a document, delete it by name and check that none is left.

    MongoDB must be running in the background for this to work.
    """
    a = Mongo("test", "people")
    a.insert_one(dict(name="Prof I", company="Rutgers", interests="Statistics"))
    a.delete_many(dict(name="Prof I"))
    remaining = a.find(dict(name="Prof I")).count()
    assert remaining == 0
def __init__(self, bot):
    """Keep a reference to the bot and set up the database handle.

    :param bot: object stored as ``self.bot``.
    """
    self.bot = bot
    # A fresh Mongo instance is handed to Mongo.init_db; whatever that
    # call returns becomes the database handle.
    mongo = Mongo()
    self.db = Mongo.init_db(mongo)
    self.server_db = None
import requests from auction import Auction from lib.log import LogHandler from sql_mysql import inquire, TypeAuction from lib.mongo import Mongo from lxml import etree import datetime import yaml import re setting = yaml.load(open('config.yaml')) # client = Mongo(host=setting['mongo']['host'], port=setting['mongo']['port'], user_name=setting['mongo']['user_name'], # password=setting['mongo']['password']).connect client = Mongo(host=setting['mongo']['host'], port=setting['mongo']['port']).connect coll = client[setting['mongo']['db']][setting['mongo']['collection']] source = 'gongpaiwang' log = LogHandler(__name__) class Gongpaiwang: def __init__(self): self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36' } self.list_info = [] self.type_list = inquire(TypeAuction, source) def start_crawler(self):
import requests from lib.mongo import Mongo import re m = Mongo(host='114.80.150.196', port=27777, user_name='fangjia', password='******') collection = m.connect['dianping']['dianping_zhangshang'] collection_lat = m.connect['dianping']['dianping_zhangshang_lat'] class ShopDetail: def __init__(self): self.url = 'https://m.dianping.com/shop/20721516/map' self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36' } def get_shop_lat(self): for i in collection.find(): try: r = requests.get(url='https://m.dianping.com/shop/{}/map'.format(i['id']), headers=self.headers) info = re.search('PAGE_INITIAL_STATE(.*?)</script>', r.text, re.S | re.M).group(1) lat = re.search('"shopLat":(.*?),', info, re.S | re.M).group(1) lng = re.search('"shopLng":(.*?),', info, re.S | re.M).group(1) print(lat, lng) collection_lat.insert_one({ 'info': info, 'id': i['id'], 'lng': lng, 'lat': lat, }) except Exception as e:
class Story:
    """Record of training sessions and their epochs, optionally persisted.

    Sessions are kept in ``self.sessions``; each session holds a list of
    epochs. ``session_id`` and ``epoch_id`` act as cursors: most methods
    accept optional ids, move the cursors when a truthy id is given, and
    then operate on the session/epoch the cursors point at. When
    ``persistent`` is true, changes are also written through
    ``self.database``.
    """

    class Output(Enum):
        STD = 1
        CACHE = 2
        FILE = 3
        DATABASE = 4

    def __init__(self, name, persistent=True):
        """Create an empty story.

        :param name: story name, also used to name the logger.
        :param persistent: when true, changes are written to the database.
        """
        self.name = name
        self.sessions = []
        self.session_id = ""
        self.epoch_id = 0
        self.persistent = persistent
        self.database = Mongo()
        self.log = Logger("story-{}".format(name))

    def _move_cursors(self, epoch_id=None, session_id=None):
        """Point the cursors at the given ids; falsy ids leave them unchanged."""
        if epoch_id:
            self.epoch_id = epoch_id
        if session_id:
            self.session_id = session_id

    def _append_metric(self, key, value, epoch_id, session_id):
        """Append ``value`` to the current epoch's ``key`` series and persist it."""
        self._move_cursors(epoch_id, session_id)
        epoch = self.epoch()
        epoch[key] = np.append(epoch[key], value)
        # save to the database
        if self.persistent:
            self.database.push(self.epoch_id, key, float(value), "epochs")

    def _compute_epoch_mean(self, key, epoch_id, session_id):
        """Store the mean of the current epoch's ``key`` series as ``<key>_mean``."""
        self._move_cursors(epoch_id, session_id)
        epoch = self.epoch()
        mean = np.mean(epoch[key])
        epoch[key + "_mean"] = mean
        # save to the database
        if self.persistent:
            self.database.update(self.epoch_id, {key + "_mean": mean}, "epochs")

    def _compute_session_mean(self, key, session_id):
        """Store and return the mean of ``<key>_mean`` over the session's epochs."""
        session = self.session(session_id)
        session[key] = np.mean([epoch[key + "_mean"] for epoch in session["epochs"]])
        # save to the database
        if self.persistent:
            self.database.update(self.session_id, {key: session[key]}, "sessions")
        return session[key]

    def session(self, session_id=None):
        """Return the current session, first moving the cursor if an id is given.

        Raises StopIteration when no stored session has the current id.
        """
        self._move_cursors(session_id=session_id)
        return next(session for session in self.sessions
                    if session["_id"] == self.session_id)

    def epoch(self, epoch_id=None, session_id=None):
        """Return the current epoch of the current session.

        Raises StopIteration when the session or the epoch cannot be found.
        """
        self._move_cursors(epoch_id, session_id)
        return next(epoch for epoch in self.session()["epochs"]
                    if epoch["_id"] == self.epoch_id)

    def new_acc(self, value, epoch_id=None, session_id=None):
        """Append an accuracy value to the current epoch."""
        self._append_metric("acc", value, epoch_id, session_id)

    def new_loss(self, value, epoch_id=None, session_id=None):
        """Append a loss value to the current epoch."""
        self._append_metric("loss", value, epoch_id, session_id)

    def epoch_compute_acc(self, epoch_id=None, session_id=None):
        """Compute and store the mean accuracy of the current epoch."""
        self._compute_epoch_mean("acc", epoch_id, session_id)

    def epoch_compute_loss(self, epoch_id=None, session_id=None):
        """Compute and store the mean loss of the current epoch."""
        self._compute_epoch_mean("loss", epoch_id, session_id)

    def epoch_set(self, key, value, epoch_id=None, session_id=None):
        """Set ``key`` on the current epoch (in memory only)."""
        self._move_cursors(epoch_id, session_id)
        self.epoch()[key] = value

    def new_session(self, label):
        """Create a session, make it current and return it."""
        # create a session
        session_id = object_id()
        session = {"_id": session_id,
                   "time": datetime.now(),
                   "platform": platform.node(),
                   "label": label,
                   "epochs": [],
                   "acc": 0.0,
                   "loss": 0.0}
        # save to the database
        if self.persistent:
            self.database.upsert(session, "sessions")
        # add to the local sessions list
        self.sessions.append(session)
        # move the session cursor to the new session
        self.session_id = session_id
        # return the current session object
        return self.session(session_id)

    def new_epoch(self, num, session_id=None):
        """Create an epoch in the current session, make it current and return it.

        :param num: stored as the epoch's ``rank``.
        """
        self._move_cursors(session_id=session_id)
        self.epoch_id = object_id()
        # the new epoch
        # NOTE(review): "$ref" names "epochs" and "$db" names "sessions";
        # these look swapped for a reference to a session - confirm.
        epoch = {"_id": self.epoch_id,
                 "session": {
                     "$ref": "epochs",
                     "$id": self.session_id,
                     "$db": "sessions"},
                 "rank": num,
                 "acc": [],
                 "loss": [],
                 "acc_mean": 0.0,
                 "loss_mean": 0.0}
        # save to the database; like every other write in this class, only
        # when the story is persistent
        if self.persistent:
            self.database.upsert(epoch, "epochs")
        # add to the local epochs of the current session
        self.session()["epochs"].append(epoch)
        # move the epoch cursor to the new epoch
        return self.epoch(self.epoch_id)

    def close_epoch(self, epoch_id=None, session_id=None):
        """Compute the mean accuracy and mean loss of the current epoch."""
        self._move_cursors(epoch_id, session_id)
        self.epoch_compute_acc(self.epoch_id, self.session_id)
        self.epoch_compute_loss(self.epoch_id, self.session_id)

    def session_compute_acc(self, session_id=None):
        """Compute, store and return the session accuracy (mean of epoch means)."""
        return self._compute_session_mean("acc", session_id)

    def session_compute_loss(self, session_id=None):
        """Compute, store and return the session loss (mean of epoch means)."""
        return self._compute_session_mean("loss", session_id)

    def close_session(self, session_id=None):
        """Compute session accuracy and loss, unless the session has no epochs."""
        self._move_cursors(session_id=session_id)
        # accuracy and loss compute from epochs
        if len(self.session()["epochs"]) > 0:
            self.session_compute_acc()
            self.session_compute_loss()
def test_property_setter():
    """Check that ``database`` and ``collection`` can be reassigned and read back."""
    a = Mongo("test", "people")
    expected = (("database", "new_test"), ("collection", "new_collection"))

    # Assign both attributes first, then verify both, in the same order.
    for attribute, value in expected:
        setattr(a, attribute, value)
    for attribute, value in expected:
        assert getattr(a, attribute) == value
def __init__(self):
    """Open a connection to the Mongo server at 192.168.0.235:27017."""
    self.connection = Mongo('192.168.0.235', 27017).get_connection()
from dataclasses import dataclass, asdict, field
import datetime
import yaml
from lib.log import LogHandler
from lib.mongo import Mongo

log = LogHandler(__name__)

# yaml.load() without an explicit Loader raises TypeError on PyYAML 6+ and
# can construct arbitrary objects on older versions; safe_load reads plain
# configuration data. The `with` block also closes the file handle.
with open('config.yaml') as config_file:
    setting = yaml.safe_load(config_file)

client = Mongo(host=setting['mongo']['host'],
               port=setting['mongo']['port'],
               user_name=setting['mongo']['user_name'],
               password=setting['mongo']['password']).connect
# One collection per configured name: collection_1 / collection_2 / collection_3.
soldcoll = client[setting['mongo']['db']][setting['mongo']['collection_1']]
listcoll = client[setting['mongo']['db']][setting['mongo']['collection_2']]
rentcoll = client[setting['mongo']['db']][setting['mongo']['collection_3']]


@dataclass()
class Estate:
    co_id: str  # residential community id
    source: str  # source website
    state: str  # state
    county: str  # administrative division one level below the state
    city: str  # city
    zipcode: str  # postal code
    # street_number: str  # street number
    # street: str  # street name
    # apartment_number: str  # house / door number
    address: str  # address
    house_type: str  # house type
""" 消费xiaozijia_house_detail队列,请求,入楼栋库xiaozijia_detail_fast """ from lib.log import LogHandler from lib.mongo import Mongo import requests import json import pika import itertools log = LogHandler(__name__) m = Mongo(host='114.80.150.196', port=27777, user_name='goojia', password='******') # m = Mongo(host='localhost', port=27017) user_collection = m.connect['friends']['xiaozijia_user'] cookie_iter = itertools.cycle([_['cookie'] for _ in user_collection.find(no_cursor_timeout=True)]) collection = m.connect['friends']['xiaozijia_house_detail'] proxies = { 'http': 'localhost:8787', 'https': 'localhost:8787' } def change(): pass class HouseDetail: def message(self, info):
def connect_mongo(self):
    """Return the collection ``self.coll`` of database ``self.db``.

    Connects to ``self.m_host``:``self.m_port`` as user 'fangjia'.
    """
    client = Mongo(self.m_host, self.m_port,
                   user_name='fangjia', password='******').connect
    database = client[self.db]
    return database[self.coll]
class MongoSingle:
    # Connection created once, when the class body is executed, and exposed
    # as a class attribute.
    # NOTE(review): setting['db'] is passed where other Mongo(...) calls pass
    # a host - confirm that this key really holds the host name.
    connection = Mongo(setting['db'], setting['port']).get_connection()
# Process(target=con_detail).start() # # 消费楼栋页面 # from sh_wuye.get_house_num import consume_queue as con_house # for i in range(60): # Process(target=con_house).start() # 房估估 # # 放入队列 # from fanggugu.get_all_community_id import produce # produce() # 消费,得到楼栋信息 from fanggugu.get_building_info import GetBuild from lib.mongo import Mongo m = Mongo('192.168.0.235', 27017) connection = m.get_connection() coll_user = m.get_connection()['fgg']['user_info'] count = 0 build = GetBuild() for i in coll_user.find(): user_name = i['user_name'] print(user_name) build.consume_queue(user_name) # 消费楼栋,得到房号数据 from fanggugu.get_house_info import GetHouse from lib.mongo import Mongo m = Mongo('192.168.0.235', 27017) connection = m.get_connection() coll_user = m.get_connection()['fgg']['user_info']
def __init__(self, comment_count=None, group_id=None, crawler_time=None):
    """Store the given fields and create the Mongo handle.

    :param comment_count: stored as ``self.comment_count``.
    :param group_id: stored as ``self.group_id``.
    :param crawler_time: stored as ``self.crawler_time``.
    """
    self.comment_count = comment_count
    self.group_id = group_id
    self.crawler_time = crawler_time

    # Host and port come from the 'mongo' section of the module-level settings.
    mongo_conf = setting['mongo']
    self.coll = Mongo(mongo_conf['host'], mongo_conf['port'])
from lib.log import LogHandler
from lib.mongo import Mongo
from lib.rabbitmq import Rabbit
import requests
import json
from xiaozijia.user_headers import get_headers
import yaml

log = LogHandler('小资家_house_fast')

# yaml.load() without an explicit Loader raises TypeError on PyYAML 6+ and
# can construct arbitrary objects on older versions; safe_load reads plain
# configuration data. The `with` block also closes the file handle.
with open('config.yaml') as config_file:
    setting = yaml.safe_load(config_file)

# mongo
m = Mongo(setting['xiaozijia']['mongo']['host'],
          setting['xiaozijia']['mongo']['port'],
          user_name=setting['xiaozijia']['mongo']['user_name'],
          password=setting['xiaozijia']['mongo']['password'])
coll_house = m.connect[setting['xiaozijia']['mongo']['db']][
    setting['xiaozijia']['mongo']['house_coll']]

# rabbit
r = Rabbit(setting['xiaozijia']['rabbit']['host'],
           setting['xiaozijia']['rabbit']['port'])
channel = r.get_channel()
house_queue = setting['xiaozijia']['rabbit']['queue']['xiaozijia_house']
detail_queue = setting['xiaozijia']['rabbit']['queue'][
    'xiaozijia_house_detail']
# Declare both queues on the channel before they are used.
channel.queue_declare(queue=house_queue)
channel.queue_declare(queue=detail_queue)
import requests import re from ceic.country import country from dateutil import parser from lib.mongo import Mongo import random import yaml from lib.log import LogHandler m = Mongo('192.168.0.235') connect = m.connect setting = yaml.load(open('config.yaml')) db_name = setting['CEIC']['mongo']['db'] State_indicators_name = setting['CEIC']['mongo']['State_indicators'] State_indicators_details_name = setting['CEIC']['mongo']['State_indicators_details'] log = LogHandler('CEIC') proxy = [{"https": "https://192.168.0.96:4234"}, {"https": "https://192.168.0.93:4234"}, {"https": "https://192.168.0.90:4234"}, {"https": "https://192.168.0.94:4234"}, {"https": "https://192.168.0.98:4234"}, {"https": "https://192.168.0.99:4234"}, {"https": "https://192.168.0.100:4234"}, {"https": "https://192.168.0.101:4234"}, {"https": "https://192.168.0.102:4234"}, {"https": "https://192.168.0.103:4234"}, ] class CEIC:
mongo_host = setting['cityhouse']['mongo']['host'] mongo_port = setting['cityhouse']['mongo']['port'] user_name = setting['cityhouse']['mongo']['user_name'] password = setting['cityhouse']['mongo']['password'] db_name = setting['cityhouse']['mongo']['db'] db_coll = setting['cityhouse']['mongo']['comm_coll'] p = Proxies() proxy = next(p) headers = { 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Mobile Safari/537.36' } m = Mongo(host=mongo_host, port=mongo_port, user_name=user_name, password=password) collection = m.connect[db_name]['cityhouse_9_3'] def count(): for province in items['items']: city_list = province['citys'] for city in city_list: city_code = city['cityCode'] city_name = city['cityName'] url = 'http://api.cityhouse.cn/csfc/v2/ha/list?percount=10&proptype=11&page=1&apiKey=4LiEDwxaRaAYTA3GBfs70L&ver=2&city=' \ + city_code real_num = collection.find({'city': city_name}).count() # try: # res = requests.get(url, headers=headers, proxies=proxy)
"""
Fetch all AMap (Gaode) POIs based on each city's latitude/longitude.
City coordinate database: host:192.168.0.136 port:27017 db:fangjia_base collection:city_bounds_box
Put the coordinates of the diagonal corner points plus the type into the queue amap_all_url
9,000,000+ pairs of diagonal points in total
"""
from lib.mongo import Mongo
import pika
import json

m = Mongo('192.168.0.136')
collection = m.connect['fangjia_base']['city_bounds_box']


def all_url(a_type, rabbit):
    """
    Put the type and the coordinates into the amap_all_url queue

    NOTE(review): the code visible here only prints the JSON body and never
    uses `rabbit`; the publishing step is presumably further down - confirm.

    :param a_type: stored under the 'type' key of every message body
    :param rabbit: not used in the visible part of the function
    :return:
    """
    # Every document whose 'city' is not '中国' is processed.
    for info in collection.find({'city': {'$nin': ['中国']}}):
        square_list = info['bound_gd']
        body = json.dumps({'square_list': square_list, 'type': a_type})
        print(body)
import pika from lib.mongo import Mongo from lib.rabbitmq import Rabbit setting = yaml.load(open('config.yaml')) # rabbit r = Rabbit(setting['dianping']['rabbit']['host'], setting['dianping']['rabbit']['port']) connection = r.connection channel = connection.channel() city_queue = setting['dianping']['rabbit']['queue']['city_queue'] channel.queue_declare(queue=city_queue) # mongo m = Mongo(setting['dianping']['mongo']['host'], setting['dianping']['mongo']['port']) coll = m.connect[setting['dianping']['mongo']['db']][setting['dianping'] ['mongo']['find_coll']] kind_list = { # '美食': 'ch10', # '休闲娱乐': 'ch30', # '丽人': 'ch50', # '周边游': 'ch35', # '运动健身': 'ch45', # '购物': 'ch20', # '学习培训': 'ch75', # '生活服务': 'ch80', # '医疗健康': 'ch85', '爱车': 'ch65', # '宠物': 'ch95',
class Console(cmd2.Cmd):
    """Interactive Omnibus shell.

    Artifacts are stored in MongoDB (``self.db``), the active session is
    cached in Redis (``self.session``) and module commands are forwarded
    to ``self.dispatch``.
    """

    # cmd2 built-in commands that are removed from the shell.
    _REMOVED_BUILTINS = (
        'do_alias', 'do_edit', 'do_eof', 'do_shell', 'do_eos', 'do_load',
        'do_py', 'do_pyscript', 'do_shortcuts', 'do_unalias',
        'do__relative_load',
    )

    def __init__(self):
        cmd2.Cmd.__init__(
            self,
            completekey='tab',
            persistent_history_file=get_option('core', 'hist_file', config),
            persistent_history_length=int(get_option('core', 'hist_size', config)))

        self.allow_cli_args = False
        self.default_to_shell = False
        self.intro = 'Welcome to the Omnibus shell! Type "session" to get started or "help" to view all commands.'
        self.allow_redirection = True
        self.prompt = 'omnibus >> '
        self.redirector = '>'
        self.quit_on_sigint = False

        # The built-ins are deleted from the cmd2 base class itself, so a
        # plain `del` raises AttributeError once an attribute is already
        # gone (e.g. when a second Console is created in the same process).
        # Only delete what is still there.
        for command in self._REMOVED_BUILTINS:
            if command in vars(cmd2.Cmd):
                delattr(cmd2.Cmd, command)

        self.db = Mongo(config)
        self.dispatch = Dispatch(self.db)
        self.session = None

        if DEBUG:
            self.do_set('debug true')

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _print_help(self, title, section):
        # Print a bold title followed by every entry of help_dict[section].
        bold_msg(title)
        for cmd in help_dict[section]:
            print(cmd)

    def _resolve_artifact(self, arg):
        # Translate a session key into the artifact name stored under it.
        # Returns `arg` unchanged when it is not a session key, and None
        # (after reporting the error) when the key holds nothing.
        is_key, value = lookup_key(self.session, arg)
        if not is_key:
            return arg
        if value is None:
            error('Unable to find artifact key in session (%s)' % arg)
            return None
        return value

    def _add_to_session(self, name):
        # Cache `name` under the next free ID, opening a session if needed.
        if self.session is None:
            self.session = RedisCache(config)
            self.session.set(1, name)
            success('Opened new session')
            print('Artifact ID: 1')
        else:
            _id = sum(1 for _ in self.session.db.scan_iter()) + 1
            self.session.set(_id, name)
            print('Artifact ID: %s' % _id)

    def _run_module(self, module, arg, *extra):
        # Submit `arg` to a dispatch module and pretty-print the result.
        result = self.dispatch.submit(self.session, module, arg, *extra)
        pp_json(result)

    # ------------------------------------------------------------------
    # shell plumbing
    # ------------------------------------------------------------------

    def sigint_handler(self, signum, frame):
        """Ensure Redis DB is cleared before exiting application"""
        pipe_proc = self.pipe_proc
        if pipe_proc is not None:
            pipe_proc.terminate()

        if self.session is not None:
            self.session.flush()

        raise KeyboardInterrupt('Caught keyboard interrupt; quitting ...')

    def default(self, arg):
        """Override default function for custom error message"""
        # Lines starting with '#' are ignored without an error message.
        if arg.startswith('#'):
            return

        error('Unknown command')
        return

    def do_quit(self, _):
        """Exit Omnibus shell."""
        self._should_quit = True

        if self.session is not None:
            running('Clearing artifact cache ...')
            self.session.flush()

        warning('Closing Omnibus shell ...')
        return self._STOP_AND_EXIT

    def do_clear(self, arg):
        """Clear the console"""
        os.system('clear')

    # ------------------------------------------------------------------
    # help listings
    # ------------------------------------------------------------------

    def do_modules(self, arg):
        """Show module list"""
        self._print_help('[ Modules ]', 'modules')

    def do_artifacts(self, arg):
        """Show artifact information and available commands"""
        self._print_help('[ Artifacts ]', 'artifacts')

    def do_general(self, arg):
        """Show general commands"""
        self._print_help('[ General Commands ]', 'general')

    def do_sessions(self, arg):
        """Show session commands"""
        self._print_help('[ Session Commands ]', 'sessions')

    def do_redirect(self, arg):
        """ Show redirection command help """
        info('Omnibus supports command redirection to output files using the ">" character. For example, "cat host zeroharbor.org > zh.json" will pipe the output of the cat command to ./zh.json on disk.')

    def do_banner(self, arg):
        """Display random ascii art banner"""
        print(asciiart.show_banner())

    # ------------------------------------------------------------------
    # session handling
    # ------------------------------------------------------------------

    def do_session(self, arg):
        """Open a new session"""
        self.session = RedisCache(config)
        if self.session.db is None:
            error('Failed to connect to Redis back-end. Please ensure the Redis service is running')
        else:
            success('Opened new session')

    def do_ls(self, arg):
        """View current sessions artifacts"""
        if self.session is None:
            warning('No active session')
            return

        count = 0
        for key in self.session.db.scan_iter():
            print('[%s] %s' % (key, self.session.get(key)))
            count += 1
        info('Active Artifacts: %d' % count)

    def do_wipe(self, arg):
        """Clear currently active artifacts """
        if self.session is None:
            warning('No active session; start a new session by running the "session" command')
            return

        info('Clearing active artifacts from cache ...')
        self.session.flush()
        success('Artifact cache cleared')

    def do_rm(self, arg):
        """Remove artifact from session by ID

        Usage: rm <session id>"""
        try:
            arg = int(arg)
        except (TypeError, ValueError):
            # Only conversion failures mean "not an integer"; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            error('Artifact ID must be an integer')
            return

        if self.session is None:
            warning('No active session; start a new session by running the "session" command')
            return

        if self.session.exists(arg):
            self.session.delete(arg)
            success('Removed artifact from cache (%s)' % arg)
        else:
            warning('Unable to find artifact by ID (%s)' % arg)

    # ------------------------------------------------------------------
    # artifact management
    # ------------------------------------------------------------------

    def do_new(self, arg):
        """Create a new artifact

        Artifacts are created by their name. An IP address artifacts name would be the IP address itself,
        an FQDN artifacts name is the domain name, and so on.

        Usage: new <artifact name> """
        artifact = create_artifact(arg)

        if not self.db.exists(artifact.type, {'name': artifact.name}):
            doc_id = self.db.insert_one(artifact.type, artifact)
            if doc_id is not None:
                success('Created new artifact (%s - %s)' % (artifact.name, artifact.type))

        self._add_to_session(artifact.name)

    def do_delete(self, arg):
        """Remove artifact from database by name or ID

        Usage: delete <name>
               delete <session id>"""
        arg = self._resolve_artifact(arg)
        if arg is None:
            return

        artifact_type = detect_type(arg)
        self.db.delete_one(artifact_type, {'name': arg})

    def do_cat(self, arg):
        """View artifact details or list API keys

        Usage: cat apikeys
               cat <artifact name>"""
        if arg == 'apikeys':
            # Close the configuration file once it has been read.
            with open(common.API_CONF, 'rb') as conf_file:
                data = json.load(conf_file)
            print(json.dumps(data, indent=2))
            return

        arg = self._resolve_artifact(arg)
        if arg is None:
            return

        artifact_type = detect_type(arg)
        result = self.db.find(artifact_type, {'name': arg}, one=True)
        if len(result) == 0:
            info('No entry found for artifact (%s)' % arg)
        else:
            print(json.dumps(result, indent=2, separators=(',', ':')))

    def do_open(self, arg):
        """Load text file list of artifacts

        Command will detect each line items artifact type, create the artifact,
        and add it to the current session if there is one.

        Usage: open <path/to/file.txt> """
        if not os.path.exists(arg):
            warning('Cannot find file on disk (%s)' % arg)
            return

        for line in read_file(arg, True):
            new_artifact = create_artifact(line)

            if not self.db.exists(new_artifact.type, {'name': new_artifact.name}):
                doc_id = self.db.insert_one(new_artifact.type, new_artifact)
                if doc_id is not None:
                    # Report the created artifact object; the raw line read
                    # from the file has no .name / .type attributes.
                    success('Created new artifact (%s - %s)' % (new_artifact.name, new_artifact.type))

            # Cache the artifact name, as do_new does, rather than the path
            # of the file the list was loaded from.
            self._add_to_session(new_artifact.name)

        success('Finished loading artifact list')

    def do_report(self, arg):
        """Save artifact report as JSON file

        Usage: report <artifact name>
               report <session id>"""
        arg = self._resolve_artifact(arg)
        if arg is None:
            return

        _type = detect_type(arg)
        result = self.db.find(_type, {'name': arg}, one=True)
        if len(result) == 0:
            warning('No entry found for artifact (%s)' % arg)
            return

        report = storage.JSON(data=result, file_path=output_dir)
        report.save()
        if os.path.exists(report.file_path):
            success('Saved artifact report (%s)' % report.file_path)
        else:
            error('Failed to properly save report')

    def do_source(self, arg):
        """Add source to given artifact or most recently added artifact if not specified

        Usage: source                            # adds to last created artifact
               source <artifact name|session id> # adds to specific artifact
        """
        if arg == '':
            if self.session is None:
                warning('No active session; start a new session by running the "session" command')
                return
            target = self.session.receive('artifacts')
        else:
            # Resolve a session id to its artifact name before detecting
            # the type, and use the result as the artifact to update.
            # Previously the name `last` was only assigned when no argument
            # was given, so this branch failed with NameError.
            target = self._resolve_artifact(arg)
            if target is None:
                return
            arg = target

        _type = detect_type(target)
        if self.db.exists(_type, {'name': target}):
            self.db.update_one(_type, {'name': target}, {'source': arg})
            success('Added source to artifact entry (%s: %s)' % (target, arg))
        else:
            warning('Failed to find last artifact in MongoDB. Run "new <artifact name>" before using the source command')

    # ------------------------------------------------------------------
    # module commands
    # ------------------------------------------------------------------

    def do_machine(self, arg):
        """Run all modules available for an artifacts type

        Usage: machine <artifact name>
               machine <session id>"""
        result = self.dispatch.machine(self.session, arg)
        pp_json(result)

    def do_abusech(self, arg):
        """Search Abuse.ch for artifact details """
        pass

    def do_clearbit(self, arg):
        """Search Clearbit for email address """
        self._run_module('clearbit', arg)

    def do_censys(self, arg):
        """Search Censys for IPv4 address """
        self._run_module('censys', arg)

    def do_csirtg(self, arg):
        """Search CSIRTG for hash information"""
        self._run_module('csirtg', arg)

    def do_cymon(self, arg):
        """Search Cymon for host """
        self._run_module('cymon', arg)

    def do_dnsbrute(self, arg):
        """Enumerate DNS subdomains of FQDN """
        pass

    def do_dnsresolve(self, arg):
        """Retrieve DNS records for host """
        self._run_module('dnsresolve', arg)

    def do_geoip(self, arg):
        """Retrieve Geolocation details for host """
        self._run_module('geoip', arg)

    def do_fullcontact(self, arg):
        """Search FullContact for email address """
        self._run_module('fullcontact', arg)

    def do_gist(self, arg):
        """Search Github Gist's for artifact as string """
        pass

    def do_gitlab(self, arg):
        """Check Gitlab for active username """
        pass

    def do_github(self, arg):
        """Check GitHub for active username"""
        self._run_module('github', arg)

    def do_hackedemails(self, arg):
        """Check hacked-emails.com for email address"""
        self._run_module('hackedemails', arg)

    def do_he(self, arg):
        """Search Hurricane Electric for host"""
        self._run_module('he', arg)

    def do_hibp(self, arg):
        """Check HaveIBeenPwned for email address"""
        self._run_module('hibp', arg)

    def do_ipinfo(self, arg):
        """Retrieve ipinfo resutls for host"""
        self._run_module('ipinfo', arg)

    def do_ipvoid(self, arg):
        """Search IPVoid for host"""
        self._run_module('ipvoid', arg)

    def do_isc(self, arg):
        """Search SANS ISC for host"""
        # The dispatch module for this command is named 'sans'.
        self._run_module('sans', arg)

    def do_keybase(self, arg):
        """Search Keybase for active username"""
        self._run_module('keybase', arg)

    def do_monitor(self, arg):
        """Setup active monitors for RSS Feeds, Pastebin, Gist, and other services"""
        pass

    def do_mdl(self, arg):
        """Search Malware Domain List for host"""
        pass

    def do_nmap(self, arg):
        """Run NMap discovery scan against host"""
        self._run_module('nmap', arg)

    def do_otx(self, arg):
        """Search AlienVault OTX for host or hash artifacts"""
        self._run_module('otx', arg)

    def do_passivetotal(self, arg):
        """Search PassiveTotal for host"""
        self._run_module('passivetotal', arg)

    def do_pastebin(self, arg):
        """Search Pastebin for artifact as string"""
        pass

    def do_pgp(self, arg):
        """Search PGP records for email address or user"""
        self._run_module('pgp', arg)

    def do_projecthp(self, arg):
        """Search Project Honeypot for host"""
        pass

    def do_reddit(self, arg):
        """Search Reddit for active username"""
        pass

    def do_rss(self, arg):
        """Read latest from RSS feed

        Usage: rss <feed url>"""
        self._run_module('rss', arg, True)

    def do_securitynews(self, arg):
        """Get current cybersecurity headlines from Google News"""
        self._run_module('securitynews', arg, True)

    def do_shodan(self, arg):
        """Query Shodan for host"""
        self._run_module('shodan', arg)

    def do_threatcrowd(self, arg):
        """Search ThreatCrowd for host"""
        self._run_module('threatcrowd', arg)

    def do_threatexpert(self, arg):
        """Search ThreatExpert for host"""
        self._run_module('threatexpert', arg)

    def do_totalhash(self, arg):
        """Search TotalHash for host"""
        pass

    def do_twitter(self, arg):
        """Get Twitter info for username"""
        pass

    def do_urlvoid(self, arg):
        """Search URLVoid for domain name"""
        self._run_module('urlvoid', arg)

    def do_usersearch(self, arg):
        """Search Usersearch.com for active usernames"""
        pass

    def do_virustotal(self, arg):
        """Search VirusTotal for IPv4, FQDN, or Hash"""
        self._run_module('virustotal', arg)

    def do_vxvault(self, arg):
        """Search VXVault for IPv4 or FQDN"""
        pass

    def do_web(self, arg):
        """Fingerprint webserver"""
        pass

    def do_whois(self, arg):
        """Perform WHOIS lookup on host"""
        self._run_module('whois', arg)

    def do_whoismind(self, arg):
        """Search Whois Mind for domains associated to an email address"""
        self._run_module('whoismind', arg)
from lib.mongo import Mongo from dianping.request_detail import request_get headers = { 'Cookie': "showNav=#nav-tab|0|1; navCtgScroll=200; showNav=javascript:; navCtgScroll=100; _lxsdk_cuid=16420be4e6bc8-01d123b766c0b2-39614101-1aeaa0-16420be4e6dc8; _lxsdk=16420be4e6bc8-01d123b766c0b2-39614101-1aeaa0-16420be4e6dc8; _hc.v=b83d3f69-dd86-b525-f3e0-70de4b48876e.1529557700; s_ViewType=10; aburl=1; wedchatguest=g-63166371096986944; __mta=223777060.1529993859415.1529996989084.1529996989087.4; Hm_lvt_e6f449471d3527d58c46e24efb4c343e=1530000088; Hm_lpvt_e6f449471d3527d58c46e24efb4c343e=1530000088; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; wed_user_path=55|0; Hm_lvt_dbeeb675516927da776beeb1d9802bd4=1529995150,1530062234; Hm_lpvt_dbeeb675516927da776beeb1d9802bd4=1530062234; cityInfo=%7B%22cityId%22%3A952%2C%22cityEnName%22%3A%22huaining%22%2C%22cityName%22%3A%22%E6%80%80%E5%AE%81%E5%8E%BF%22%7D; cy=1; cye=shanghai; _lxsdk_s=1643ed097cd-de5-109-cf7%7C%7C142", 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36", } ip = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {"host": "http-pro.abuyun.com", "port": "9010", "user": "******", "pass": "******"} proxy = { 'http': ip, 'https': ip } m = Mongo('114.80.150.196', 27777) coll = m.connect['dianping']['city_region_hot_new'] for i in city_dict: pinyin = city_dict[i] city = i print(city) coll.remove({'city': i}) url = 'http://www.dianping.com/' + pinyin + '/ch10' response = requests.get(url, headers=headers) html = response.text tree = etree.HTML(html) # # 收集菜系字典 # cookie_list = tree.xpath('//*[@id="classfy"]/a') # kind_dict = {} # for kind in cookie_list: # kind_url = kind.xpath('@href')[0]
import requests
from lib.mongo import Mongo
from lib.rabbitmq import Rabbit

# Module-level connections, created at import time.
r = Rabbit('127.0.0.1', 5673)
channel = r.get_channel()
m = Mongo('114.80.150.196', 27777, user_name='goojia', password='******')
coll = m.connect['fgg']['comm']


class Fgg:
    def __init__(self):
        # Request headers; 'Authorization' starts out empty.
        self.headers = {
            'Authorization': "",
        }
        # Proxy URL of the form http://user:pass@host:port.
        self.ip = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
            "host": "http-pro.abuyun.com",
            "port": "9010",
            "user": "******",
            "pass": "******"
        }
        # The same proxy URL is used for both schemes.
        self.proxies = {
            'http': self.ip,
            'https': self.ip,
        }
        self.s = requests.session()

    def login(self):
        # NOTE(review): only the login URL is assigned in the visible code;
        # the request itself is presumably further down - confirm.
        url_login = "******"
collection:city_bounds_box 把对角点的经纬度+类型放入 队列 amap_all_url 一共900w+对对角点 优先级排序 '120000','170000','190000','050000','060000','100000','010000','020000','030000','090000','130000','160000','040000','070000','080000','110000','140000','150000','180000','200000' """ from lib.mongo import Mongo from amap_reconfiguration.amap_exception import AmapException from amap_reconfiguration.api_builder import ApiKey, api_key_list import pika import json m = Mongo('114.80.150.198', 38888) collection = m.connect['fangjia_base']['city_bounds_box'] API_KEY_BUILDER = ApiKey() DAILY_COUNT_ACCORDING_KEYS = len(api_key_list) * 300000 def all_url(a_type, rabbit): """ 把类型和经纬度放入amap_all_url队列 :param a_type: :param rabbit: :return: """ url_list = [] for info in collection.find({'city': {'$nin': ['中国']}}):
from lib.mongo import Mongo from lib.standardization import standard_city, standard_block from pymongo import MongoClient m = Mongo('192.168.0.235', 27017) coll_name = m.connect['comm_price']['zhugefang_backup'] n = MongoClient('192.168.0.61', 27017) save_coll = n['fangjia_tmp']['zhugefang_unitprice_source'] def mongo_chanch(): for i in coll_name.find({}, no_cursor_timeout=True): name = i['comm_name'] city_name_ = i['city'] DistrictName_ = i['comm_addr'] UnitPrice = int(i['price']) update_time = i['time'] category = 'district' s_date = int(update_time.strftime('%Y%m')) city_name = standard_city(city_name_) DistrictName = standard_block(DistrictName_) data = { 'category': category, 'city': city_name, 'name': name, 'region': DistrictName, 's_date': s_date, 'zhugefang_esf_price': UnitPrice, } if not data['region']:
def connect_mongo(self):
    """Return the collection ``self.coll`` of database ``self.db``.

    Connects to ``self.m_host``:``self.m_port`` as user 'goojia'.
    """
    client = Mongo(self.m_host, self.m_port,
                   user_name='goojia', password='******').connect
    database = client[self.db]
    return database[self.coll]
import pymongo
from lib.mongo import Mongo

m = Mongo('114.80.150.196', 27777, user_name='fangjia', password='******')
key_coll = m.connect['wuye']['key_name']


def connect_mongodb(host, port, database, collection):
    """Return `collection` of `database` on the MongoDB server at host:port."""
    return pymongo.MongoClient(host, port)[database].get_collection(collection)


set_ = set([])
comm_coll = connect_mongodb('114.80.150.198', 38888, 'fangjia', 'seaweed')
# key_coll = connect_mongodb('114.80.150.196', 27777, 'wuye', 'key_name')
list_ = comm_coll.find({'city': '上海'})
count = 0
# Store every element of each name as its own document, keyed by '_id'.
for community in list_:
    name = community['name']
    for i in name:
        print(i)
        data = {'_id': i}
        try:
            key_coll.insert(data)
        except Exception:
            # Any failure is reported with the same message, which
            # means "duplicate key".
            print('key重复')
def __init__(self, bot):
    """Keep a reference to the bot, set up the database handle and record the start time.

    :param bot: object stored as ``self.bot``.
    """
    self.bot = bot
    # A fresh Mongo instance is handed to Mongo.init_db; whatever that
    # call returns becomes the database handle.
    mongo = Mongo()
    self.db = Mongo.init_db(mongo)
    self.server_db = None
    # Value of time() at construction.
    self.start_time = time()