Exemplo n.º 1
0
def test_Mongo_insert_then_find():
    """Insert one document and check that querying by company returns it.

    A MongoDB server must be running in the background for this to work.
    """
    store = Mongo("test", "people")
    expected = {"name": "Darth Vader", "company": "Empire", "interests": "The Force"}
    store.insert_one(expected)
    matches = store.find({"company": "Empire"})
    # After insert_one the dict carries an '_id' key that is not part of the
    # literal above; remove it on both sides so only the payload is compared.
    expected.pop('_id')
    for document in matches:
        document.pop('_id')
        assert expected == document
Exemplo n.º 2
0
    def __init__(self):
        """Configure the cmd2 shell, drop unwanted built-ins and open the backends.

        History file and length come from the 'core' section of ``config``.
        A Mongo handle and a Dispatch bound to it are created; no session is
        active until one is opened.
        """
        cmd2.Cmd.__init__(self,
            completekey='tab',
            persistent_history_file=get_option('core', 'hist_file', config),
            persistent_history_length=int(get_option('core', 'hist_size', config)))

        self.allow_cli_args = False
        self.default_to_shell = False
        self.intro = 'Welcome to the Omnibus shell! Type "session" to get started or "help" to view all commands.'
        self.allow_redirection = True
        self.prompt = 'omnibus >> '
        self.redirector = '>'
        self.quit_on_sigint = False

        # Remove the built-in cmd2 commands this shell does not expose. They
        # live on the shared cmd2.Cmd class, so once one instance has been
        # built they are already gone; deleting unconditionally would raise
        # AttributeError on any later instantiation.
        for command in ('do_alias', 'do_edit', 'do_eof', 'do_shell', 'do_eos',
                        'do_load', 'do_py', 'do_pyscript', 'do_shortcuts',
                        'do_unalias', 'do__relative_load'):
            if command in vars(cmd2.Cmd):
                delattr(cmd2.Cmd, command)

        self.db = Mongo(config)
        self.dispatch = Dispatch(self.db)
        self.session = None

        if DEBUG:
            self.do_set('debug true')
Exemplo n.º 3
0
def test_Mongo_list_insert_then_find():
    """Insert two documents at once and check each can be found by name.

    A MongoDB server must be running in the background for this to work.
    """
    store = Mongo("test","people")
    ethan = {"name": "Ethan Hunt", "company": "IMF", "interests": "Epionage"}
    eddie = {"name": "Chef Eddie", "company": "Unemployed", "interests": "food"}
    store.insert_many([ethan, eddie])
    ethan_matches = store.find({"name":"Ethan Hunt"})
    # Both dicts carry an '_id' key after insert_many; strip it so only the
    # payload fields take part in the comparisons below.
    for inserted in (ethan, eddie):
        inserted.pop('_id')
    for document in ethan_matches:
        document.pop("_id")
        assert ethan == document
    for document in store.find({"name":"Chef Eddie"}):
        document.pop("_id")
        assert eddie == document
Exemplo n.º 4
0
def test_Mongo_insert_then_delete():
    """Insert a document, delete it by name and check nothing is left.

    A MongoDB server must be running in the background for this to work.
    """
    store = Mongo("test","people")
    professor = {"name": "Prof I", "company": "Rutgers", "interests": "Statistics"}
    store.insert_one(professor)
    store.delete_many({"name":"Prof I"})
    leftovers = store.find({"name":"Prof I"})
    assert leftovers.count() == 0
Exemplo n.º 5
0
 def __init__(self, bot):
     """Keep the bot reference and initialise the database handle."""
     self.bot = bot
     # init_db is called through the class with a fresh Mongo instance.
     mongo = Mongo()
     self.db = Mongo.init_db(mongo)
     # No server-specific database is selected at construction time.
     self.server_db = None
Exemplo n.º 6
0
import requests
from auction import Auction
from lib.log import LogHandler
from sql_mysql import inquire, TypeAuction
from lib.mongo import Mongo
from lxml import etree
import datetime
import yaml
import re

# Load the crawler configuration. yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and rejected by PyYAML 6; safe_load parses
# plain-data configuration files, and the context manager closes the file.
with open('config.yaml') as _config_file:
    setting = yaml.safe_load(_config_file)
# Authenticated variant, kept for reference:
# client = Mongo(host=setting['mongo']['host'], port=setting['mongo']['port'], user_name=setting['mongo']['user_name'],
#                password=setting['mongo']['password']).connect
client = Mongo(host=setting['mongo']['host'],
               port=setting['mongo']['port']).connect
# Target collection, selected by the db/collection names from the config.
coll = client[setting['mongo']['db']][setting['mongo']['collection']]

# Source tag passed to inquire() by Gongpaiwang.
source = 'gongpaiwang'
log = LogHandler(__name__)


class Gongpaiwang:
    def __init__(self):
        """Set the request headers, an empty result list and the auction types."""
        user_agent = (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
        )
        self.headers = {'User-Agent': user_agent}
        self.list_info = []
        # Auction types for this source, as returned by inquire().
        self.type_list = inquire(TypeAuction, source)

    def start_crawler(self):
Exemplo n.º 7
0
import requests
from lib.mongo import Mongo
import re

# Module-level Mongo handle; host, port and credentials are hard-coded here.
# NOTE(review): consider moving the address and credentials into configuration.
m = Mongo(host='114.80.150.196', port=27777, user_name='fangjia', password='******')
# Collection that ShopDetail.get_shop_lat iterates over.
collection = m.connect['dianping']['dianping_zhangshang']
# Collection that ShopDetail.get_shop_lat inserts the lat/lng records into.
collection_lat = m.connect['dianping']['dianping_zhangshang_lat']


class ShopDetail:
    def __init__(self):
        self.url = 'https://m.dianping.com/shop/20721516/map'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        }

    def get_shop_lat(self):
        for i in collection.find():
            try:
                r = requests.get(url='https://m.dianping.com/shop/{}/map'.format(i['id']), headers=self.headers)
                info = re.search('PAGE_INITIAL_STATE(.*?)</script>', r.text, re.S | re.M).group(1)
                lat = re.search('"shopLat":(.*?),', info, re.S | re.M).group(1)
                lng = re.search('"shopLng":(.*?),', info, re.S | re.M).group(1)
                print(lat, lng)
                collection_lat.insert_one({
                    'info': info,
                    'id': i['id'],
                    'lng': lng,
                    'lat': lat,
                })
            except Exception as e:
Exemplo n.º 8
0
class Story:
    """In-memory record of sessions and their epochs, optionally mirrored to a database.

    The object keeps two cursors, ``session_id`` and ``epoch_id``. Every
    method that accepts ``epoch_id`` / ``session_id`` first moves the
    matching cursor when a truthy value is given and then works on the
    element the cursors point at.

    When ``persistent`` is true, changes are also written through
    ``self.database``; when it is false nothing is written.
    """

    class Output(Enum):
        STD = 1
        CACHE = 2
        FILE = 3
        DATABASE = 4

    def __init__(self, name, persistent=True):
        """Create an empty story.

        :param name: story name, also used to build the logger name
        :param persistent: whether changes are mirrored to the database
        """
        self.name = name
        self.sessions = []
        self.session_id = ""
        self.epoch_id = 0
        self.persistent = persistent
        self.database = Mongo()
        self.log = Logger("story-{}".format(name))

    def _move_cursor(self, epoch_id=None, session_id=None):
        """Move the epoch and/or session cursor; falsy values leave it untouched."""
        if epoch_id:
            self.epoch_id = epoch_id
        if session_id:
            self.session_id = session_id

    def _append_metric(self, key, value):
        """Append ``value`` to the current epoch's ``key`` series and persist it."""
        epoch = self.epoch()
        epoch[key] = np.append(epoch[key], value)

        # save to the database
        if self.persistent:
            self.database.push(self.epoch_id, key, float(value), "epochs")

    def _store_epoch_mean(self, key):
        """Store the mean of the current epoch's ``key`` series under ``<key>_mean``."""
        epoch = self.epoch()
        mean_key = key + "_mean"
        mean_value = np.mean(epoch[key])
        epoch[mean_key] = mean_value

        # save to the database
        if self.persistent:
            self.database.update(self.epoch_id, {mean_key: mean_value}, "epochs")

    def _store_session_mean(self, key):
        """Store and return the mean of ``<key>_mean`` over the current session's epochs."""
        session = self.session()
        session[key] = np.mean([epoch[key + "_mean"] for epoch in session["epochs"]])

        # save to the database
        if self.persistent:
            self.database.update(self.session_id, {key: session[key]}, "sessions")

        return session[key]

    def session(self, session_id=None):
        """Return the session under the session cursor.

        :param session_id: when truthy, move the session cursor here first
        :raises StopIteration: if no stored session has the cursor's id
        """
        self._move_cursor(session_id=session_id)

        return next(session for session in self.sessions if session["_id"] == self.session_id)

    def epoch(self, epoch_id=None, session_id=None):
        """Return the epoch under the epoch cursor within the current session.

        :raises StopIteration: if the session or the epoch cannot be found
        """
        self._move_cursor(epoch_id, session_id)

        return next(epoch for epoch in self.session()["epochs"] if epoch["_id"] == self.epoch_id)

    def new_acc(self, value, epoch_id=None, session_id=None):
        """Append an accuracy value to the current epoch."""
        self._move_cursor(epoch_id, session_id)
        self._append_metric("acc", value)

    def new_loss(self, value, epoch_id=None, session_id=None):
        """Append a loss value to the current epoch."""
        self._move_cursor(epoch_id, session_id)
        self._append_metric("loss", value)

    def epoch_compute_acc(self, epoch_id=None, session_id=None):
        """Compute the current epoch's mean accuracy and store it as ``acc_mean``."""
        self._move_cursor(epoch_id, session_id)
        self._store_epoch_mean("acc")

    def epoch_compute_loss(self, epoch_id=None, session_id=None):
        """Compute the current epoch's mean loss and store it as ``loss_mean``."""
        self._move_cursor(epoch_id, session_id)
        self._store_epoch_mean("loss")

    def epoch_set(self, key, value, epoch_id=None, session_id=None):
        """Set ``key`` on the current epoch in memory only (nothing is persisted)."""
        self._move_cursor(epoch_id, session_id)

        self.epoch()[key] = value

    def new_session(self, label):
        """Create a session, make it current and return it.

        :param label: free-form label stored on the session
        """
        session_id = object_id()
        session = {"_id": session_id,
                   "time": datetime.now(),
                   "platform": platform.node(),
                   "label": label,
                   "epochs": [],
                   "acc": 0.0,
                   "loss": 0.0}

        # save to the database
        if self.persistent:
            self.database.upsert(session, "sessions")

        # add to the local sessions list
        self.sessions.append(session)

        # move the session cursor to the new session
        self.session_id = session_id

        # return the current session object
        return self.session(session_id)

    def new_epoch(self, num, session_id=None):
        """Create an epoch in the current session, make it current and return it.

        :param num: rank of the epoch within the session
        :param session_id: when truthy, move the session cursor here first
        """
        self._move_cursor(session_id=session_id)
        self.epoch_id = object_id()

        # NOTE(review): "$ref" names the "epochs" collection and "$db" is
        # "sessions" although "$id" is a session id - confirm these two are
        # not swapped before relying on the reference.
        epoch = {"_id": self.epoch_id,
                 "session": { "$ref": "epochs", "$id": self.session_id, "$db": "sessions"},
                 "rank": num, "acc": [], "loss": [], "acc_mean": 0.0, "loss_mean": 0.0}

        # save to the database; guarded like every other write so that a
        # non-persistent story never touches the database
        if self.persistent:
            self.database.upsert(epoch, "epochs")

        # add to the local epochs of the current session
        self.session()["epochs"].append(epoch)

        # move the epoch cursor to the new epoch
        return self.epoch(self.epoch_id)

    def close_epoch(self, epoch_id=None, session_id=None):
        """Finalize the current epoch by computing its mean accuracy and loss."""
        self._move_cursor(epoch_id, session_id)

        self.epoch_compute_acc(self.epoch_id, self.session_id)
        self.epoch_compute_loss(self.epoch_id, self.session_id)

    def session_compute_acc(self, session_id=None):
        """Store and return the mean of ``acc_mean`` over the session's epochs."""
        self._move_cursor(session_id=session_id)

        return self._store_session_mean("acc")

    def session_compute_loss(self, session_id=None):
        """Store and return the mean of ``loss_mean`` over the session's epochs."""
        self._move_cursor(session_id=session_id)

        return self._store_session_mean("loss")

    def close_session(self, session_id=None):
        """Finalize the current session; a session without epochs is left as is."""
        self._move_cursor(session_id=session_id)

        # accuracy and loss compute from epochs
        if len(self.session()["epochs"]) > 0:
            self.session_compute_acc()
            self.session_compute_loss()
Exemplo n.º 9
0
def test_property_setter():
    """Check that database and collection can be reassigned and read back."""
    store = Mongo("test", "people")
    store.database = "new_test"
    store.collection = "new_collection"
    observed = (store.database, store.collection)
    assert observed[0] == "new_test"
    assert observed[1] == "new_collection"
Exemplo n.º 10
0
 def __init__(self):
     """Open a connection to the hard-coded Mongo host and keep it."""
     self.connection = Mongo('192.168.0.235', 27017).get_connection()
Exemplo n.º 11
0
from dataclasses import dataclass, asdict, field
import datetime
import yaml
from lib.log import LogHandler
from lib.mongo import Mongo

log = LogHandler(__name__)

# Load the configuration. yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and rejected by PyYAML 6; safe_load parses
# plain-data configuration files, and the context manager closes the file.
with open('config.yaml') as _config_file:
    setting = yaml.safe_load(_config_file)
client = Mongo(host=setting['mongo']['host'],
               port=setting['mongo']['port'],
               user_name=setting['mongo']['user_name'],
               password=setting['mongo']['password']).connect
# Three collections of the same database, named by collection_1..3 in the
# config. NOTE(review): the variable names suggest sold / listed / rented
# records - confirm against the config file.
soldcoll = client[setting['mongo']['db']][setting['mongo']['collection_1']]
listcoll = client[setting['mongo']['db']][setting['mongo']['collection_2']]
rentcoll = client[setting['mongo']['db']][setting['mongo']['collection_3']]


@dataclass
class Estate:
    # Community (housing estate) id
    co_id: str
    # Source website
    source: str
    # State
    state: str
    # Administrative division one level below the state
    county: str
    # City
    city: str
    # Postal code
    zipcode: str
    # Disabled fields, kept for reference:
    # street_number: str      # street number
    # street: str             # street name
    # apartment_number: str   # door / apartment number
    # Address
    address: str
    # House type
    house_type: str
"""
    消费xiaozijia_house_detail队列,请求,入楼栋库xiaozijia_detail_fast
"""

from lib.log import LogHandler
from lib.mongo import Mongo
import requests
import json
import pika
import itertools

log = LogHandler(__name__)

# Module-level Mongo handle with a hard-coded address and credentials.
m = Mongo(
    host='114.80.150.196',
    port=27777,
    user_name='goojia',
    password='******',
)
# m = Mongo(host='localhost', port=27017)


user_collection = m.connect['friends']['xiaozijia_user']
# Endless round-robin over the 'cookie' field of every stored user; the
# cookies are read once, when this module is imported.
cookie_iter = itertools.cycle(
    [user['cookie'] for user in user_collection.find(no_cursor_timeout=True)]
)
collection = m.connect['friends']['xiaozijia_house_detail']

# The same local proxy is used for both schemes.
proxies = dict.fromkeys(('http', 'https'), 'localhost:8787')

def change():
    """Placeholder that does nothing and returns None."""
    return None

class HouseDetail:
    def message(self, info):
Exemplo n.º 13
0
 def connect_mongo(self):
     """Return collection ``self.coll`` of database ``self.db``.

     The Mongo handle is built from ``self.m_host`` / ``self.m_port`` with
     hard-coded credentials.
     """
     return Mongo(
         self.m_host,
         self.m_port,
         user_name='fangjia',
         password='******',
     ).connect[self.db][self.coll]
Exemplo n.º 14
0
class MongoSingle:
    # One connection, created when the class body is executed and shared
    # through the class attribute.
    # NOTE(review): setting['db'] is passed as the first positional argument,
    # where other callers of Mongo pass a host - confirm this is intended.
    connection = Mongo(
        setting['db'],
        setting['port'],
    ).get_connection()
Exemplo n.º 15
0
    #     Process(target=con_detail).start()
    # # 消费楼栋页面
    # from sh_wuye.get_house_num import consume_queue as con_house
    # for i in range(60):
    #     Process(target=con_house).start()

    # 房估估
    #
    # 放入队列
    # from fanggugu.get_all_community_id import produce
    # produce()
    # 消费,得到楼栋信息
    from fanggugu.get_building_info import GetBuild
    from lib.mongo import Mongo

    m = Mongo('192.168.0.235', 27017)
    connection = m.get_connection()
    coll_user = m.get_connection()['fgg']['user_info']
    count = 0
    build = GetBuild()
    for i in coll_user.find():
        user_name = i['user_name']
        print(user_name)
        build.consume_queue(user_name)
    # 消费楼栋,得到房号数据
    from fanggugu.get_house_info import GetHouse
    from lib.mongo import Mongo

    m = Mongo('192.168.0.235', 27017)
    connection = m.get_connection()
    coll_user = m.get_connection()['fgg']['user_info']
Exemplo n.º 16
0
    def __init__(self, comment_count=None, group_id=None, crawler_time=None):
        """Store the given fields and create a Mongo handle from the settings.

        :param comment_count: stored as-is, defaults to None
        :param group_id: stored as-is, defaults to None
        :param crawler_time: stored as-is, defaults to None
        """
        self.comment_count, self.group_id, self.crawler_time = (
            comment_count, group_id, crawler_time)

        mongo_setting = setting['mongo']
        self.coll = Mongo(mongo_setting['host'], mongo_setting['port'])
Exemplo n.º 17
0
from lib.log import LogHandler
from lib.mongo import Mongo
from lib.rabbitmq import Rabbit
import requests
import json
from xiaozijia.user_headers import get_headers
import yaml

log = LogHandler('小资家_house_fast')

# Load the configuration. yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and rejected by PyYAML 6; safe_load parses
# plain-data configuration files, and the context manager closes the file.
with open('config.yaml') as _config_file:
    setting = yaml.safe_load(_config_file)

# mongo: collection of houses, named by the 'house_coll' setting
m = Mongo(setting['xiaozijia']['mongo']['host'],
          setting['xiaozijia']['mongo']['port'],
          user_name=setting['xiaozijia']['mongo']['user_name'],
          password=setting['xiaozijia']['mongo']['password'])
coll_house = m.connect[setting['xiaozijia']['mongo']['db']][
    setting['xiaozijia']['mongo']['house_coll']]

# rabbit: declare both queues up front so they exist before use
r = Rabbit(setting['xiaozijia']['rabbit']['host'],
           setting['xiaozijia']['rabbit']['port'])
channel = r.get_channel()
house_queue = setting['xiaozijia']['rabbit']['queue']['xiaozijia_house']
detail_queue = setting['xiaozijia']['rabbit']['queue'][
    'xiaozijia_house_detail']
channel.queue_declare(queue=house_queue)
channel.queue_declare(queue=detail_queue)

Exemplo n.º 18
0
import requests
import re
from ceic.country import country
from dateutil import parser
from lib.mongo import Mongo
import random
import yaml
from lib.log import LogHandler

m = Mongo('192.168.0.235')
connect = m.connect

# Load the configuration. yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and rejected by PyYAML 6; safe_load parses
# plain-data configuration files, and the context manager closes the file.
with open('config.yaml') as _config_file:
    setting = yaml.safe_load(_config_file)
db_name = setting['CEIC']['mongo']['db']
State_indicators_name = setting['CEIC']['mongo']['State_indicators']
State_indicators_details_name = setting['CEIC']['mongo']['State_indicators_details']
log = LogHandler('CEIC')

# Pool of HTTPS proxies; every entry uses port 4234 on the 192.168.0.x network.
proxy = [{"https": "https://192.168.0.{}:4234".format(last_octet)}
         for last_octet in (96, 93, 90, 94, 98, 99, 100, 101, 102, 103)]


class CEIC:
Exemplo n.º 19
0
# Mongo connection parameters, all read from the 'cityhouse' -> 'mongo'
# section of the settings.
_mongo_setting = setting['cityhouse']['mongo']
mongo_host = _mongo_setting['host']
mongo_port = _mongo_setting['port']
user_name = _mongo_setting['user_name']
password = _mongo_setting['password']
db_name = _mongo_setting['db']
db_coll = _mongo_setting['comm_coll']

# Take the first proxy handed out by the Proxies iterator.
p = Proxies()
proxy = next(p)
_user_agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Mobile Safari/537.36'
headers = {'User-Agent': _user_agent}

m = Mongo(
    host=mongo_host,
    port=mongo_port,
    user_name=user_name,
    password=password,
)
# NOTE(review): the collection name is hard-coded here and db_coll is not
# used - confirm that is intended.
collection = m.connect[db_name]['cityhouse_9_3']


def count():
    """Walk every city of every province in ``items`` and count stored records.

    For each city the list-API URL is built and the number of documents in
    ``collection`` with that city name is read; the visible code does not
    use either value further.
    """
    base_url = 'http://api.cityhouse.cn/csfc/v2/ha/list?percount=10&proptype=11&page=1&apiKey=4LiEDwxaRaAYTA3GBfs70L&ver=2&city='
    for province in items['items']:
        for city in province['citys']:
            city_code, city_name = city['cityCode'], city['cityName']
            url = base_url + city_code
            real_num = collection.find({'city': city_name}).count()
            # try:
            #     res = requests.get(url, headers=headers, proxies=proxy)
"""
根据城市的经纬度获取高德所有的poi
城市经纬度数据库地址
host:192.168.0.136
port:27017
db:fangjia_base
collection:city_bounds_box

把对角点的经纬度+类型放入 队列 amap_all_url
一共900w+对对角点
"""
from lib.mongo import Mongo
import pika
import json

# Collection with the city bounding boxes that all_url() iterates over.
m = Mongo('192.168.0.136')
_base_db = m.connect['fangjia_base']
collection = _base_db['city_bounds_box']


def all_url(a_type, rabbit):
    """
    Build one JSON message per city bounding box and print it.

    Every document of ``collection`` whose city is not '中国' contributes
    its ``bound_gd`` value together with the given type.

    :param a_type: value stored under the ``type`` key of each message
    :param rabbit: not used by the code visible here
    :return: None
    """
    city_filter = {'city': {'$nin': ['中国']}}
    for info in collection.find(city_filter):
        message = {'square_list': info['bound_gd'], 'type': a_type}
        body = json.dumps(message)
        print(body)
Exemplo n.º 21
0
import pika
from lib.mongo import Mongo
from lib.rabbitmq import Rabbit

# Load the configuration. yaml.load() without an explicit Loader is
# deprecated since PyYAML 5.1 and rejected by PyYAML 6; safe_load parses
# plain-data configuration files, and the context manager closes the file.
with open('config.yaml') as _config_file:
    setting = yaml.safe_load(_config_file)

# rabbit: open a channel and make sure the city queue exists
r = Rabbit(setting['dianping']['rabbit']['host'],
           setting['dianping']['rabbit']['port'])
connection = r.connection
channel = connection.channel()
city_queue = setting['dianping']['rabbit']['queue']['city_queue']
channel.queue_declare(queue=city_queue)

# mongo: collection named by the 'find_coll' setting
m = Mongo(setting['dianping']['mongo']['host'],
          setting['dianping']['mongo']['port'])
coll = m.connect[setting['dianping']['mongo']['db']][setting['dianping']
                                                     ['mongo']['find_coll']]

kind_list = {
    # '美食': 'ch10',
    # '休闲娱乐': 'ch30',
    # '丽人': 'ch50',
    # '周边游': 'ch35',
    # '运动健身': 'ch45',
    # '购物': 'ch20',
    # '学习培训': 'ch75',
    # '生活服务': 'ch80',
    # '医疗健康': 'ch85',
    '爱车': 'ch65',
    # '宠物': 'ch95',
Exemplo n.º 22
0
class Console(cmd2.Cmd):
    def __init__(self):
        """Configure the cmd2 shell, drop unwanted built-ins and open the backends.

        History file and length come from the 'core' section of ``config``.
        A Mongo handle and a Dispatch bound to it are created; no session is
        active until one is opened.
        """
        cmd2.Cmd.__init__(self,
            completekey='tab',
            persistent_history_file=get_option('core', 'hist_file', config),
            persistent_history_length=int(get_option('core', 'hist_size', config)))

        self.allow_cli_args = False
        self.default_to_shell = False
        self.intro = 'Welcome to the Omnibus shell! Type "session" to get started or "help" to view all commands.'
        self.allow_redirection = True
        self.prompt = 'omnibus >> '
        self.redirector = '>'
        self.quit_on_sigint = False

        # Remove the built-in cmd2 commands this shell does not expose. They
        # live on the shared cmd2.Cmd class, so once one Console has been
        # built they are already gone; deleting unconditionally would raise
        # AttributeError on any later instantiation.
        for command in ('do_alias', 'do_edit', 'do_eof', 'do_shell', 'do_eos',
                        'do_load', 'do_py', 'do_pyscript', 'do_shortcuts',
                        'do_unalias', 'do__relative_load'):
            if command in vars(cmd2.Cmd):
                delattr(cmd2.Cmd, command)

        self.db = Mongo(config)
        self.dispatch = Dispatch(self.db)
        self.session = None

        if DEBUG:
            self.do_set('debug true')


    def sigint_handler(self, signum, frame):
        """Ensure Redis DB is cleared before exiting application"""
        # Stop a running pipe process, if there is one.
        running_pipe = self.pipe_proc
        if running_pipe is not None:
            running_pipe.terminate()

        # Flush the active session, if any, before propagating the interrupt.
        if self.session is not None:
            self.session.flush()

        raise KeyboardInterrupt('Caught keyboard interrupt; quitting ...')


    def default(self, arg):
        """Override default function for custom error message"""
        # Input starting with '#' is silently ignored; anything else that
        # reaches this handler is reported as an unknown command.
        if not arg.startswith('#'):
            error('Unknown command')
        return


    def do_quit(self, _):
        """Exit Omnibus shell."""
        self._should_quit = True

        # Flush the session, when one is open, before leaving.
        has_session = self.session is not None
        if has_session:
            running('Clearing artifact cache ...')
            self.session.flush()

        warning('Closing Omnibus shell ...')
        return self._STOP_AND_EXIT


    def do_clear(self, arg):
        """Clear the console"""
        # 'clear' does not exist on Windows, where the equivalent is 'cls'.
        os.system('cls' if os.name == 'nt' else 'clear')


    def do_modules(self, arg):
        """Show module list"""
        # Heading first, then one line per entry of the 'modules' help section.
        bold_msg('[ Modules ]')
        module_lines = help_dict['modules']
        for line in module_lines:
            print(line)


    def do_artifacts(self, arg):
        """Show artifact information and available commands"""
        # Heading first, then one line per entry of the 'artifacts' help section.
        bold_msg('[ Artifacts ]')
        artifact_lines = help_dict['artifacts']
        for line in artifact_lines:
            print(line)


    def do_general(self, arg):
        """Show general commands"""
        # Heading first, then one line per entry of the 'general' help section.
        bold_msg('[ General Commands ]')
        general_lines = help_dict['general']
        for line in general_lines:
            print(line)


    def do_sessions(self, arg):
        """Show session commands"""
        # Heading first, then one line per entry of the 'sessions' help section.
        bold_msg('[ Session Commands ]')
        session_lines = help_dict['sessions']
        for line in session_lines:
            print(line)


    def do_redirect(self, arg):
        """ Show redirection command help """
        redirect_help = 'Omnibus supports command redirection to output files using the ">" character. For example, "cat host zeroharbor.org > zh.json" will pipe the output of the cat command to ./zh.json on disk.'
        info(redirect_help)


    def do_banner(self, arg):
        """Display random ascii art banner"""
        banner = asciiart.show_banner()
        print(banner)


    def do_session(self, arg):
        """Open a new session"""
        # The session object is kept even when its Redis handle is missing;
        # only the message differs.
        self.session = RedisCache(config)
        if self.session.db is not None:
            success('Opened new session')
        else:
            error('Failed to connect to Redis back-end. Please ensure the Redis service is running')


    def do_ls(self, arg):
        """View current sessions artifacts"""
        if self.session is None:
            warning('No active session')
            return

        # `total` stays 0 when the scan yields nothing; otherwise it ends up
        # as the number of keys printed.
        total = 0
        for total, key in enumerate(self.session.db.scan_iter(), 1):
            print('[%s] %s' % (key, self.session.get(key)))
        info('Active Artifacts: %d' % total)


    def do_wipe(self, arg):
        """Clear currently active artifacts """
        # Nothing to wipe without a session.
        if self.session is None:
            warning('No active session; start a new session by running the "session" command')
            return

        info('Clearing active artifacts from cache ...')
        self.session.flush()
        success('Artifact cache cleared')


    def do_rm(self, arg):
        """Remove artifact from session by ID

        Usage: rm <session id>"""
        # Only conversion failures mean "not an integer"; a bare except would
        # also swallow KeyboardInterrupt and SystemExit.
        try:
            arg = int(arg)
        except (TypeError, ValueError):
            error('Artifact ID must be an integer')
            return

        if self.session is None:
            warning('No active session; start a new session by running the "session" command')
            return

        if self.session.exists(arg):
            self.session.delete(arg)
            success('Removed artifact from cache (%s)' % arg)
        else:
            warning('Unable to find artifact by ID (%s)' % arg)


    def do_new(self, arg):
        """Create a new artifact

        Artifacts are created by their name. An IP address artifacts name would be the IP address itself,
        an FQDN artifacts name is the domain name, and so on.

        Usage: new <artifact name> """
        artifact = create_artifact(arg)

        # Store the artifact only when no document with that name exists yet.
        already_stored = self.db.exists(artifact.type, {'name': artifact.name})
        if not already_stored:
            doc_id = self.db.insert_one(artifact.type, artifact)
            if doc_id is not None:
                success('Created new artifact (%s - %s)' % (artifact.name, artifact.type))

        if self.session is not None:
            # The next ID is the number of keys currently in the session plus one.
            _id = sum(1 for _ in self.session.db.scan_iter()) + 1
            self.session.set(_id, artifact.name)
            print('Artifact ID: %s' % _id)
        else:
            # No session yet: open one and register the artifact as ID 1.
            self.session = RedisCache(config)
            self.session.set(1, artifact.name)
            success('Opened new session')
            print('Artifact ID: 1')


    def do_delete(self, arg):
        """Remove artifact from database by name or ID

        Usage: delete <name>
               delete <session id>"""
        is_key, value = lookup_key(self.session, arg)

        # A session key is replaced by the value stored under it; a key
        # without a stored value is an error.
        if is_key:
            if value is None:
                error('Unable to find artifact key in session (%s)' % arg)
                return
            arg = value

        self.db.delete_one(detect_type(arg), {'name': arg})


    def do_cat(self, arg):
        """View artifact details or list API keys

        Usage: cat apikeys
               cat <artifact name>"""
        if arg == 'apikeys':
            # Close the config file deterministically instead of leaking the handle.
            with open(common.API_CONF, 'rb') as conf_file:
                data = json.load(conf_file)
            # print(...) with one argument behaves the same on Python 2 and 3;
            # the print statement form is a SyntaxError on Python 3.
            print(json.dumps(data, indent=2))
            return

        is_key, value = lookup_key(self.session, arg)

        # A session key is replaced by the value stored under it; a key
        # without a stored value is an error.
        if is_key:
            if value is None:
                error('Unable to find artifact key in session (%s)' % arg)
                return
            arg = value

        artifact_type = detect_type(arg)
        result = self.db.find(artifact_type, {'name': arg}, one=True)
        # `not result` covers an empty result as before and also a None
        # result, on which len() would raise TypeError.
        if not result:
            info('No entry found for artifact (%s)' % arg)
        else:
            print(json.dumps(result, indent=2, separators=(',', ':')))


    def do_open(self, arg):
        """Load text file list of artifacts

        Command will detect each line items artifact type, create the artifact,
        and add it to the current session if there is one.

        Usage: open <path/to/file.txt> """
        if not os.path.exists(arg):
            warning('Cannot find file on disk (%s)' % arg)
            return

        artifacts = read_file(arg, True)
        for artifact in artifacts:
            new_artifact = create_artifact(artifact)

            if not self.db.exists(new_artifact.type, {'name': new_artifact.name}):
                doc_id = self.db.insert_one(new_artifact.type, new_artifact)
                if doc_id is not None:
                    # Report the created artifact object; `artifact` is the
                    # raw item read from the file and is not what
                    # create_artifact() returned.
                    success('Created new artifact (%s - %s)' % (new_artifact.name, new_artifact.type))

            # Register the artifact name in the session, as do_new does;
            # storing `arg` would map every ID to the file path instead.
            if self.session is None:
                self.session = RedisCache(config)
                self.session.set(1, new_artifact.name)
                success('Opened new session')
                print('Artifact ID: 1')
            else:
                # The next ID is the number of keys currently in the session plus one.
                _id = sum(1 for _ in self.session.db.scan_iter()) + 1
                self.session.set(_id, new_artifact.name)
                print('Artifact ID: %s' % _id)

        success('Finished loading artifact list')


    def do_report(self, arg):
        """Save artifact report as JSON file

        Usage: report <artifact name>
               report <session id>"""
        is_key, value = lookup_key(self.session, arg)

        # A session key is replaced by the value stored under it; a key
        # without a stored value is an error.
        if is_key:
            if value is None:
                error('Unable to find artifact key in session (%s)' % arg)
                return
            arg = value

        artifact_type = detect_type(arg)

        result = self.db.find(artifact_type, {'name': arg}, one=True)
        if len(result) == 0:
            warning('No entry found for artifact (%s)' % arg)
            return

        report = storage.JSON(data=result, file_path=output_dir)
        report.save()
        # Success is judged by the report file existing after save().
        if not os.path.exists(report.file_path):
            error('Failed to properly save report')
        else:
            success('Saved artifact report (%s)' % report.file_path)


    def do_machine(self, arg):
        """Run all modules available for an artifacts type

        Usage: machine <artifact name>
               machine <session id>"""
        # Delegate to the dispatcher and pretty-print whatever it returns.
        pp_json(self.dispatch.machine(self.session, arg))


    def do_abusech(self, arg):
        """Search Abuse.ch for artifact details """
        # Stub: the command is declared but performs no lookup; `arg` is ignored.
        pass


    def do_clearbit(self, arg):
        """Search Clearbit for email address """
        # Submit to the 'clearbit' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'clearbit', arg))


    def do_censys(self, arg):
        """Search Censys for IPv4 address """
        # Submit to the 'censys' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'censys', arg))


    def do_csirtg(self, arg):
        """Search CSIRTG for hash information"""
        # Submit to the 'csirtg' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'csirtg', arg))


    def do_cymon(self, arg):
        """Search Cymon for host """
        # Submit to the 'cymon' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'cymon', arg))


    def do_dnsbrute(self, arg):
        """Enumerate DNS subdomains of FQDN """
        # Stub: the command is declared but performs no enumeration; `arg` is ignored.
        pass


    def do_dnsresolve(self, arg):
        """Retrieve DNS records for host """
        # Submit to the 'dnsresolve' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'dnsresolve', arg))


    def do_geoip(self, arg):
        """Retrieve Geolocation details for host """
        # Submit to the 'geoip' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'geoip', arg))


    def do_fullcontact(self, arg):
        """Search FullContact for email address """
        # Submit to the 'fullcontact' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'fullcontact', arg))


    def do_gist(self, arg):
        """Search Github Gist's for artifact as string """
        # Stub: the command is declared but performs no search; `arg` is ignored.
        pass


    def do_gitlab(self, arg):
        """Check Gitlab for active username """
        # Stub: the command is declared but performs no check; `arg` is ignored.
        pass


    def do_github(self, arg):
        """Check GitHub for active username"""
        # Submit to the 'github' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'github', arg))


    def do_hackedemails(self, arg):
        """Check hacked-emails.com for email address"""
        # Submit to the 'hackedemails' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'hackedemails', arg))


    def do_he(self, arg):
        """Search Hurricane Electric for host"""
        # Submit to the 'he' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'he', arg))


    def do_hibp(self, arg):
        """Check HaveIBeenPwned for email address"""
        # Submit to the 'hibp' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'hibp', arg))


    def do_ipinfo(self, arg):
        """Retrieve ipinfo results for host"""
        # The docstring is the help text shown to the user; its "resutls"
        # typo is corrected. Submit to the 'ipinfo' module and pretty-print
        # the result.
        result = self.dispatch.submit(self.session, 'ipinfo', arg)
        pp_json(result)


    def do_ipvoid(self, arg):
        """Search IPVoid for host"""
        # Submit to the 'ipvoid' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'ipvoid', arg))


    def do_isc(self, arg):
        """Search SANS ISC for host"""
        # The command is called 'isc' but the module submitted to is 'sans'.
        pp_json(self.dispatch.submit(self.session, 'sans', arg))


    def do_keybase(self, arg):
        """Search Keybase for active username"""
        # Submit to the 'keybase' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'keybase', arg))


    def do_monitor(self, arg):
        """Setup active monitors for RSS Feeds, Pastebin, Gist, and other services"""
        # Stub: the command is declared but sets up nothing; `arg` is ignored.
        pass


    def do_mdl(self, arg):
        """Search Malware Domain List for host"""
        # Stub: the command is declared but performs no search; `arg` is ignored.
        pass


    def do_nmap(self, arg):
        """Run NMap discovery scan against host"""
        # Submit to the 'nmap' module and pretty-print the result.
        pp_json(self.dispatch.submit(self.session, 'nmap', arg))


    def do_otx(self, arg):
        """Search AlienVault OTX for host or hash artifacts

        Submits the argument to the dispatcher under the name 'otx' and
        prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'otx', arg))


    def do_passivetotal(self, arg):
        """Search PassiveTotal for host

        Submits the argument to the dispatcher under the name 'passivetotal'
        and prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'passivetotal', arg))


    def do_pastebin(self, arg):
        """Search Pastebin for artifact as string (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None


    def do_pgp(self, arg):
        """Search PGP records for email address or user

        Submits the argument to the dispatcher under the name 'pgp' and
        prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'pgp', arg))


    def do_projecthp(self, arg):
        """Search Project Honeypot for host (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None


    def do_reddit(self, arg):
        """Search Reddit for active username (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None


    def do_rss(self, arg):
        """Read latest from RSS feed

        Usage: rss <feed url>"""
        # The trailing True is an extra positional flag for submit(); its
        # meaning is defined by the dispatcher -- TODO confirm.
        pp_json(self.dispatch.submit(self.session, 'rss', arg, True))


    def do_securitynews(self, arg):
        """Get current cybersecurity headlines from Google News"""
        # The trailing True is an extra positional flag for submit(); its
        # meaning is defined by the dispatcher -- TODO confirm.
        pp_json(self.dispatch.submit(self.session, 'securitynews', arg, True))


    def do_shodan(self, arg):
        """Query Shodan for host

        Submits the argument to the dispatcher under the name 'shodan' and
        prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'shodan', arg))


    def do_source(self, arg):
        """Add source to given artifact or most recently added artifact if not specified

        Usage: source                            # adds to last created artifact
               source <artifact name|session id> # adds to specific artifact
        """
        if arg == '':
            # No argument: the target is whatever the session returns for
            # 'artifacts'. Without a session there is nothing to look up.
            if self.session is None:
                warning('Failed to find last artifact in MongoDB. Run "new <artifact name>" before using the source command')
                return
            last = self.session.receive('artifacts')
        else:
            is_key, value = lookup_key(self.session, arg)

            if is_key and value is None:
                error('Unable to find artifact key in session (%s)' % arg)
                return
            if is_key:
                # A session key was given; swap it for the artifact name.
                arg = value

            # Previously `last` was never assigned on this path, so every
            # non-empty argument ended in a NameError further down.
            last = arg

        # Detect the type from the resolved artifact name, not from a raw
        # session key, so the right collection is queried.
        _type = detect_type(last)

        if self.db.exists(_type, {'name': last}):
            # NOTE(review): the value stored under 'source' is still `arg`
            # (empty string when no argument was given), exactly as in the
            # original code -- confirm which value is actually intended.
            self.db.update_one(_type, {'name': last}, {'source': arg})
            success('Added source to artifact entry (%s: %s)' % (last, arg))
        else:
            warning('Failed to find last artifact in MongoDB. Run "new <artifact name>" before using the source command')


    def do_threatcrowd(self, arg):
        """Search ThreatCrowd for host

        Submits the argument to the dispatcher under the name 'threatcrowd'
        and prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'threatcrowd', arg))


    def do_threatexpert(self, arg):
        """Search ThreatExpert for host

        Submits the argument to the dispatcher under the name 'threatexpert'
        and prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'threatexpert', arg))


    def do_totalhash(self, arg):
        """Search TotalHash for host (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None


    def do_twitter(self, arg):
        """Get Twitter info for username (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None


    def do_urlvoid(self, arg):
        """Search URLVoid for domain name

        Submits the argument to the dispatcher under the name 'urlvoid' and
        prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'urlvoid', arg))


    def do_usersearch(self, arg):
        """Search Usersearch.com for active usernames (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None


    def do_virustotal(self, arg):
        """Search VirusTotal for IPv4, FQDN, or Hash

        Submits the argument to the dispatcher under the name 'virustotal'
        and prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'virustotal', arg))


    def do_vxvault(self, arg):
        """Search VXVault for IPv4 or FQDN (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None

    def do_web(self, arg):
        """Fingerprint webserver (not yet implemented)"""
        # Placeholder command: the argument is accepted and ignored.
        return None


    def do_whois(self, arg):
        """Perform WHOIS lookup on host

        Submits the argument to the dispatcher under the name 'whois' and
        prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'whois', arg))


    def do_whoismind(self, arg):
        """Search Whois Mind for domains associated to an email address

        Submits the argument to the dispatcher under the name 'whoismind' and
        prints the returned result with pp_json."""
        pp_json(self.dispatch.submit(self.session, 'whoismind', arg))
Exemplo n.º 23
0
from lib.mongo import Mongo
from dianping.request_detail import request_get

# HTTP request headers sent with the page requests below: a fixed, hard-coded
# Cookie string and a desktop Chrome User-Agent.
# NOTE(review): the cookie looks like a captured browser session and will
# presumably expire -- confirm how it is meant to be refreshed.
headers = {
    'Cookie': "showNav=#nav-tab|0|1; navCtgScroll=200; showNav=javascript:; navCtgScroll=100; _lxsdk_cuid=16420be4e6bc8-01d123b766c0b2-39614101-1aeaa0-16420be4e6dc8; _lxsdk=16420be4e6bc8-01d123b766c0b2-39614101-1aeaa0-16420be4e6dc8; _hc.v=b83d3f69-dd86-b525-f3e0-70de4b48876e.1529557700; s_ViewType=10; aburl=1; wedchatguest=g-63166371096986944; __mta=223777060.1529993859415.1529996989084.1529996989087.4; Hm_lvt_e6f449471d3527d58c46e24efb4c343e=1530000088; Hm_lpvt_e6f449471d3527d58c46e24efb4c343e=1530000088; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; wed_user_path=55|0; Hm_lvt_dbeeb675516927da776beeb1d9802bd4=1529995150,1530062234; Hm_lpvt_dbeeb675516927da776beeb1d9802bd4=1530062234; cityInfo=%7B%22cityId%22%3A952%2C%22cityEnName%22%3A%22huaining%22%2C%22cityName%22%3A%22%E6%80%80%E5%AE%81%E5%8E%BF%22%7D; cy=1; cye=shanghai; _lxsdk_s=1643ed097cd-de5-109-cf7%7C%7C142",
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36",
}

# Authenticated HTTP proxy URL built from host, port and credentials
# (the credentials are masked as "******" in this copy).
ip = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {"host": "http-pro.abuyun.com", "port": "9010",
                                                     "user": "******", "pass": "******"}
# The same proxy URL for both schemes.
# NOTE(review): `proxy` is not passed to the requests.get() call in the
# visible part of this script -- confirm whether that is intended.
proxy = {
    'http': ip,
    'https': ip
}

# Handle on 'dianping' / 'city_region_hot_new', reached through the project's
# Mongo wrapper (presumably database / collection -- TODO confirm).
m = Mongo('114.80.150.196', 27777)
coll = m.connect['dianping']['city_region_hot_new']
# NOTE(review): city_dict, requests and etree are not defined or imported in
# the visible part of this excerpt.
for i in city_dict:
    # city_dict is used as {city: pinyin}; the pinyin value is the URL path segment.
    pinyin = city_dict[i]
    city = i
    print(city)
    # Clear documents previously stored for this city before re-fetching.
    coll.remove({'city': i})
    url = 'http://www.dianping.com/' + pinyin + '/ch10'
    response = requests.get(url, headers=headers)
    html = response.text
    # Parse the page into an element tree for the XPath queries that follow.
    tree = etree.HTML(html)
    # # Collect the cuisine dictionary
    # cookie_list = tree.xpath('//*[@id="classfy"]/a')
    # kind_dict = {}
    # for kind in cookie_list:
    #     kind_url = kind.xpath('@href')[0]
Exemplo n.º 24
0
import requests
from lib.mongo import Mongo
from lib.rabbitmq import Rabbit

# Channel obtained from the project's Rabbit helper, pointed at 127.0.0.1:5673.
r = Rabbit('127.0.0.1', 5673)
channel = r.get_channel()

# Handle on 'fgg' / 'comm' through the project's Mongo wrapper, using explicit
# credentials (password masked in this copy).
m = Mongo('114.80.150.196', 27777, user_name='goojia', password='******')
coll = m.connect['fgg']['comm']


class Fgg:
    def __init__(self):
        """Set up the request headers, proxy settings and HTTP session.

        Attributes set: ``headers`` (an empty 'Authorization' value),
        ``ip`` (the proxy URL), ``proxies`` (that URL for both 'http' and
        'https') and ``s`` (a new requests session).
        """
        self.headers = {'Authorization': ""}

        # Fill the proxy URL template from its named parts.
        proxy_parts = {
            "host": "http-pro.abuyun.com",
            "port": "9010",
            "user": "******",
            "pass": "******"
        }
        self.ip = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % proxy_parts

        # Both schemes go through the same proxy URL.
        self.proxies = dict.fromkeys(('http', 'https'), self.ip)
        self.s = requests.session()

    def login(self):
        url_login = "******"
Exemplo n.º 25
0
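collection: city_bounds_box

Put the longitude/latitude of each pair of diagonal corner points, plus the type, into the queue amap_all_url
More than 9 million pairs of diagonal corner points in total

Priority order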
'120000','170000','190000','050000','060000','100000','010000','020000','030000','090000','130000','160000','040000','070000','080000','110000','140000','150000','180000','200000'

"""
from lib.mongo import Mongo
from amap_reconfiguration.amap_exception import AmapException
from amap_reconfiguration.api_builder import ApiKey, api_key_list
import pika
import json

# Handle on 'fangjia_base' / 'city_bounds_box' through the project's Mongo wrapper.
m = Mongo('114.80.150.198', 38888)
collection = m.connect['fangjia_base']['city_bounds_box']

API_KEY_BUILDER = ApiKey()
# 300000 per entry in api_key_list -- presumably the daily request quota of a
# single API key; TODO confirm.
DAILY_COUNT_ACCORDING_KEYS = len(api_key_list) * 300000


def all_url(a_type, rabbit):
    """
    把类型和经纬度放入amap_all_url队列
    :param a_type:
    :param rabbit:
    :return:
    """
    url_list = []
    for info in collection.find({'city': {'$nin': ['中国']}}):
Exemplo n.º 26
0
from lib.mongo import Mongo
from lib.standardization import standard_city, standard_block
from pymongo import MongoClient

# Collection read by mongo_chanch(): 'comm_price' / 'zhugefang_backup',
# reached through the project's Mongo wrapper.
m = Mongo('192.168.0.235', 27017)
coll_name = m.connect['comm_price']['zhugefang_backup']

# Second collection on another server, opened directly with pymongo
# (presumably where the converted records are saved -- TODO confirm).
n = MongoClient('192.168.0.61', 27017)
save_coll = n['fangjia_tmp']['zhugefang_unitprice_source']


def mongo_chanch():
    for i in coll_name.find({}, no_cursor_timeout=True):
        name = i['comm_name']
        city_name_ = i['city']
        DistrictName_ = i['comm_addr']
        UnitPrice = int(i['price'])
        update_time = i['time']
        category = 'district'
        s_date = int(update_time.strftime('%Y%m'))
        city_name = standard_city(city_name_)
        DistrictName = standard_block(DistrictName_)
        data = {
            'category': category,
            'city': city_name,
            'name': name,
            'region': DistrictName,
            's_date': s_date,
            'zhugefang_esf_price': UnitPrice,
        }
        if not data['region']:
Exemplo n.º 27
0
 def connect_mongo(self):
     """Return the handle selected by ``self.db`` and ``self.coll``.

     A Mongo wrapper is created for ``self.m_host``/``self.m_port`` with
     fixed credentials, then indexed by ``self.db`` and ``self.coll``.
     """
     client = Mongo(self.m_host, self.m_port, user_name='goojia', password='******')
     database = client.connect[self.db]
     return database[self.coll]
Exemplo n.º 28
0
import pymongo
from lib.mongo import Mongo

# Handle on 'wuye' / 'key_name' through the project's Mongo wrapper, using
# explicit credentials (password masked in this copy). The script below
# inserts into it.
m = Mongo('114.80.150.196',
          27777,
          user_name='fangjia',
          password='******')
key_coll = m.connect['wuye']['key_name']


def connect_mongodb(host, port, database, collection):
    """Open a pymongo client and return the named collection.

    :param host: server host name or address
    :param port: server port
    :param database: name of the database to select
    :param collection: name of the collection inside that database
    :return: the collection object from ``get_collection``
    """
    return pymongo.MongoClient(host, port)[database].get_collection(collection)


# NOTE(review): set_ and count are never used in the visible part of this script.
set_ = set([])
comm_coll = connect_mongodb('114.80.150.198', 38888, 'fangjia', 'seaweed')
# key_coll = connect_mongodb('114.80.150.196', 27777, 'wuye', 'key_name')
# All documents whose 'city' field is '上海' (Shanghai).
list_ = comm_coll.find({'city': '上海'})
count = 0
for i in list_:
    name = i['name']
    # The inner loop reuses the name `i`, shadowing the outer document variable.
    # NOTE(review): if `name` is a str this yields one key per character --
    # confirm that is the intent.
    for i in name:
        print(i)
        # Each element is stored as the document's _id.
        data = {'_id': i}
        try:
            key_coll.insert(data)
        except Exception as e:
            # Every failure is reported as 'key重复' ("duplicate key"), whatever
            # the actual exception was.
            print('key重复')
Exemplo n.º 29
0
 def __init__(self, bot):
     self.bot = bot
     self.db = Mongo.init_db(Mongo())
     self.server_db = None
     self.start_time = time()