Example #1
    def __init__(self, domain, proxy=None):
        self.domain = domain
        self.domain_name = []
        self.smiliar_domain_name = []
        self.related_domain_name = []
        self.email = []
        self.url = "http://api.whoxy.com/"
        self.engine_name = "Whoxy"
        try:
            self.api_key = config.Whoxy_API_KEY
        except AttributeError:
            logger.warning("No Whoxy API key configured, exiting")
            exit(0)
        self.print_banner()
        self.proxy = proxy

        self.company_names = []
        self.company_emails = []
        self.company_phones = []  # not supported by this API
        '''
        whois lookups can actually be reversed in four ways:
        company name
        contact name
        contact email
        contact phone
        but whoxy does not support all of them: https://www.whoxy.com/reverse-whois/demo.php
        '''

        self.blocked_names = []
        self.blocked_emails = []
        self.bChanged = False
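A minimal sketch of how this class might issue a reverse-whois lookup, assuming the requests library; the "reverse" and "company" query parameters follow the demo page linked in the snippet and are assumptions, not taken from this code:

import requests

def reverse_whois_by_company(api_key, company):
    # parameter names are assumptions based on https://www.whoxy.com/reverse-whois/demo.php
    params = {"key": api_key, "reverse": "whois", "company": company}
    resp = requests.get("http://api.whoxy.com/", params=params, timeout=30)
    resp.raise_for_status()
    return resp.json()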
Example #2
async def scan_result(url, semaphore, method, params):
    try:
        async with semaphore:
            headers = {
                'User-Agent': random.choice(USER_AGENTS),
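                # note: the spoofing headers below are filled with random
                # user-agent strings rather than IP addresses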
                "X-Forwarded-For": random.choice(USER_AGENTS),
                "X-Originating-IP": random.choice(USER_AGENTS),
                "X-Remote-IP": random.choice(USER_AGENTS),
                "X-Remote-Addr": random.choice(USER_AGENTS),
            }
            async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False),
                                             headers=headers) as session:
                proxy = random_proxy()
                async with session.request(method=method, url=url, proxy=proxy, timeout=TimeOut, verify_ssl=False, **params) as response:
                    status_code = response.status
                    res_json = await response.read()
                    msg = {"url": url, "status_code": status_code, "Content-Length": len(res_json)}
                    if status_code == 200:
                        logger.info(msg)
                    else:
                        logger.warning(msg)
                    return msg

    except Exception as e:
        msg = {"url": url, "status_code": 500, "Content-Length": 0}
        logger.error(msg)
        await asyncio.sleep(1)
        return msg
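A short driver for scan_result might look like this sketch; the concurrency cap and URLs are arbitrary, and USER_AGENTS, random_proxy, TimeOut and logger are assumed to come from the surrounding module as in the snippet:

import asyncio

async def main(urls):
    semaphore = asyncio.Semaphore(10)  # arbitrary cap on concurrent requests
    tasks = [scan_result(url, semaphore, "GET", {}) for url in urls]
    return await asyncio.gather(*tasks)

# asyncio.run(main(["https://example.com/a", "https://example.com/b"]))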
Example #3
 def run(self):
     try:
         timestamp = time.time()
         url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format(
             self.url, timestamp, self.domain, timestamp, self.verify)
         #response = req.get(url,proxies=self.proxy).content
         # no proxy needed for this class
         response = req.get(url).content
         result = json.loads(response)
         if result.get('status') == '1':
             for item in result.get('data'):
                 if is_domain(item.get('domain')):
                     self.domain_name.append(item.get('domain'))
         elif result.get('status') == 3:
             logger.warning("chaxun.la api block our ip...")
             logger.info("input you verify_code")
             # print('get verify_code():', self.verify)
             # self.verify_code()
             # self.run()
         self.domain_name = list(set(self.domain_name))
     except Exception as e:
         logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
     finally:
         logger.info("{0} found {1} domains".format(self.engine_name, len(self.domain_name)))
         return self.domain_name,self.smiliar_domain_name,self.email
Example #4
def request(url=None, header=None, value=None):

    if url is None:
        logger.error("No URL given...")
        exit(0)
    else:
        logger.info("Target url is {}".format(url))

    if not header:
        logger.warning("Header is empty...")
        header = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36'
        }

    req = requests.Session()

    try:
        if value is None:
            response = req.get(url, headers=header)
        else:
            response = req.post(url, data=value, headers=header)
    except requests.RequestException as e:
        logger.error("Request error: {}".format(e))
        return None

    return response.text.encode('utf-8')
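Usage is then a plain GET when value is None and a POST otherwise; note the function returns the body as UTF-8 encoded bytes (the URLs here are hypothetical):

page = request("https://example.com")  # GET
reply = request("https://example.com/login", value={"user": "a"})  # POST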
Example #5
    def click(self, count: int = 1, delay_ms: int = 0, comment=None):

        if count <= 0 or count > 100:
            logger.warning("click: count out of range ({0})".format(count))
            return

        if delay_ms < 0 or delay_ms > 10000:
            logger.warning(
                "click: delay_ms out of range ({0})".format(delay_ms))
            return

        with PiCamera() as camera:
            camera.exif_tags['IFD0.Copyright'] = self.copyright.format(
                datetime.now().year)
            camera.exif_tags['IFD0.Artist'] = self.artist
            camera.exif_tags[
                'EXIF.UserComment'] = '' if comment is None else comment.strip(
                )

            camera.resolution = (800, 600)
            camera.start_preview()
            now = datetime.now(timezone.utc).astimezone()
            camera.start_recording(
                f'{cfg.paths.photos}/{now:%Y%m%d}_{now:%H%M%S}.h264')
            camera.wait_recording(1)
            for i in range(count):
                # include the shot index so repeated captures don't overwrite each other
                camera.capture(
                    f'{cfg.paths.photos}/{now:%Y%m%d}_{now:%H%M%S}_{i}.jpg',
                    use_video_port=True)
                camera.wait_recording(delay_ms / 1000)  # wait_recording expects seconds
            camera.stop_recording()
Example #6
def ph_request(url=None, header=None, value=None):

    if url is None:
        logger.error("No URL given...")
        exit(0)
    else:
        logger.info("Target url is {}".format(url))

    if not header:
        logger.warning("Header is empty...")
        header = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36'
        }
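    # note: this header dict is built but never passed to the PhantomJS driver below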

    try:
        driver = webdriver.PhantomJS(executable_path=set_phantomjs_path())

    except WebDriverException:
        logger.error("phantomjs path error...")
        exit(0)

    try:
        driver.get(url)
        time.sleep(3)

    finally:
        return driver.page_source
Example #7
 def destroy_nodes(self):
     L.error("DESTROYING ALL NODES FOR MANAGER %s"%self.name)
     for node in self.nodes:
         L.warning("KILLING NODE: %s"%node)
         try:
             call("docker-machine kill %s && sleep 10" % node)
             call("docker-machine rm %s" % node)
         except Exception:
             pass
Example #8
def Get_Api(api_type, query):
    if api_type == "fofa":
        data = GetFofaApi(query).run()
    elif api_type == "zoomeye":
        data = GetZoomeye(query).run()
    else:
        logger.warning(f"不支持的api类型{api_type}")
        return []
    return data
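A hypothetical call, assuming GetFofaApi and GetZoomeye are importable from the surrounding project:

data = Get_Api("fofa", 'domain="example.com"')
if not data:
    logger.warning("no results, or an unsupported api type")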
Example #9
def args_check(cmdparse, usage):
    print(random.choice(Banner))
    confs = {}
    args = []
    if hasattr(cmdparse, "items"):
        cmdlines = cmdparse.items()
    else:
        cmdlines = cmdparse.__dict__.items()
    for key, value in cmdlines:
        confs[key] = value
        args.append(value)
    if confs['version']:
        logger.info(f"Version: {Version}")
        exit(0)
    if confs['updateprogram']:
        update()
        exit(0)
    if ((not confs['query'] or not confs['apitype']) and not confs['file']
            and not confs['url']) or (not confs['dict'] and not confs['func']):
        print(usage)
        exit(0)
    if confs['porxy']:
        ProxyPool.extend(red_api(confs['porxy']))
    if confs['code']:
        try:
            StatusCode.extend([int(x) for x in confs['code'].split(",")])
        except:
            print(usage)
            exit(0)
    if confs['params']:
        try:
            kw = {
                x.split("=")[0]: eval(x.split("=")[1])
                for x in confs['params'].split(",")
            }
            if isinstance(kw, dict):
                params = kw['params'] if 'params' in kw and isinstance(
                    kw['params'], dict) else None
                json = kw['json'] if 'json' in kw and isinstance(
                    kw['json'], dict) else None
                data = kw['data'] if 'data' in kw and isinstance(
                    kw['data'], dict) else None
                args[8] = {'params': params, 'json': json, 'data': data}
        except:
            print(usage)
            exit(0)
    if confs['output'] not in ['json', 'txt', "csv", "xlsx", "xls"]:
        logger.warning(f"暂不支持{confs['output']}文件格式,改为默认文件格式txt输出")
        args[5] = "txt"

    return args
Example #10
 def do_search(self):
     try:
         url = "http://{0}/search?num={1}&start={2}&hl=en&meta=&q={3}".format(self.server,self.quantity,self.counter,self.word)
         r = requests.get(url, headers=self.headers, proxies=self.proxies)
         if "and not a robot" in r.content:
             logger.warning("Google has blocked your visit")
             return False
         else:
             self.results = r.content
             self.totalresults += self.results
             return True
     except Exception, e:
         logger.error("Error in {0}: {1}".format(__file__.split('/')[-1],e))
         return False
Example #11
 def do_search_files(self):
     try:
         query = "filetype:"+self.files+"%20site:"+self.word
         url = "https://{0}/customsearch/v1?key={1}&highRange={2}&lowRange={3}&cx={4}&start={5}&q={6}".format(self.server,self.api_key,self.highRange,self.lowRange,self.cse_id,self.counter,query)
         r = req.get(url, proxies=self.proxies)
         if "and not a robot" in r.text:
             logger.warning("google has blocked your visit")
             return -1
         else:
             self.results = r.text
             self.totalresults += self.results
             return 1
     except Exception as e:
         logger.error("Error in {0}: {1}".format(__file__.split('/')[-1],e))
         return -1
Example #12
def red_api(file_path):
    api_list = []
    file_type = file_path.split('.')[-1]
    if file_type in ["xlsx", "xls"]:
        wb = xlrd.open_workbook(file_path)
        for sh in wb.sheets():
            for r in range(sh.nrows):
                domin = sh.cell_value(r, 0)  # assumes the domain is in the first column
                api_list.append(add_http(domin))
    elif file_type in ["txt", "csv"]:
        with open(file_path) as f:
            for line in f:
                api_list.append(add_http(line.strip()))
    else:
        logger.warning("不支持文件类型")
    return list(set(api_list))
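A hypothetical call; each line (or first spreadsheet column) of the input file is normalized through add_http() and deduplicated:

apis = red_api("targets.txt")  # "targets.txt" is an assumed input file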
Example #13
 def do_search(self):
     try:
         url = "http://{0}/search/web/results/?q={1}&elements_per_page=50&start_index={2}".format(
             self.server, self.word, self.counter)  # elements_per_page here is the number of entries per page
         r = req.get(url, headers=self.headers, proxies=self.proxies)
         if "We are sorry, but your request has been blocked" in r.text:
             logger.warning("Exalead blocked our request")
             return False
         else:
             self.results = r.text
             self.totalresults += self.results
             return True
     except Exception as e:
         logger.error("Error in {0}: {1}".format(
             __file__.split('/')[-1], e))
         return False
Example #14
class search_yandex:
    def __init__(self, word, limit, useragent, proxy=None):
        self.engine_name = "Yandex"
        self.word = word
        self.results = ""
        self.totalresults = ""
        self.server = "yandex.com"
        self.hostname = "yandex.com"
        self.headers = {'User-Agent': useragent}
        self.limit = int(limit)
        self.counter = 0
        self.proxies = proxy
        self.print_banner()
        return

    def print_banner(self):
        logger.info("Searching now in {0}..".format(self.engine_name))
        return

    def do_search(self):
        try:
            url = "http://{0}/search?text={1}&numdoc=50&lr={2}".format(
                self.server, self.word,
                self.counter)  # %40=@ search terms like @meizu.com; does prefixing the keyword with @ change anything? Testing found no difference
        except Exception as e:
            logger.error(e)
        try:
            r = requests.get(url, headers=self.headers, proxies=self.proxies)
            if "automated requests" in r.text:
                logger.warning("yandex blocked our request, exiting")
                exit(0)
            self.results = r.text
            self.totalresults += self.results
        except Exception as e:
            logger.error(e)
Example #15
 def __init__(self, domain, proxy=None):
     self.domain = domain
     self.domain_name = []
     self.smiliar_domain_name = []
     self.related_domain_name = []
     self.email = []
     self.url = "https://censys.io/api/v1"
     self.engine_name = "Censys"
     try:
         self.api_id = config.Censys_API_UID
         self.api_secret = config.Censys_API_SECRET
      except AttributeError:
          logger.warning("No Censys API config, exiting")
         exit(0)
     self.print_banner()
     self.proxy = proxy
Example #16
 def __fetch_stars_all(self, username, page_limit=0):
     _page = 1  # header['link']: page=(\d+).*$
     while True:
         if page_limit > 0 and _page > page_limit:
             logger.warning(
                 f"aborted dumping {username} due to --max page limit")
             break
         logger.debug(f"fetching stars: page {_page}")
         _stars = self.__fetch_stars_by_page(username, page=_page)
         if not _stars:
             break
         try:
             self.__save_to_db(_stars)
             _page += 1
         except RuntimeError as e:
             logger.debug(str(e))
             break
Example #17
 def do_search(self):
     try:
         url = "http://{0}/search?text={1}&numdoc=50&lr=10590&pn={2}".format(
             self.server, self.word,
             self.counter)  # %40=@ search terms like @meizu.com; does prefixing the keyword with @ change anything? Testing found no difference
         r = requests.get(url, headers=self.headers, proxies=self.proxies)
         if "automated requests" in r.text:
             logger.warning("Yandex blocked our request")
             return False
         else:
             self.results = r.text
             self.totalresults += self.results
             return True
     except Exception as e:
         logger.error("Error in {0}: {1}".format(
             __file__.split('/')[-1], e))
         return False
Example #18
 def __init__(self, word, limit, proxy=None):
     self.engine_name = "Fofa"
     try:
         self.email = config.FOFA_USER_EMAIL
         self.key = config.FOFA_API_KEY
     except AttributeError:
         logger.warning("No Fofa config, exiting")
         exit(0)
     self.word = word
     self.results = ""
     self.totalresults = ""
     self.server = "fofa.so"
     self.limit = int(limit)
     self.counter = 0  # unused
     self.proxies = proxy
     self.print_banner()
     return
Example #19
 def run(self):
     if not zoomeyeApi:
         logger.warning("请修改配置文件中zoomeyeApi为您的API-KEY")
         exit(0)
     logger.info("zoomeye数据请求中")
     url = f"https://api.zoomeye.org/host/search?query={self.ip}"
     url_list = []
     try:
         req = requests.Session()
         req.headers = self.headers
         req.mount("https://", HTTPAdapter(max_retries=2))
         target = req.get(url, timeout=10)
         datas = json.loads(target.text)
         if datas.get("matches"):
             url_list.extend(self.get_data(datas.get("matches")))
     except Exception as e:
         logger.error(f"请求失败:{e}")
     return url_list
Example #20
    def _update_imdb_movies(self, getdata_func):
        """ 更新imdb電影資訊

        利用imdbpy去取得最新的電影資訊,然後更新資料庫

        :param getdata_func: 取得要更新的imdb電影
        :return:
        """
        i = 0
        since = 0
        while True:
            movies = getdata_func(limit=self.DbOperator.LIMIT, since=since)
            if movies:
                for movie in movies:
                    imdbid = movie[0]
                    try:
                        # check that the imdbid has a valid format
                        if not re.match(r'tt\d{7}', imdbid):
                            raise Exception('not a valid imdbid')
                        if self.DbOperator.is_error_imdbid_movie(imdbid):
                            logger.info('error imdbid: %s' % imdbid)
                            continue
                        imdbmovie = self.IMDbObj.get_movie(imdbid)
                        imdbmovie.save2db(self.DbOperator.HOST,
                                          self.DbOperator.DB)
                        i += 1
                        logger.info(
                            (i, imdbid, imdbmovie['url'], imdbmovie['rating'],
                             imdbmovie['posterurl']).__str__())
                    except Exception as e:
                        time.sleep(30)
                        # if imdb is reachable but no info came back, the imdbid is
                        # probably wrong, so clear it
                        if self.IMDbObj.is_network_ok():
                            self.DbOperator.clear_imdbid(imdbid)
                            logger.info('clear imdbid: %s' % imdbid)
                        else:
                            logger.warning('update imdb fail: %s' % (str(e)))
                            return

                since += self.DbOperator.LIMIT
                logger.info('exported count: %d' % i)
            else:
                break
Example #21
 def __init__(self, word, limit, proxy=None):
     self.engine_name = "BingAPI"
     self.word = word.replace(' ', '%20')
     self.results = ""
     self.totalresults = ""
     self.server = "api.cognitive.microsoft.com"
     self.limit = int(limit)
     try:
         self.bingApikey = config.Bing_API_Key
     except AttributeError:
         logger.warning("No Bing API key configured, exiting")
         exit(0)
     self.headers = {
         "Ocp-Apim-Subscription-Key": self.bingApikey,
     }
     self.counter = 0
     self.proxies = proxy
     self.print_banner()
     return
Example #23
    def _download(self, filename):
        """ 下載檔案,並且最多可以嘗試MAXTRY次

        :param filename:
        :return:
        """
        max_try = self.MaxTry
        while True:
            try:
                logger.info('download %s' % self.get_ftp_file_uri(filename))
                sys.stdout.flush()
                self.down_ftp_file(filename)
                break
            except Exception as e:
                max_try -= 1
                if max_try >= 0:
                    logger.warning('retry: %s, msg: %s' % (filename, str(e)))
                else:
                    logger.error('download %s fail!' % filename)
                    raise
Example #24
def test_model(est, parameters, W, X_train: np.ndarray, y_train: np.ndarray,
               X_test: np.ndarray, y_test: np.ndarray, **kwargs):
    result = np.nan
    if X_train.shape[0] <= W:
        logger.warning("Too few training datapoints for window {}".format(W))
        return result
    features = np.concatenate((X_train[:-W], X_test))
    target = np.concatenate((y_train[:-W], y_test))

    predictions = []
    labels = []

    # Go in reverse
    window = 1
    for i in range(features.shape[0], 0, -1):
        if i < (W + 1):
            break
        train_start = i - W - 1
        train_end = i - 1
        test_start = i - 1
        test_end = i
        # print('[Window {}]\tTrain: B={} E={}\tTest: B={} E={}'.format(window, train_start, train_end, test_start, test_end))
        _X_train = features[train_start:train_end]
        _y_train = target[train_start:train_end]
        _X_test = features[test_start:test_end]
        _y_test = target[test_start:test_end]

        _est = est.set_params(**parameters)
        _est = _est.fit(_X_train, _y_train)
        pred = _est.predict(_X_test)

        predictions.append(pred[0])
        labels.append(_y_test[0])
        window += 1
        # print('\t Expect: {} Predict: {}'.format(_y_test[0], pred[0]))

    labels_arr = np.flip(np.array(labels), axis=0)
    predictions_arr = np.flip(np.array(predictions), axis=0)
    print('======== Final score! =======')
    print(classification_report(labels_arr, predictions_arr))
    return (labels_arr, predictions_arr)
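A sketch of how test_model might be driven, using synthetic data in place of a real dataset; the estimator choice, window size, and data shapes are arbitrary, and classification_report is assumed to be imported in test_model's module:

import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(120, 5))
y = (X[:, 0] > 0).astype(int)  # toy binary target
labels, preds = test_model(RandomForestClassifier(), {"n_estimators": 10}, 30,
                           X[:80], y[:80], X[80:], y[80:])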
Example #25
def output_shell(line, raise_on_non_zero: bool = False):

    try:
        logger.debug(line)
        print(f'SHELL:{line}', flush=True)
        shell_command = Popen(line, stdout=PIPE, stderr=PIPE, shell=True)
    except OSError:
        return None
    except ValueError:
        return None

    (output, err) = shell_command.communicate()
    shell_command.wait()

    if shell_command.returncode != 0 and raise_on_non_zero:
        print(
            f"Shell command failed to execute:{line}\n{err}\n{output if not None else ''}"
        )
        logger.warning(f"Command failed: {line}")
        return output, False

    return str(output.decode("utf-8")), True
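A hypothetical call; on OSError or ValueError the function returns None rather than a tuple, so check before unpacking:

result = output_shell("echo hello")
if result is not None:
    text, ok = result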
Example #26
 def run(self):
     try:
         timestamp = time.time()
         url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format(
             self.url, timestamp, self.domain, timestamp, self.verify)
         result = json.loads(req.get(url).content)
         if result.get('status') == '1':
             for item in result.get('data'):
                 if is_domain(item.get('domain')):
                     self.subset.append(item.get('domain'))
         elif result.get('status') == 3:
             logger.warning("chaxun.la api block our ip...")
             logger.info("input you verify_code")
             # print('get verify_code():', self.verify)
             # self.verify_code()
             # self.run()
         self.subset = list(set(self.subset))
     except Exception as e:
         logger.error(str(e))
     finally:
         logger.info("{0} found {1} domains".format(self.engine_name, len(self.subset)))
         return self.subset
Example #27
File: imdbpy.py  Project: codeguycool/CEC
    def get_posterurl_by_width(self, imdbpymovie, width):
        if self._tmdbresult is None:
            # fixme: change this condition if everything should be processed
            # run the lookup once at least 2 of the conditions match
            matchcount = 0
            matchcount = matchcount + 1 if self['directors'] != [] else matchcount
            matchcount = matchcount + 1 if self['stars'] != [] else matchcount
            matchcount = matchcount + 1 if self['stars'] != [] else matchcount  # note: 'stars' is checked twice in the source
            if matchcount > 1:
                url = 'http://api.themoviedb.org/3/find/%s?external_source=imdb_id&api_key=%s' % (self._imdbid, self._tmdbapikey)
                response = requests.get(url, timeout=30)

                # set correct encoding
                fix_response_encoding(response)

                if response.status_code == 200:
                    self._tmdbresult = json.loads(response.text)
                else:
                    time.sleep(5)
                    logger.warning('url: %s, status: %d' % (url, response.status_code))

        if self._tmdbresult is not None:
            if len(self._tmdbresult['movie_results']) == 1 and self._tmdbresult['movie_results'][0]['poster_path']:
                return 'http://image.tmdb.org/t/p/w%d%s' % (width, self._tmdbresult['movie_results'][0]['poster_path'])
Example #28
class search_google():
    def __init__(self, word, limit, useragent, proxy):
        self.engine_name = "Google"
        self.word = word
        self.results = ""
        self.totalresults = ""
        self.files = "pdf"
        self.server = "www.google.com"
        self.headers = {'User-agent': useragent}
        self.quantity = "100"
        self.limit = int(limit)
        self.counter = 0
        self.proxies = proxy
        self.print_banner()
        return

    def print_banner(self):
        logger.info("Searching now in {0}..".format(self.engine_name))
        return

    def do_search(self):
        try:
            url = "http://{0}/search?num={1}&start={2}&hl=en&meta=&q={3}".format(
                self.server, self.quantity, self.counter, self.word)
        except Exception as e:
            logger.error("Error in {0}: {1}".format(
                __file__.split('/')[-1], e))
        try:
            r = requests.get(url, headers=self.headers, proxies=self.proxies)
            if "and not a robot" in r.text:
                logger.warning("Google has blocked your visit")
                return 0
            else:
                self.results = r.text
                self.totalresults += self.results
                return 1
        except Exception as e:
            logger.error("Error in {0}: {1}".format(
                __file__.split('/')[-1], e))
            return 0
Example #29
class search_exalead:

    def __init__(self, word, limit, useragent, proxy=None):
        self.engine_name = "Exalead"
        self.word = word
        self.files = "pdf"
        self.results = ""
        self.totalresults = ""
        self.server = "www.exalead.com"
        self.userAgent = useragent
        self.referer = "http://{0}/search/web/results/?q={1}".format(self.server,self.word)
        self.limit = int(limit)
        self.counter = 0
        self.proxies = proxy
        self.print_banner()
        return

    def print_banner(self):
        logger.info("Searching now in {0}..".format(self.engine_name))
        return

    def do_search(self):
        try:
            url = "http://{0}/search/web/results/?q={1}&elements_per_page=50&start_index={2}".format(self.server,self.word,self.counter)# 这里的pn参数是条目数
        except Exception, e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1],e))
        try:
            r = http_request_get(url, custom_referer=self.referer, proxies=self.proxies)
            if "We are sorry, but your request has been blocked" in r.text:
                logger.warning("Exalead blocked our request")
                return -1
            else:
                self.results = r.text
                self.totalresults += self.results
                return 0
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
Example #30
def main():
    index = load_dataset('all_merged', return_index=True)
    resultFile = './data/datasets/all_merged/estimators/randomforest_sfm_hyperparameters.json'
    hyperparameters = {}
    if not os.path.exists(resultFile):
        logger.error('no hyperparameters!')
        return
    with open(resultFile, 'r') as f:
        hyperparameters = json.load(f)
    for _sym, data in index.items():
        if _sym not in hyperparameters or not os.path.exists(
                hyperparameters[_sym]['estimator']):
            logger.error('{} does not exist.'.format(_sym))
        else:
            features = pd.read_csv(data['csv'],
                                   sep=',',
                                   encoding='utf-8',
                                   index_col='Date',
                                   parse_dates=True)
            # Replace infinities with NaN so they can later be imputed to finite values
            features = features.replace([np.inf, -np.inf], np.nan)

            # Derive target classes from closing price
            target_pct = target_price_variation(features['close'])
            target = target_binned_price_variation(target_pct, n_bins=2)
            # target = target_discrete_price_variation(target_pct)

            # Use selected features
            preselected = hyperparameters[_sym]['features']
            #features = features[preselected]

            imp = IterativeImputer()
            features = pd.DataFrame(imp.fit_transform(features.values),
                                    index=features.index,
                                    columns=features.columns)
            sel = SelectKBest(score_func=f_classif,
                              k=min(30, len(features.columns)))
            sel.fit(features.values, target.values)
            bestfeatures = [
                c for c, f in zip(features.columns, sel.get_support()) if f
            ]
            print("Using features:\n{}".format(bestfeatures))
            features = features[bestfeatures]

            # Split data in train and blind test set with 70:30 ratio,
            #  most ML models don't take sequentiality into account, but our pipeline
            #  uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
            X_train, X_test, y_train, y_test = train_test_split(
                features.values, target.values, shuffle=False, test_size=0.3)
            # Summarize distribution
            print("Training set: # Features {}, # Samples {}".format(
                X_train.shape[1], X_train.shape[0]))
            plot_class_distribution("Training set", _sym, y_train)
            print("Test set: # Features {}, # Samples {}".format(
                X_test.shape[1], X_test.shape[0]))
            plot_class_distribution("Test set", _sym, y_test)
            if not np.isfinite(X_train).all():
                logger.warning("Training x is not finite!")
            if not np.isfinite(y_train).all():
                logger.warning("Training y is not finite!")
            if not np.isfinite(X_test).all():
                logger.warning("Test x is not finite!")
            if not np.isfinite(y_test).all():
                logger.warning("Test y is not finite!")

            # Build pipeline to be used as estimator in grid search
            #  so that each subset of the data is transformed independently
            #  to avoid contamination between folds.
            pipeline = Pipeline([
                (
                    'i', SimpleImputer()
                ),  # Replace NaNs with the column mean (SimpleImputer's default strategy)
                ('s', RobustScaler()),
                ('c',
                 AdaBoostClassifier(base_estimator=DecisionTreeClassifier())),
            ])

            # Perform hyperparameter tuning of the ensemble with 5-fold cross validation
            logger.info("Start Grid search")
            CV_rfc = GridSearchCV(estimator=pipeline,
                                  param_grid=DECISIONTREE_PARAM_GRID,
                                  cv=5,
                                  n_jobs=4,
                                  scoring='neg_mean_squared_error',
                                  verbose=1)
            CV_rfc.fit(X_train, y_train)
            logger.info("End Grid search")

            # Take the fitted ensemble with tuned hyperparameters
            clf = CV_rfc.best_estimator_
            # Test ensemble's performance on training and test sets
            logger.info("Classification report on train set")
            predictions1 = clf.predict(X_train)
            train_report = classification_report(y_train,
                                                 predictions1,
                                                 output_dict=True)
            print(classification_report(y_train, predictions1))
            logger.info("Classification report on test set")
            predictions2 = clf.predict(X_test)
            test_report = classification_report(y_test,
                                                predictions2,
                                                output_dict=True)
            print(classification_report(y_test, predictions2))
            stats = {
                'score': accuracy_score(y_train, predictions1),
                'mse': mean_squared_error(y_train, predictions1),
                'test_score': accuracy_score(y_test, predictions2),
                'test_mse': mean_squared_error(y_test, predictions2),
                'train_report': train_report,
                'test_report': test_report,
            }
            print(stats)
            print("--- end ---")
Example #31
        except Exception as e:
            logger.error("Error in {0}: {1}".format(
                __file__.split('/')[-1], e))
            return False

    def do_search_files(self):
        try:
            query = "filetype:" + self.files + "%20site:" + self.word
            url = "https://{0}/customsearch/v1?key={1}&highRange={2}&lowRange={3}&cx={4}&start={5}&q={6}".format(
                self.server, self.api_key, self.highRange, self.lowRange,
                self.cse_id, self.counter, query)
        except Exception as e:
            logger.error("Error in {0}: {1}".format(
                __file__.split('/')[-1], e))
        try:
            r = requests.get(url, headers=self.headers, proxies=self.proxies)
            if "and not a robot" in r.text:
                logger.warning("google has blocked your visit")
                return -1
            else:
                self.results = r.text
                self.totalresults += self.results
                return 1
        except Exception as e:
            logger.error("Error in {0}: {1}".format(
                __file__.split('/')[-1], e))
            return -1

    def get_emails(self):
        rawres = parser(self.totalresults, self.word)
        return rawres.emails()

    def get_hostnames(self):
        rawres = parser(self.totalresults, self.word)
        return rawres.hostnames()
Example #32
def main():
    index = load_dataset('all_merged', return_index=True)
    resultFile = './data/datasets/all_merged/estimators/svc_hyperparameters.json'
    estFile = './data/datasets/all_merged/estimators/svc_{}.p'
    hyperparameters = {}
    for _sym, data in index.items():
        features = pd.read_csv(data['csv'],
                               sep=',',
                               encoding='utf-8',
                               index_col='Date',
                               parse_dates=True)
        # Replace infinities with NaN so they can later be imputed to finite values
        features = features.replace([np.inf, -np.inf], np.nan)
        # Derive target classes from closing price
        target_pct = target_price_variation(features['close'])
        target = target_binned_price_variation(target_pct, n_bins=2)
        # target = target_discrete_price_variation(target_pct)

        # Split data in train and blind test set with 70:30 ratio,
        #  most ML models don't take sequentiality into account, but our pipeline
        #  uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
        X_train, X_test, y_train, y_test = train_test_split(features.values,
                                                            target.values,
                                                            shuffle=False,
                                                            test_size=0.3)
        # Summarize distribution
        print("Training set: # Features {}, # Samples {}".format(
            X_train.shape[1], X_train.shape[0]))
        plot_class_distribution("Training set", _sym, y_train)
        print("Test set: # Features {}, # Samples {}".format(
            X_test.shape[1], X_test.shape[0]))
        plot_class_distribution("Test set", _sym, y_test)
        if not np.isfinite(X_train).all():
            logger.warning("Training x is not finite!")
        if not np.isfinite(y_train).all():
            logger.warning("Training y is not finite!")
        if not np.isfinite(X_test).all():
            logger.warning("Test x is not finite!")
        if not np.isfinite(y_test).all():
            logger.warning("Test y is not finite!")
        # Build pipeline to be used as estimator in bagging classifier
        #  so that each subset of the data is transformed independently
        #  to avoid contamination between folds.
        pipeline = Pipeline([
            (
                'i', SimpleImputer()
            ),  # Replace NaNs with the column mean (SimpleImputer's default strategy)
            (
                's', RobustScaler()
            ),  # Scale data in order to center it and increase robustness against noise and outliers
            # ('k', SelectKBest()), # Select top 10 best features
            # ('u', RandomUnderSampler()),
            ('c', SVC()),
        ])

        # Perform hyperparameter tuning of the ensemble with 5-fold cross validation
        logger.info("Start Grid search")
        CV_rfc = GridSearchCV(estimator=pipeline,
                              param_grid=SVC_PARAM_GRID,
                              cv=5,
                              n_jobs=4,
                              scoring='neg_mean_squared_error',
                              verbose=1)
        CV_rfc.fit(X_train, y_train)
        logger.info("End Grid search")

        # Take the fitted ensemble with tuned hyperparameters
        clf = CV_rfc.best_estimator_

        # Test ensemble's performance on training and test sets
        logger.info("Classification report on train set")
        predictions1 = clf.predict(X_train)
        print(classification_report(y_train, predictions1))
        logger.info("Classification report on test set")
        predictions2 = clf.predict(X_test)
        print(classification_report(y_test, predictions2))
        stats = {
            'score': accuracy_score(y_train, predictions1),
            'mse': mean_squared_error(y_train, predictions1),
            'test_score': accuracy_score(y_test, predictions2),
            'test_mse': mean_squared_error(y_test, predictions2),
            'cv_best_mse': -1 * CV_rfc.best_score_,  # CV score is negated MSE
            # 'cv_results': CV_rfc.cv_results_,
            'cv_bestparams': CV_rfc.best_params_,
        }
        print(stats)
        with open(estFile.format(_sym), 'wb') as f:
            pickle.dump(clf, f)
        hyperparameters[_sym] = {
            'estimator': estFile.format(_sym),
            'stats': stats
        }
        # feature_importances = np.mean([
        #     p.named_steps.c.feature_importances_ for p in clf.estimators_
        # ], axis=0)

        # importances = {X.columns[i]: v for i, v in enumerate(feature_importances)}
        # labeled = {str(k): v for k, v in sorted(importances.items(), key=lambda item: -item[1])}

        # print({
        #     # 'features':sel_features
        #     'feature_importances': labeled,
        #     # 'rank': {l: i + 1 for i, l in enumerate(labeled.keys())},
        # })
        with open(resultFile, 'w') as f:  # Save results at every update
            json.dump(hyperparameters, f, indent=4)
        print("--- end ---")
Example #33
        d.remove_files(am.mount_path, pattern = "*.WAV", sudo = True)
        moth_disk_check = d.check_disk(report = True, display = True, path = am.mount_path)

        # Configure the AudioMoth for the next recording session
        am.usbModeOn()
        am.setTime()

        # Unmount to allow recording to commence
        am.unmountMoth()
        success = True
    except Exception:
        print(f'Startup attempt {attempt} of {max_attempt} failed')
        attempt += 1

if not success:
    logger.warning('AudioMoth startup failed')
    print('Please check AudioMoth')
    d.sendmail(cfg.name, f"{cfg.name} Error: AudioMoth Failure", cfg.emailto)
    sleep(5)

    exit()

# Main Loop
while True:
    if movement(None) > 0:
        e = on_motion()
        d.sendmail(cfg.name, f"{cfg.name} Motion Event (id:{e.id})", cfg.emailto)

        # Detect when motion stops
        while not e.has_ended(): 
            e.enqueue(movement(e))
Example #34
    def run_content(self):

        if len(self.columns_name) == 0:
            SqliColumns.get_columns(self)

        # unpack the columns dict in a loop and start injecting
        for database_name in self.columns_name:
            for table_name in self.columns_name[database_name]:

                # get the row count; if it differs from self.content_count, update self.content_count to match
                content_counts = self.get_content_count(database_name, table_name)
                if content_counts == 0:
                    logger.warning('Database %s Table %s is empty...' % (database_name, table_name))
                    continue
                elif content_counts != self.content_count:
                    logger.debug('Database %s Table %s content amount change to %d' % (database_name, table_name, content_counts))
                    self.content_count = content_counts

                # a PrettyTable to hold the dumped rows
                content = PrettyTable(list(self.columns_name[database_name][table_name]))
                content.padding_width = 1
                content.align = "r"

                # inject once per row, self.content_count times for each table
                for limits in range(self.content_count):

                    # a queue to collect the values returned by the threads
                    result = queue.Queue()

                    # thread list, result list, and the final row data for the table
                    threads = []
                    results = []
                    contents = []

                    # start the multithreaded injection
                    logger.debug("Start multithreading Sqli...")
                    for column_name in self.columns_name[database_name][table_name]:
                        # one thread injects one column
                        try:
                            t = threading.Thread(target=self.get_content, name='thread for %s' % column_name,
                                                 args=(result, database_name, table_name, column_name, limits))
                            t.start()
                            threads.append(t)
                        except Exception:
                            logger.error('Thread error...')

                    # wait for all threads to finish
                    for t in threads:
                        t.join()

                    # collect the values the threads returned
                    while not result.empty():
                        results.append(result.get())

                    # put the returned values back into column order
                    for i in list(self.columns_name[database_name][table_name]):
                        for item in results:
                            if item[0] == i:
                                contents.append(item[1])
                            else:
                                continue

                    # add the assembled row
                    content_str = ','.join(contents)
                    logger.info("Sqli success content is %s" % content_str)
                    content.add_row(contents)

                # print the table
                logger.debug("Database %s Table %s sqli success..." % (database_name, table_name))
                print("[*] Database %s Table %s content:" % (database_name, table_name))
                print(content)