def __init__(self, domain, proxy=None): self.domain = domain self.domain_name = [] self.smiliar_domain_name = [] self.related_domain_name = [] self.email = [] self.url = "http://api.whoxy.com/" self.engine_name = "Whoxy" try: self.api_key = config.Whoxy_API_KEY except: logger.warning("No Whoxy API Key Configured,Exit") exit(0) self.print_banner() self.proxy = proxy self.company_names = [] self.company_emails = [] self.company_phones = [] #该接口不支持 ''' whois查询其实可以有四种反查: 公司名称 联系人 联系邮箱 联系电话 但whoxy并不是所有都支持https://www.whoxy.com/reverse-whois/demo.php ''' self.blocked_names = [] self.blocked_emails = [] self.bChanged = False
async def scan_result(url, semaphore, method, params):
    try:
        async with semaphore:
            headers = {
                'User-Agent': random.choice(USER_AGENTS),
                "X-Forwarded-For": random.choice(USER_AGENTS),
                "X-Originating-IP": random.choice(USER_AGENTS),
                "X-Remote-IP": random.choice(USER_AGENTS),
                "X-Remote-Addr": random.choice(USER_AGENTS),
            }
            async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False),
                                             headers=headers) as session:
                proxy = random_proxy()
                async with session.request(method=method, url=url, proxy=proxy,
                                           timeout=TimeOut, verify_ssl=False, **params) as response:
                    status_code = response.status
                    res_json = await response.read()
                    msg = {"url": url, "status_code": status_code, "Content-Length": len(res_json)}
                    if status_code == 200:
                        logger.info(msg)
                    else:
                        logger.warning(msg)
                    return msg
    except Exception as e:
        msg = {"url": url, "status_code": 500, "Content-Length": 0}
        logger.error(msg)
        await asyncio.sleep(1)
        return msg
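# A minimal driver sketch (not part of the original excerpt): fan scan_result()
# out over a URL list with a concurrency cap. USER_AGENTS, TimeOut and
# random_proxy() are assumed to be defined elsewhere in the project.
import asyncio

async def scan_all(urls, concurrency=20):
    semaphore = asyncio.Semaphore(concurrency)
    tasks = [scan_result(url, semaphore, "GET", {}) for url in urls]
    return await asyncio.gather(*tasks)

# results = asyncio.run(scan_all(["http://example.com/admin", "http://example.com/backup.zip"]))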
def run(self):
    try:
        timestamp = time.time()
        url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format(
            self.url, timestamp, self.domain, timestamp, self.verify)
        # response = req.get(url, proxies=self.proxy).content  # no proxy needed for this class
        response = req.get(url).content
        result = json.loads(response)
        if result.get('status') == '1':
            for item in result.get('data'):
                if is_domain(item.get('domain')):
                    self.domain_name.append(item.get('domain'))
        elif result.get('status') == 3:
            logger.warning("chaxun.la api blocked our ip...")
            logger.info("input your verify_code")
            # print('get verify_code():', self.verify)
            # self.verify_code()
            # self.run()
        self.domain_name = list(set(self.domain_name))
    except Exception as e:
        logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
    finally:
        logger.info("{0} found {1} domains".format(self.engine_name, len(self.domain_name)))
        return self.domain_name, self.smiliar_domain_name, self.email
def request(url=None, header={}, value=None):
    if url is None:
        logger.error("URL is not found...")
        exit(0)
    else:
        logger.info("Target url is {}".format(url))
    if len(header) == 0:
        logger.warning("Header is empty...")
        header = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36'
        }
    req = requests.Session()
    try:
        if value is None:
            response = req.get(url, headers=header)
        else:
            response = req.post(url, data=value, headers=header)
    except:
        logger.error("Request failed")
        return None
    return response.text.encode('utf-8')
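# Usage sketch: the helper above defaults to GET; passing `value` switches it to
# a form-encoded POST. The URLs and form data here are placeholders.
page = request("http://example.com")  # GET with the default User-Agent header
login = request("http://example.com/login", value={"user": "admin", "pass": "admin"})  # POST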
def click(self, count: int = 1, delay_ms: int = 0, comment=None):
    if count <= 0 or count > 100:
        logger.warning("click: count out of range ({0})".format(count))
        return
    if delay_ms < 0 or delay_ms > 10000:
        logger.warning("click: delay_ms out of range ({0})".format(delay_ms))
        return
    with PiCamera() as camera:
        camera.exif_tags['IFD0.Copyright'] = self.copyright.format(datetime.now().year)
        camera.exif_tags['IFD0.Artist'] = self.artist
        camera.exif_tags['EXIF.UserComment'] = '' if comment is None else comment.strip()
        camera.resolution = (800, 600)
        camera.start_preview()
        now = datetime.now(timezone.utc).astimezone()
        camera.start_recording(f'{cfg.paths.photos}/{now:%Y%m%d}_{now:%H%M%S}.h264')
        camera.wait_recording(1)
        for i in range(count):
            # Suffix each still with its index so repeated captures do not overwrite one another
            camera.capture(f'{cfg.paths.photos}/{now:%Y%m%d}_{now:%H%M%S}_{i:03d}.jpg',
                           use_video_port=True)
            # wait_recording() expects seconds; delay_ms is milliseconds
            camera.wait_recording(delay_ms / 1000)
        camera.stop_recording()
def ph_request(url=None, header={}, value=None):
    if url is None:
        logger.error("URL is not found...")
        exit(0)
    else:
        logger.info("Target url is {}".format(url))
    if len(header) == 0:
        logger.warning("Header is empty...")
        # Note: the PhantomJS driver does not consume this header dict; kept for interface parity
        header = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36'
        }
    try:
        driver = webdriver.PhantomJS(executable_path=set_phantomjs_path())
    except WebDriverException:
        logger.error("phantomjs path error...")
        exit(0)
    try:
        driver.get(url)
        time.sleep(3)
    finally:
        return driver.page_source
def destroy_nodes(self):
    L.error("DESTROYING ALL NODES FOR MANAGER %s" % self.name)
    for node in self.nodes:
        L.warning("KILLING NODE: %s" % node)
        try:
            call("docker-machine kill %s && sleep 10" % node)
            call("docker-machine rm %s" % node)
        except:
            pass
def Get_Api(api_type, query):
    if api_type == "fofa":
        data = GetFofaApi(query).run()
    elif api_type == "zoomeye":
        data = GetZoomeye(query).run()
    else:
        logger.warning(f"Unsupported api type: {api_type}")
        return []
    return data
def args_check(cmdparse, usage):
    print(random.choice(Banner))
    confs = {}
    args = []
    if hasattr(cmdparse, "items"):
        cmdlines = cmdparse.items()
    else:
        cmdlines = cmdparse.__dict__.items()
    for key, value in cmdlines:
        confs[key] = value
        args.append(value)
    if confs['version']:
        logger.info(f"Version: {Version}")
        exit(0)
    if confs['updateprogram']:
        update()
        exit(0)
    if ((not confs['query'] or not confs['apitype']) and not confs['file']
            and not confs['url']) or (not confs['dict'] and not confs['func']):
        print(usage)
        exit(0)
    if confs['porxy']:
        ProxyPool.extend(red_api(confs['porxy']))
    if confs['code']:
        try:
            StatusCode.extend([int(x) for x in confs['code'].split(",")])
        except:
            print(usage)
            exit(0)
    if confs['params']:
        try:
            kw = {
                x.split("=")[0]: eval(x.split("=")[1])
                for x in confs['params'].split(",")
            }
            if isinstance(kw, dict):
                params = kw['params'] if 'params' in kw and isinstance(kw['params'], dict) else None
                json = kw['json'] if 'json' in kw and isinstance(kw['json'], dict) else None
                data = kw['data'] if 'data' in kw and isinstance(kw['data'], dict) else None
                args[8] = {'params': params, 'json': json, 'data': data}
        except:
            print(usage)
            exit(0)
    if confs['output'] not in ['json', 'txt', "csv", "xlsx", "xls"]:
        logger.warning(f"Output format {confs['output']} is not supported yet, falling back to txt")
        args[5] = "txt"
    return args
def do_search(self): try: url = "http://{0}/search?num={1}&start={2}&hl=en&meta=&q={3}".format(self.server,self.quantity,self.counter,self.word) r = requests.get(url, headers=self.headers, proxies=self.proxies) if "and not a robot" in r.content: logger.warning("Google has blocked your visit") return False else: self.results = r.content self.totalresults += self.results return True except Exception, e: logger.error("Error in {0}: {1}".format(__file__.split('/')[-1],e)) return False
def do_search_files(self): try: query = "filetype:"+self.files+"%20site:"+self.word url = "https://{0}/customsearch/v1?key={1}&highRange={2}&lowRange={3}&cx={4}&start={5}&q={6}".format(self.server,self.api_key,self.highRange,self.lowRange,self.cse_id,self.counter,query) r = req.get(url,proxies=self.proxies) if "and not a robot" in r.content: logger.warning("google has blocked your visit") return -1 else: self.results = r.content self.totalresults += self.results return 1 except Exception, e: logger.error("Error in {0}: {1}".format(__file__.split('/')[-1],e)) return -1
def red_api(file_path):
    api_list = []
    file_type = file_path.split('.')[-1]
    if file_type in ["xlsx", "xls"]:
        wb = xlrd.open_workbook(file_path)
        for sh in wb.sheets():
            for r in range(sh.nrows):
                # Take the first cell's value rather than the raw list of Cell objects
                domain = sh.cell_value(r, 0)
                api_list.append(add_http(domain))
    elif file_type in ["txt", "csv"]:
        with open(file_path) as f:
            for line in f:
                api_list.append(add_http(line.strip()))
    else:
        logger.warning("Unsupported file type")
    return list(set(api_list))
def do_search(self): try: url = "http://{0}/search/web/results/?q={1}&elements_per_page=50&start_index={2}".format( self.server, self.word, self.counter) # 这里的pn参数是条目数 r = req.get(url, headers=self.headers, proxies=self.proxies) if "We are sorry, but your request has been blocked" in r.content: logger.warning("Exalead blocked our request") return False else: self.results = r.content self.totalresults += self.results return True except Exception, e: logger.error("Error in {0}: {1}".format( __file__.split('/')[-1], e)) return False
class search_yandex:
    def __init__(self, word, limit, useragent, proxy=None):
        self.engine_name = "Yandex"
        self.word = word
        self.results = ""
        self.totalresults = ""
        self.server = "yandex.com"
        self.hostname = "yandex.com"
        self.headers = {'User-Agent': useragent}
        self.limit = int(limit)
        self.counter = 0
        self.proxies = proxy
        self.print_banner()
        return

    def print_banner(self):
        logger.info("Searching now in {0}..".format(self.engine_name))
        return

    def do_search(self):
        try:
            url = "http://{0}/search?text={1}&numdoc=50&lr={2}".format(
                self.server, self.word, self.counter)
            # %40 is '@'; e.g. searching @meizu.com. Prefixing the keyword with '@'
            # made no observable difference in testing.
        except Exception as e:
            logger.error(e)
        try:
            r = requests.get(url, headers=self.headers, proxies=self.proxies)
            if "automated requests" in r.text:
                logger.warning("Yandex blocked our request, exit")
                exit(0)
            self.results = r.text
            self.totalresults += self.results
        except Exception as e:
            logger.error(e)
def __init__(self, domain, proxy=None): self.domain = domain self.domain_name = [] self.smiliar_domain_name = [] self.related_domain_name = [] self.email = [] self.url = "https://censys.io/api/v1" self.engine_name = "Censys" try: self.api_id = config.Censys_API_UID self.api_secret = config.Censys_API_SECRET except: logger.warning("No Censys API Config,Exit") exit(0) self.print_banner() self.proxy = proxy
def __fetch_stars_all(self, username, page_limit=0):
    _page = 1
    # header['link']: page=(\d+).*$
    while True:
        if page_limit > 0 and _page > page_limit:
            logger.warning(f"aborted dumping {username} due to --max page limit")
            break
        logger.debug(f"fetching stars: page {_page}")
        _stars = self.__fetch_stars_by_page(username, page=_page)
        if not _stars:
            break
        try:
            self.__save_to_db(_stars)
            _page += 1
        except RuntimeError as e:
            logger.debug(str(e))
            break
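# A minimal sketch of the per-page fetcher that __fetch_stars_all() relies on,
# assuming the public GitHub REST API and the `requests` library; the real
# __fetch_stars_by_page() implementation is not shown in this excerpt.
import requests

def fetch_stars_by_page(username, page=1, per_page=100, token=None):
    headers = {"Accept": "application/vnd.github+json"}
    if token:
        headers["Authorization"] = f"token {token}"
    resp = requests.get(
        f"https://api.github.com/users/{username}/starred",
        params={"page": page, "per_page": per_page},
        headers=headers,
        timeout=10,
    )
    resp.raise_for_status()
    # An empty list signals that there are no more pages
    return resp.json()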
def do_search(self): try: url = "http://{0}/search?text={1}&numdoc=50&lr=10590&pn={2}".format( self.server, self.word, self.counter) # %40=@ 搜索内容如:@meizu.com;在关键词前加@有何效果呢?,测试未发现不同 r = requests.get(url, headers=self.headers, proxies=self.proxies) if "automated requests" in r.content: logger.warning("Yandex blocked our request") return False else: self.results = r.content self.totalresults += self.results return True except Exception, e: logger.error("Error in {0}: {1}".format( __file__.split('/')[-1], e)) return False
def __init__(self, word, limit, proxy=None): self.engine_name = "Fofa" try: self.email = config.FOFA_USER_EMAIL self.key = config.FOFA_API_KEY except: logger.warning("No Fofa Config,Exit") exit(0) self.word = word self.results = "" self.totalresults = "" self.server = "fofa.so" self.limit = int(limit) self.counter = 0 #useless self.proxies = proxy self.print_banner() return
def run(self):
    if not zoomeyeApi:
        logger.warning("Please set zoomeyeApi in the config file to your API-KEY")
        exit(0)
    logger.info("Requesting ZoomEye data")
    url = f"https://api.zoomeye.org/host/search?query={self.ip}"
    url_list = []
    try:
        req = requests.Session()
        req.headers = self.headers
        req.mount("https://", HTTPAdapter(max_retries=2))
        target = req.get(url, timeout=10)
        datas = json.loads(target.text)
        if datas.get("matches"):
            url_list.extend(self.get_data(datas.get("matches")))
    except Exception as e:
        logger.error(f"Request failed: {e}")
    return url_list
def _update_imdb_movies(self, getdata_func):
    """
    Update IMDb movie information.

    Fetch the latest movie data via imdbpy, then update the database.
    :param getdata_func: callable returning the IMDb movies to update
    :return:
    """
    i = 0
    since = 0
    while True:
        movies = getdata_func(limit=self.DbOperator.LIMIT, since=since)
        if movies:
            for movie in movies:
                imdbid = movie[0]
                try:
                    # Check that the imdbid has a valid format
                    if not re.match(r'tt\d{7}', imdbid):
                        raise Exception('not a valid imdbid')
                    if self.DbOperator.is_error_imdbid_movie(imdbid):
                        logger.info('error imdbid: %s' % imdbid)
                        continue
                    imdbmovie = self.IMDbObj.get_movie(imdbid)
                    imdbmovie.save2db(self.DbOperator.HOST, self.DbOperator.DB)
                    i += 1
                    logger.info((i, imdbid, imdbmovie['url'], imdbmovie['rating'],
                                 imdbmovie['posterurl']).__str__())
                except Exception as e:
                    time.sleep(30)
                    # If the IMDb site is reachable but the data still cannot be fetched,
                    # the imdbid is probably wrong, so clear it
                    if self.IMDbObj.is_network_ok():
                        self.DbOperator.clear_imdbid(imdbid)
                        logger.info('clear imdbid: %s' % imdbid)
                    else:
                        logger.warning('update imdb fail: %s' % (str(e)))
                        return
            since += self.DbOperator.LIMIT
            logger.info('exported count: %d' % i)
        else:
            break
def __init__(self, word, limit, proxy=None): self.engine_name = "BingAPI" self.word = word.replace(' ', '%20') self.results = "" self.totalresults = "" self.server = "api.cognitive.microsoft.com" self.headers = { "Ocp-Apim-Subscription-Key": config.Bing_API_Key, } self.limit = int(limit) try: self.bingApikey = config.Bing_API_Key except: logger.warning("No Bing API Key,Exit") exit(0) self.counter = 0 self.proxies = proxy self.print_banner() return
def _download(self, filename):
    """
    Download a file, retrying up to MaxTry times.
    :param filename:
    :return:
    """
    max_try = self.MaxTry
    while True:
        try:
            logger.info('download %s' % self.get_ftp_file_uri(filename))
            sys.stdout.flush()
            self.down_ftp_file(filename)
            break
        except Exception as e:
            max_try -= 1
            if max_try >= 0:
                logger.warning('retry: %s, msg: %s' % (filename, str(e)))
            else:
                logger.error('download %s fail!' % filename)
                raise
def test_model(est, parameters, W, X_train: np.ndarray, y_train: np.ndarray,
               X_test: np.ndarray, y_test: np.ndarray, **kwargs):
    result = np.nan
    if X_train.shape[0] <= W:
        logger.warning("Too few training datapoints for window {}".format(W))
        return result
    features = np.concatenate((X_train[:-W], X_test))
    target = np.concatenate((y_train[:-W], y_test))
    predictions = []
    labels = []
    # Go in reverse
    window = 1
    for i in range(features.shape[0], 0, -1):
        if i < (W + 1):
            break
        train_start = i - W - 1
        train_end = i - 1
        test_start = i - 1
        test_end = i
        # print('[Window {}]\tTrain: B={} E={}\tTest: B={} E={}'.format(window, train_start, train_end, test_start, test_end))
        _X_train = features[train_start:train_end]
        _y_train = target[train_start:train_end]
        _X_test = features[test_start:test_end]
        _y_test = target[test_start:test_end]
        _est = est.set_params(**parameters)
        _est = _est.fit(_X_train, _y_train)
        pred = _est.predict(_X_test)
        predictions.append(pred[0])
        labels.append(_y_test[0])
        window += 1
        # print('\t Expect: {} Predict: {}'.format(_y_test[0], pred[0]))
    labels_arr = np.flip(np.array(labels), axis=0)
    predictions_arr = np.flip(np.array(predictions), axis=0)
    print('======== Final score! =======')
    print(classification_report(labels_arr, predictions_arr))
    return (labels_arr, predictions_arr)
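# Usage sketch (not from the original source): evaluate an estimator with a
# 30-sample walk-forward window on an existing train/test split. The estimator,
# parameter dict and the X_*/y_* arrays are assumptions for illustration.
from sklearn.ensemble import RandomForestClassifier

labels, preds = test_model(RandomForestClassifier(), {'n_estimators': 200}, 30,
                           X_train, y_train, X_test, y_test)
print((labels == preds).mean())  # simple accuracy over the walk-forward windows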
def output_shell(line, raise_on_non_zero: bool = False):
    try:
        logger.debug(line)
        print(f'SHELL:{line}', flush=True)
        shell_command = Popen(line, stdout=PIPE, stderr=PIPE, shell=True)
    except OSError:
        return None
    except ValueError:
        return None
    (output, err) = shell_command.communicate()
    shell_command.wait()
    if shell_command.returncode != 0 and raise_on_non_zero:
        print(f"Shell command failed to execute:{line}\n{err}\n{output if output is not None else ''}")
        logger.warning(f"Command failed: {line}")
        return output, False
    return str(output.decode("utf-8")), True
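# Usage sketch: run a command through output_shell() and branch on the success
# flag it returns; the command string is an arbitrary example. Note that
# output_shell() returns None (not a tuple) if the command could not be started.
result = output_shell("uname -a")
if result is not None:
    out, ok = result
    if ok:
        print(out.strip())
    else:
        logger.warning("command did not succeed")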
def run(self):
    try:
        timestamp = time.time()
        url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format(
            self.url, timestamp, self.domain, timestamp, self.verify)
        result = json.loads(req.get(url).content)
        if result.get('status') == '1':
            for item in result.get('data'):
                if is_domain(item.get('domain')):
                    self.subset.append(item.get('domain'))
        elif result.get('status') == 3:
            logger.warning("chaxun.la api blocked our ip...")
            logger.info("input your verify_code")
            # print('get verify_code():', self.verify)
            # self.verify_code()
            # self.run()
        self.subset = list(set(self.subset))
    except Exception as e:
        logger.info(str(e))
    finally:
        logger.info("{0} found {1} domains".format(self.engine_name, len(self.subset)))
        return self.subset
def get_posterurl_by_width(self, imdbpymovie, width):
    if self._tmdbresult is None:
        # fixme: if everything should be processed, remember to adjust this condition
        # Only run when at least two of the conditions below match
        matchcount = 0
        matchcount = matchcount + 1 if self['directors'] != [] else matchcount
        matchcount = matchcount + 1 if self['stars'] != [] else matchcount
        matchcount = matchcount + 1 if self['stars'] != [] else matchcount
        if matchcount > 1:
            url = 'http://api.themoviedb.org/3/find/%s?external_source=imdb_id&api_key=%s' % (
                self._imdbid, self._tmdbapikey)
            response = requests.get(url, timeout=30)
            # set correct encoding
            fix_response_encoding(response)
            if response.status_code == 200:
                self._tmdbresult = json.loads(response.text)
            else:
                time.sleep(5)
                logger.warning('url: %s, status: %d' % (url, response.status_code))
    if self._tmdbresult is not None:
        if len(self._tmdbresult['movie_results']) == 1 and self._tmdbresult['movie_results'][0]['poster_path']:
            return 'http://image.tmdb.org/t/p/w%d%s' % (width, self._tmdbresult['movie_results'][0]['poster_path'])
class search_google():
    def __init__(self, word, limit, useragent, proxy):
        self.engine_name = "Google"
        self.word = word
        self.results = ""
        self.totalresults = ""
        self.files = "pdf"
        self.server = "www.google.com"
        self.headers = {'User-agent': useragent}
        self.quantity = "100"
        self.limit = int(limit)
        self.counter = 0
        self.proxies = proxy
        self.print_banner()
        return

    def print_banner(self):
        logger.info("Searching now in {0}..".format(self.engine_name))
        return

    def do_search(self):
        try:
            url = "http://{0}/search?num={1}&start={2}&hl=en&meta=&q={3}".format(
                self.server, self.quantity, self.counter, self.word)
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
        try:
            r = requests.get(url, headers=self.headers, proxies=self.proxies)
            if "and not a robot" in r.text:
                logger.warning("Google has blocked your visit")
                return 0
            else:
                self.results = r.text
                self.totalresults += self.results
                return 1
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
            return 0
class search_exalead:
    def __init__(self, word, limit, useragent, proxy=None):
        self.engine_name = "Exalead"
        self.word = word
        self.files = "pdf"
        self.results = ""
        self.totalresults = ""
        self.server = "www.exalead.com"
        self.userAgent = useragent
        self.referer = "http://{0}/search/web/results/?q={1}".format(self.server, self.word)
        self.limit = int(limit)
        self.counter = 0
        self.proxies = proxy
        self.print_banner()
        return

    def print_banner(self):
        logger.info("Searching now in {0}..".format(self.engine_name))
        return

    def do_search(self):
        try:
            url = "http://{0}/search/web/results/?q={1}&elements_per_page=50&start_index={2}".format(
                self.server, self.word, self.counter)  # elements_per_page sets the number of results per page
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
        try:
            r = http_request_get(url, custom_referer=self.referer, proxies=self.proxies)
            if "We are sorry, but your request has been blocked" in r.text:
                logger.warning("Exalead blocked our request")
                return -1
            else:
                self.results = r.text
                self.totalresults += self.results
                return 0
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
def main():
    index = load_dataset('all_merged', return_index=True)
    resultFile = './data/datasets/all_merged/estimators/randomforest_sfm_hyperparameters.json'
    hyperparameters = {}
    if not os.path.exists(resultFile):
        logger.error('no hyperparameters!')
    with open(resultFile, 'r') as f:
        hyperparameters = json.load(f)
    for _sym, data in index.items():
        if _sym not in hyperparameters or not os.path.exists(hyperparameters[_sym]['estimator']):
            logger.error('{} does not exist.'.format(_sym))
        else:
            features = pd.read_csv(data['csv'], sep=',', encoding='utf-8',
                                   index_col='Date', parse_dates=True)
            # Replace infinities with nan so that they can later be imputed to a finite value
            features = features.replace([np.inf, -np.inf], np.nan)
            # Derive target classes from closing price
            target_pct = target_price_variation(features['close'])
            target = target_binned_price_variation(target_pct, n_bins=2)
            # target = target_discrete_price_variation(target_pct)
            # Use selected features
            preselected = hyperparameters[_sym]['features']
            # features = features[preselected]
            imp = IterativeImputer()
            features = pd.DataFrame(imp.fit_transform(features.values),
                                    index=features.index, columns=features.columns)
            sel = SelectKBest(score_func=f_classif, k=min(30, len(features.columns)))
            sel.fit(features.values, target.values)
            bestfeatures = [c for c, f in zip(features.columns, sel.get_support()) if f]
            print("Using features:\n{}".format(bestfeatures))
            features = features[bestfeatures]
            # Split data in train and blind test set with 70:30 ratio,
            # most ML models don't take sequentiality into account, but our pipeline
            # uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
            X_train, X_test, y_train, y_test = train_test_split(
                features.values, target.values, shuffle=False, test_size=0.3)
            # Summarize distribution
            print("Training set: # Features {}, # Samples {}".format(X_train.shape[1], X_train.shape[0]))
            plot_class_distribution("Training set", _sym, y_train)
            print("Test set: # Features {}, # Samples {}".format(X_test.shape[1], X_test.shape[0]))
            plot_class_distribution("Test set", _sym, y_test)
            if not np.isfinite(X_train).all():
                logger.warning("Training x is not finite!")
            if not np.isfinite(y_train).all():
                logger.warning("Training y is not finite!")
            if not np.isfinite(X_test).all():
                logger.warning("Test x is not finite!")
            if not np.isfinite(y_test).all():
                logger.warning("Test y is not finite!")
            # Build pipeline to be used as estimator in grid search
            # so that each subset of the data is transformed independently
            # to avoid contamination between folds.
            pipeline = Pipeline([
                ('i', SimpleImputer()),  # Replace nan's with the median value between previous and next observation
                ('s', RobustScaler()),
                ('c', AdaBoostClassifier(base_estimator=DecisionTreeClassifier())),
            ])
            # Perform hyperparameter tuning of the ensemble with 5-fold cross validation
            logger.info("Start Grid search")
            CV_rfc = GridSearchCV(estimator=pipeline, param_grid=DECISIONTREE_PARAM_GRID, cv=5,
                                  n_jobs=4, scoring='neg_mean_squared_error', verbose=1)
            CV_rfc.fit(X_train, y_train)
            logger.info("End Grid search")
            # Take the fitted ensemble with tuned hyperparameters
            clf = CV_rfc.best_estimator_
            # Test ensemble's performance on training and test sets
            logger.info("Classification report on train set")
            predictions1 = clf.predict(X_train)
            train_report = classification_report(y_train, predictions1, output_dict=True)
            print(classification_report(y_train, predictions1))
            logger.info("Classification report on test set")
            predictions2 = clf.predict(X_test)
            test_report = classification_report(y_test, predictions2, output_dict=True)
            print(classification_report(y_test, predictions2))
            stats = {
                'score': accuracy_score(y_train, predictions1),
                'mse': mean_squared_error(y_train, predictions1),
                'test_score': accuracy_score(y_test, predictions2),
                'test_mse': mean_squared_error(y_test, predictions2),
                'train_report': train_report,
                'test_report': test_report,
            }
            print(stats)
    print("--- end ---")
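# DECISIONTREE_PARAM_GRID is referenced above but not defined in this excerpt.
# A minimal sketch of what such a grid could look like, assuming the pipeline
# step name 'c' used above; keys reach the nested AdaBoost/DecisionTree
# parameters through GridSearchCV's double-underscore convention.
DECISIONTREE_PARAM_GRID = {
    'c__n_estimators': [50, 100, 200],
    'c__learning_rate': [0.01, 0.1, 1.0],
    'c__base_estimator__max_depth': [1, 2, 3],
    'c__base_estimator__min_samples_leaf': [1, 5, 10],
}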
                __file__.split('/')[-1], e))
            return False

    def do_search_files(self):
        try:
            query = "filetype:" + self.files + "%20site:" + self.word
            url = "https://{0}/customsearch/v1?key={1}&highRange={2}&lowRange={3}&cx={4}&start={5}&q={6}".format(
                self.server, self.api_key, self.highRange, self.lowRange,
                self.cse_id, self.counter, query)
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
        try:
            r = requests.get(url, headers=self.headers, proxies=self.proxies)
            if "and not a robot" in r.text:
                logger.warning("Google has blocked your visit")
                return -1
            else:
                self.results = r.text
                self.totalresults += self.results
                return 1
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
            return -1

    def get_emails(self):
        rawres = parser(self.totalresults, self.word)
        return rawres.emails()

    def get_hostnames(self):
def main():
    index = load_dataset('all_merged', return_index=True)
    resultFile = './data/datasets/all_merged/estimators/svc_hyperparameters.json'
    estFile = './data/datasets/all_merged/estimators/svc_{}.p'
    hyperparameters = {}
    for _sym, data in index.items():
        features = pd.read_csv(data['csv'], sep=',', encoding='utf-8',
                               index_col='Date', parse_dates=True)
        # Replace infinities with nan so that they can later be imputed to a finite value
        features = features.replace([np.inf, -np.inf], np.nan)
        # Derive target classes from closing price
        target_pct = target_price_variation(features['close'])
        target = target_binned_price_variation(target_pct, n_bins=2)
        # target = target_discrete_price_variation(target_pct)
        # Split data in train and blind test set with 70:30 ratio,
        # most ML models don't take sequentiality into account, but our pipeline
        # uses a SimpleImputer with mean strategy, so it's best not to shuffle the data.
        X_train, X_test, y_train, y_test = train_test_split(
            features.values, target.values, shuffle=False, test_size=0.3)
        # Summarize distribution
        print("Training set: # Features {}, # Samples {}".format(X_train.shape[1], X_train.shape[0]))
        plot_class_distribution("Training set", _sym, y_train)
        print("Test set: # Features {}, # Samples {}".format(X_test.shape[1], X_test.shape[0]))
        plot_class_distribution("Test set", _sym, y_test)
        if not np.isfinite(X_train).all():
            logger.warning("Training x is not finite!")
        if not np.isfinite(y_train).all():
            logger.warning("Training y is not finite!")
        if not np.isfinite(X_test).all():
            logger.warning("Test x is not finite!")
        if not np.isfinite(y_test).all():
            logger.warning("Test y is not finite!")
        # Build pipeline to be used as estimator in bagging classifier
        # so that each subset of the data is transformed independently
        # to avoid contamination between folds.
        pipeline = Pipeline([
            ('i', SimpleImputer()),  # Replace nan's with the median value between previous and next observation
            ('s', RobustScaler()),  # Scale data in order to center it and increase robustness against noise and outliers
            # ('k', SelectKBest()),  # Select top 10 best features
            # ('u', RandomUnderSampler()),
            ('c', SVC()),
        ])
        # Perform hyperparameter tuning of the ensemble with 5-fold cross validation
        logger.info("Start Grid search")
        CV_rfc = GridSearchCV(estimator=pipeline, param_grid=SVC_PARAM_GRID, cv=5,
                              n_jobs=4, scoring='neg_mean_squared_error', verbose=1)
        CV_rfc.fit(X_train, y_train)
        logger.info("End Grid search")
        # Take the fitted ensemble with tuned hyperparameters
        clf = CV_rfc.best_estimator_
        # Test ensemble's performance on training and test sets
        logger.info("Classification report on train set")
        predictions1 = clf.predict(X_train)
        print(classification_report(y_train, predictions1))
        logger.info("Classification report on test set")
        predictions2 = clf.predict(X_test)
        print(classification_report(y_test, predictions2))
        stats = {
            'score': accuracy_score(y_train, predictions1),
            'mse': mean_squared_error(y_train, predictions1),
            'test_score': accuracy_score(y_test, predictions2),
            'test_mse': mean_squared_error(y_test, predictions2),
            'cv_best_mse': -1 * CV_rfc.best_score_,  # CV score is negated MSE
            # 'cv_results': CV_rfc.cv_results_,
            'cv_bestparams': CV_rfc.best_params_,
        }
        print(stats)
        with open(estFile.format(_sym), 'wb') as f:
            pickle.dump(clf, f)
        hyperparameters[_sym] = {'estimator': estFile.format(_sym), 'stats': stats}
        # feature_importances = np.mean([
        #     p.named_steps.c.feature_importances_ for p in clf.estimators_
        # ], axis=0)
        # importances = {X.columns[i]: v for i, v in enumerate(feature_importances)}
        # labeled = {str(k): v for k, v in sorted(importances.items(), key=lambda item: -item[1])}
        # print({
        #     # 'features': sel_features
        #     'feature_importances': labeled,
        #     # 'rank': {l: i + 1 for i, l in enumerate(labeled.keys())},
        # })
        with open(resultFile, 'w') as f:
            # Save results at every update
            json.dump(hyperparameters, f, indent=4)
    print("--- end ---")
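# A minimal sketch (not part of the original script) showing how the pickled
# estimator and the hyperparameters JSON written above could be loaded back for
# prediction; paths follow the estFile/resultFile patterns used above and the
# symbol key is a hypothetical example.
import json
import pickle

def load_estimator(sym, result_file='./data/datasets/all_merged/estimators/svc_hyperparameters.json'):
    with open(result_file, 'r') as f:
        hyperparameters = json.load(f)
    with open(hyperparameters[sym]['estimator'], 'rb') as f:
        return pickle.load(f)

# clf = load_estimator('BTC')        # 'BTC' is a hypothetical symbol key
# predictions = clf.predict(X_test)  # X_test shaped like the training features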
        d.remove_files(am.mount_path, pattern="*.WAV", sudo=True)
        moth_disk_check = d.check_disk(report=True, display=True, path=am.mount_path)
        # Configure the AudioMoth for the next recording session
        am.usbModeOn()
        am.setTime()
        # Unmount to allow recording to commence
        am.unmountMoth()
        success = True
    except:
        print(f'Startup attempt {attempt} of {max_attempt} failed')
        attempt = attempt + 1

if not success:
    logger.warning('AudioMoth startup failed')
    print('Please check AudioMoth')
    d.sendmail(cfg.name, f"{cfg.name} Error: AudioMoth Failure", cfg.emailto)
    sleep(5)
    exit()

# Main Loop
while True:
    if movement(None) > 0:
        e = on_motion()
        d.sendmail(cfg.name, f"{cfg.name} Motion Event (id:{e.id})", cfg.emailto)
        # Detect when motion stops
        while not e.has_ended():
            e.enqueue(movement(e))
def run_content(self):
    if len(self.columns_name) == 0:
        SqliColumns.get_columns(self)
    # Unpack in a loop and start the injection
    for database_name in self.columns_name:
        for table_name in self.columns_name[database_name]:
            # Get the row count; if it differs from the configured self.content_count,
            # adjust self.content_count to the actual count
            content_counts = self.get_content_count(database_name, table_name)
            if content_counts == 0:
                logger.warning('Database %s Table %s is empty...' % (database_name, table_name))
                continue
            elif content_counts != self.content_count:
                logger.debug('Database %s Table %s content amount change to %d'
                             % (database_name, table_name, content_counts))
                self.content_count = content_counts
            else:
                pass
            # Create a pretty-printed table to hold the dumped data
            content = PrettyTable(list(self.columns_name[database_name][table_name]))
            content.padding_width = 1
            content.align = "r"
            # Each table is injected self.content_count times, one row per pass
            for limits in range(self.content_count):
                # Queue holding the values returned by the worker threads
                result = Queue.Queue()
                # Thread list, result list, and the final row inserted into the table
                threads = []
                results = []
                contents = []
                # Start the multithreaded injection
                logger.debug("Start multithreading Sqli...")
                for column_name in self.columns_name[database_name][table_name]:
                    # One thread injects one column
                    try:
                        t = threading.Thread(target=self.get_content,
                                             name='thread for %s' % column_name,
                                             args=(result, database_name, table_name,
                                                   column_name, limits))
                        t.start()
                    except:
                        logger.error('Thread error...')
                    threads.append(t)
                # Wait for all threads to finish
                for t in threads:
                    t.join()
                # Drain the queue of returned data into the result list
                while not result.empty():
                    results.append(result.get())
                # Re-order the returned values to match the column order
                for i in list(self.columns_name[database_name][table_name]):
                    for item in results:
                        if item[0] == i:
                            contents.append(item[1])
                        else:
                            continue
                # Insert the row
                content_str = ','.join(contents)
                logger.info("Sqli success content is %s" % content_str)
                content.add_row(contents)
            # Print the table
            logger.debug("Database %s Table %s sqli success..." % (database_name, table_name))
            print("[*] Database %s Table %s content:" % (database_name, table_name))
            print(content)