def main(args):
    if not os.path.exists(args.server_stream):
        Log.error("server_stream file does not exist.")
        return

    Log.info("Start parsing iSmartAlarm diagnostics stream...")
    isap = ISADiagnoticsStreamParser(args.server_stream)
    unstructured_log = isap.get_unstructured_log()
    sensor_log = isap.get_sensor_log()

    with Elastic(index='unstructured_log', doc_type='unstructured_log') as elastic:
        # keep only the entries that carry a datetime payload
        datetime_log = []
        for log in unstructured_log:
            if log.get('data_type') == 'datetime':
                datetime_log.append(log)
        elastic.upload(datetime_log, 'data')

    with Elastic(index='sensor_log', doc_type='sensor_log') as elastic:
        elastic.upload(sensor_log, 'datetime')

    Log.info("Successfully uploaded server_stream data.")
    del isap
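# Illustrative sketch (not part of the original source): a minimal command-line
# entry point that could drive main() above. It only assumes the script is given
# the path to the iSmartAlarm server_stream file as the `server_stream` argument.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Parse an iSmartAlarm diagnostics stream and upload it to Elasticsearch")
    parser.add_argument("server_stream", help="path to the server_stream file")
    main(parser.parse_args())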
def scan(self, url):
    """Scan and crawl the url which the user requested."""
    Log.i("Trying to crawl {} url".format(url))
    domain = urlparse(url).netloc
    obj = DynamicObject()

    # Step 1. Visit website using headless tor browser
    Log.d("Step 1. Visiting {} website using headless browser".format(url))
    browser = HeadlessBrowser(ini=self.ini, tor_network=True)
    report = browser.run(url)
    del browser

    # if the browser raised an exception, return from here
    if not report:
        return obj

    obj.webpage = report

    # Step 2. Scan common service ports
    Log.d("Step 2. Scanning {} domain's common service port".format(domain))
    obj.port = self._portscan(domain)

    # Step 3. TO-DO

    return obj
def run(self, url):
    try:
        self.driver.get(url)
    except Exception:
        # browser scan failed
        Log.e("Browser has an error.")
        return

    # if the driver source is empty
    if not self.get_source():
        return

    # run the HTML parser to extract data from the page source
    try:
        # beautifulsoup object for parsing the html source
        self.soup = BeautifulSoup(self.driver.page_source, 'html.parser')
    except Exception:
        # website source code is not HTML
        Log.e("Invalid HTML Source code.")
        return

    # get HAR from driver
    self.har = json.loads(self.driver.get_log('har')[0]['message'])

    report = DynamicObject({
        'url': url,
        'domain': urlparse(url).netloc,
        'title': self.get_title(),
        'screenshot': self.get_screenshot(),
        'source': self.get_source(),
        'sublinks': self.get_sublinks(),
        'language': self.get_language(),
        'headers': self.get_headers(),
        'tree': self.get_website_tree(),
    })

    return report
def save(self):
    """
    Save domain on database and request crawling.

    :return: None
    """
    engine = Engine.create(self.ini)
    with Session(engine=engine) as session:
        # iterate over a copy so urls can be removed safely while looping
        for url in list(self.urls):
            task_id = uuid4().hex
            try:
                # add url into database
                session.add(Domain(uuid=task_id, url=url))
                session.commit()
                task = run_crawler.apply_async(args=(url, ), task_id=task_id)
                Log.i("Crawler issued a new task id {} at {}".format(
                    task.task_id, url))
            except Exception:
                Log.d("This {} url is already saved in the database.".format(url))
            finally:
                self.urls.remove(url)
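# Hypothetical sketch of the run_crawler task used by save() above; the actual task
# definition is not included here. It assumes a Celery application object named `app`
# and reuses the Crawler class (with its scan() method) shown elsewhere in this codebase.
@app.task
def run_crawler(url):
    crawler = Crawler(ini=ini)  # `ini` is assumed to be available to the worker
    return crawler.scan(url)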
def train(X_train, X_val, y_train, y_val,
          train_config: dict = train_config,
          global_config: dict = global_config,
          save_model: bool = True):
    # create paths if necessary
    for path in global_config.values():
        create_dirs(path)

    # model name and path
    name = "_".join([train_config["DATE"], train_config["SESSION_NAME"]])
    model_path = os.path.join(global_config["WEIGHT_DIR"], name)

    # instantiate model
    model = train_config["MODEL"](**train_config["MODEL_CONFIG"])

    # fit to training data
    model.fit(X_train, y_train)

    # dump model to disk
    if save_model:
        joblib.dump(model, model_path + ".joblib")

    # log metrics to csv
    train_predictions = model.predict(X_train)
    val_predictions = model.predict(X_val)
    log_content = train_config.copy()
    log_content["TRAIN_LOSS"] = train_config["LOSS"](y_train, train_predictions)
    log_content["VAL_LOSS"] = train_config["LOSS"](y_val, val_predictions)
    log_content["TRAIN_METRICS"] = {}
    log_content["VAL_METRICS"] = {}
    for key, metric in train_config["METRICS"].items():
        log_content["TRAIN_METRICS"][key] = metric(y_train, train_predictions)
        log_content["VAL_METRICS"][key] = metric(y_val, val_predictions)
    log_path = os.path.join(global_config["LOG_DIR"], train_config["LOGFILE"])
    write_log(log_path, log_content)

    # log metrics to mlflow
    logger = Log(train_config=train_config, run_name=train_config["SESSION_NAME"])
    logger.log_metric("Train Loss", log_content["TRAIN_LOSS"])
    logger.log_metric("Validation Loss", log_content["VAL_LOSS"])

    # return validation loss
    return log_content["VAL_LOSS"]
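# Illustrative example (not from the original project) of the configuration shape
# train() expects. The scikit-learn model, metrics, and path values below are
# assumptions; only the dictionary keys are taken from the function body above.
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error

example_global_config = {
    "WEIGHT_DIR": "weights",
    "LOG_DIR": "logs",
}

example_train_config = {
    "DATE": "2020-01-01",
    "SESSION_NAME": "ridge_baseline",
    "MODEL": Ridge,
    "MODEL_CONFIG": {"alpha": 1.0},
    "LOSS": mean_squared_error,
    "METRICS": {"mae": mean_absolute_error},
    "LOGFILE": "training_log.csv",
}

# val_loss = train(X_train, X_val, y_train, y_val,
#                  train_config=example_train_config,
#                  global_config=example_global_config)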
def save(self, merge, frame):
    """Convert and save into playable video."""
    Log.info("Converting video file codec format...")
    for video in self.rawvideos:
        os.system(f"ffmpeg -f h264 -r 10 -i {video} -c copy {video.split('.')[0]}.mp4")

        # remove original file
        if os.path.exists(video):
            os.remove(video)

    Log.info("Successfully converted the video file codec.")

    if merge:
        Log.info("Merging videos..")
        videos = [video.split('.')[0] + ".mp4" for video in self.rawvideos]

        # ffmpeg's concat demuxer reads the input files from a list file
        listfile = os.path.join(self.output, "videos.txt")
        with open(listfile, "w") as f:
            for video in videos:
                f.write(f"file '{os.path.abspath(video)}'\n")
        os.system(f"ffmpeg -f concat -safe 0 -i {listfile} -c copy video.mp4")
        os.remove(listfile)

        for video in videos:
            os.remove(video)

        Log.info(f"Successfully merged {len(self.rawvideos)} videos.")
def run(source):
    _class = source()
    status = _class.active

    if status:
        Log.i("Trying to run {} source".format(_class.name))
        try:
            _class.collect()
        except Exception:
            Log.e("Failed to collect data from {} source".format(_class.name))

        if _class.urls:
            _class.save()
    else:
        Log.i("{} source is now disabled".format(_class.name))

    del _class
    return status
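# Hypothetical minimal source class, showing only the interface run() relies on:
# a `name`, an `active` flag, a `collect()` that fills `urls`, and a `save()`.
# The class name and the collected url below are illustrative.
class ExampleSource:
    name = "example"
    active = True

    def __init__(self):
        self.urls = []

    def collect(self):
        self.urls.append("http://example.onion")

    def save(self):
        Log.i("Saving {} urls from {} source".format(len(self.urls), self.name))

# status = run(ExampleSource)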
def collect(self):
    Log.d("Start collecting from freshonion API")
    response = HTTP.request(
        url='http://zlal32teyptf4tvi.onion/json/all',
        tor_network=True,
        ini=self.ini
    )

    if not response:
        Log.e("Exception occurred while loading website.")
        return

    if response.status_code == 200:
        rows = response.json()
        Log.i("{} urls detected from freshonion".format(len(rows)))
        for row in rows:
            url = self._get_formed_url(row)
            if url not in self.urls:
                self.urls.append(url)
def test_write_debug():
    Log.d("Test Debugging Message")
def __init__(self, ini):
    Log.i("Starting crawler")
    self.ini = ini
def _portscan(self, domain):
    """Scan and check opened ports."""
    socket = Socket(
        tor_network=True,
        ini=self.ini,
    )

    # common service port list
    services = [
        {'number': 20, 'status': False},
        {'number': 21, 'status': False},
        {'number': 22, 'status': False},
        {'number': 23, 'status': False},
        {'number': 25, 'status': False},
        {'number': 80, 'status': False},
        {'number': 110, 'status': False},
        {'number': 123, 'status': False},   # NTP
        {'number': 143, 'status': False},
        {'number': 194, 'status': False},   # IRC
        {'number': 389, 'status': False},
        {'number': 443, 'status': False},
        {'number': 993, 'status': False},   # IMAPS
        {'number': 3306, 'status': False},
        {'number': 3389, 'status': False},
        {'number': 5222, 'status': False},  # XMPP
        {'number': 6667, 'status': False},  # Public IRC
        {'number': 8060, 'status': False},  # OnionCat
        {'number': 8333, 'status': False},  # Bitcoin
    ]

    for service in services:
        opened = socket.ping_check(domain, service['number'])
        service['status'] = opened
        Log.d("{} port is {}".format(service['number'],
                                     'opened' if opened else 'closed'))

    del socket
    return services
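# Illustrative helper (not part of the crawler): _portscan() returns a list of
# {'number': <port>, 'status': <bool>} entries, so open ports can be read like this.
def open_ports(services):
    """Return the port numbers whose status flag is True."""
    return [service['number'] for service in services if service['status']]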
def extract(self, merge, frame, add_timeline):
    """Extract frames from database."""
    Log.debug("Extracting videos from database...")

    if frame:
        with sqlite3.connect(self.database) as con:
            cur = con.cursor()
            cur.execute(
                "SELECT frame_time, gop_start_rowid, sps_bytes, pps_bytes, "
                "frame_bytes, chunk_complete FROM frame_raw_data_table")
            rows = cur.fetchall()

        sps_bytes = None
        pps_bytes = None
        videobuf = None
        count = 0
        timestamps_by_video = {}
        frames_by_video = {}

        for row in rows:
            # each row carries one frame record
            frame_time, gop_start_rowid, _sps_bytes, _pps_bytes, frame_bytes, chunk_complete = row

            if gop_start_rowid == -1:
                # set new sps and pps bytes
                sps_bytes = _sps_bytes
                pps_bytes = _pps_bytes
                videobuf = pps_bytes + sps_bytes + frame_bytes
                timestamps_by_video[count] = [frame_time]
            else:
                videobuf = videobuf + frame_bytes
                timestamps_by_video[count].append(frame_time)

            if chunk_complete == 1:
                frames_by_video[count] = videobuf
                sps_bytes = None
                pps_bytes = None
                videobuf = None
                count += 1

        if videobuf:
            frames_by_video[count] = videobuf

        for key in frames_by_video.keys():
            # save h264 file
            with open(os.path.join(self.output, f'{key}.h264'), 'wb') as f:
                f.write(frames_by_video[key])

            i = 0
            for timestamp in timestamps_by_video[key]:
                os.system(f'ffmpeg -i {self.output}/{key}.h264 -c:v libx264 '
                          f'-filter:v "select=gte(n\\,{i})" -frames:v 1 '
                          f'-f h264 {self.output}/{key}_{i}.h264')
                os.system(f'ffmpeg -i {self.output}/{key}_{i}.h264 -frames:v 1 '
                          f'-f image2 {self.output}/{self._gen_filename(timestamp)}.png')
                os.remove(f'{self.output}/{key}_{i}.h264')
                i += 1

            os.remove(f'{self.output}/{key}.h264')

        Log.info("Successfully saved images by frame.")
    else:
        with sqlite3.connect(self.database) as con:
            cur = con.cursor()
            cur.execute("SELECT * FROM frame_raw_data_table")
            rows = cur.fetchall()

        videobuf = b""   # temporary buffer for constructing video
        videoname = ""   # name of video file
        count = 0        # video file counter

        for row in rows:
            if row[4]:
                if videoname:
                    with open(videoname, "wb") as f:
                        f.write(videobuf)
                    self.rawvideos.append(videoname)

                videobuf = row[5]
                videobuf += row[4]
                videobuf += row[6]
                videoname = os.path.join(self.output, f"{count}.tmp")
                self.videotimes[videoname] = [row[0]]
                count += 1
            else:
                videobuf = videobuf + row[6]
                if row[0] not in self.videotimes[videoname]:
                    self.videotimes[videoname].append(row[0])

        if videobuf:
            with open(videoname, "wb") as f:
                f.write(videobuf)
            self.rawvideos.append(videoname)

        Log.info(f"Successfully extracted {count} video files.")
        self.save(merge, frame)

        documents = []
        for filename in self.videotimes.keys():
            runtime = self.videotimes[filename]
            start, end = to_datetime(runtime[0]), to_datetime(runtime[-1])
            filename = os.path.basename(filename).replace('tmp', 'mp4')
            documents.append({
                'start_time': start,
                'end_time': end,
                'filename': filename
            })

        # write history as file
        with open(os.path.join(self.output, 'video_list.txt'), 'w') as f:
            for document in documents:
                f.write(f"{document['filename']}: "
                        f"{document['start_time']} - {document['end_time']}\n")

        # upload to elasticsearch to add the timeline
        if add_timeline:
            with Elastic(index='nest', doc_type='video') as elastic:
                elastic.upload(documents, 'start_time')
def __del__(self):
    Log.i("Ending crawler")
def test_write_warning():
    Log.w("Test Warning Message")
def test_write_error():
    Log.e("Test Error Message")
def test_write_info():
    Log.i("Test Info Message")
def save(self, documents):
    with Elastic(index='alexa', doc_type='activity') as elastic:
        elastic.upload(documents, 'time')

    Log.info("Successfully uploaded data into elasticsearch.")
def test_write_critical():
    Log.c("Test Critical Message")