def load_journal(name, delete_queue):
    working_dir = config.core.expiry.journal.directory
    expiry_ttl = config.core.expiry.journal.ttl * 24 * 60 * 60

    log.debug("Expiry will load journal in %s for %s bucket." % (working_dir, name))
    while True:
        try:
            for listed_file in os.listdir(working_dir):
                journal_file = os.path.join(working_dir, listed_file)
                if os.path.isfile(journal_file) and journal_file.endswith(name):
                    cur_time = now()
                    day = "%sT00:00:00Z" % listed_file.split(".")[0]
                    file_time = iso_to_epoch(day)
                    if file_time + expiry_ttl <= cur_time:
                        with open(journal_file) as to_delete_journal:
                            count = 0
                            for line in to_delete_journal:
                                if count % 1000 == 0:
                                    while delete_queue.length() > MAX_QUEUE_LENGTH:
                                        time.sleep(SLEEP_TIME)
                                line = line.strip()
                                if line:
                                    delete_queue.push(line)
                                count += 1
                        os.unlink(journal_file)
        except OSError:
            pass

        time.sleep(SLEEP_TIME)
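# A note on the directory layout assumed above: journal files appear to be
# named "<YYYY-MM-DD>.<bucket>" (e.g. a hypothetical "2018-05-01.submission"
# for the submission bucket). load_journal() matches on the bucket-name
# suffix, then parses the date prefix back into an epoch to decide whether
# the file has outlived its TTL and should be drained into delete_queue.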
def handle_retries(self):
    tasks = []
    while self.sleep(0 if tasks else 3):
        cpu_mark = time.process_time()
        time_mark = time.time()

        # Start of ingest message
        if self.apm_client:
            self.apm_client.begin_transaction('ingest_retries')

        tasks = self.retry_queue.dequeue_range(upper_limit=isotime.now(), num=100)

        for task in tasks:
            self.ingest_queue.push(task)

        # End of ingest message (success)
        if self.apm_client:
            elasticapm.label(retries=len(tasks))
            self.apm_client.end_transaction('ingest_retries', 'success')

        self.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
        self.counter.increment_execution_time('busy_seconds', time.time() - time_mark)
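# handle_retries() drains every task whose retry time has come due. The
# retry_queue is evidently a time-indexed priority queue: retry() below
# pushes with int(now(_retry_delay)) as the score, and dequeue_range() pops
# everything scored at or below isotime.now(). A minimal sketch of that
# behaviour over a plain Redis sorted set, using redis-py directly; the
# class and method names here are illustrative, not the real queue API:
import redis

class TimeIndexedQueue:
    def __init__(self, name, host='localhost'):
        self.name = name
        self.client = redis.Redis(host=host)

    def push(self, score, item):
        # Score is an epoch timestamp; item becomes due once now() >= score.
        self.client.zadd(self.name, {item: score})

    def dequeue_range(self, upper_limit, num=100):
        # Fetch up to `num` items due at or before `upper_limit`, then remove
        # what was fetched. Without a pipeline or Lua script this read-then-
        # delete is not atomic against competing consumers; the real queue
        # would need to guard against that.
        items = self.client.zrangebyscore(self.name, '-inf', upper_limit,
                                          start=0, num=num)
        if items:
            self.client.zrem(self.name, *items)
        return [item.decode() for item in items]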
def submit(self, task: IngestTask):
    self.submit_client.submit(
        submission_obj=task.submission,
        completed_queue=_completeq_name,
    )

    self.timeout_queue.push(int(now(_max_time)), task.scan_key)
    self.log.info(f"[{task.ingest_id} :: {task.sha256}] Submitted to dispatcher for analysis")
def process_alerts():
    global running  # pylint: disable=W0603

    consecutive_errors = 0
    end_t = now(interval)
    while running:
        if now() > end_t:
            logger.info("Finished interval (%ds). Restarting...", interval)
            running = False
            break

        event = queue.select(alertq, commandq, timeout=1)
        if not event:
            continue

        q_name = event[0]
        message = event[1]

        if q_name == alertq_name:
            counts.increment('alert.received')
            try:
                create_alert(counts, datastore, logger, message)
                consecutive_errors = 0
            except Exception as ex:  # pylint: disable=W0703
                consecutive_errors += 1
                retries = message['retries'] = message.get('retries', 0) + 1
                if retries > max_retries:
                    logger.exception('Max retries exceeded for: %s', str(message))
                else:
                    alertq.push(message)
                    if 'Submission not finalized' not in str(ex):
                        logger.exception('Unhandled exception processing: %s', str(message))

                for x in exit_msgs:
                    if x in str(ex):
                        consecutive_errors = max_consecutive_errors + 1
                        break

                if consecutive_errors > max_consecutive_errors:
                    break
def retry(raw, scan_key, sha256, ex):  # df node def
    current_time = now()

    notice = Notice(raw)
    retries = notice.get('retries', 0) + 1

    if retries > max_retries:
        trace = ''
        if ex and type(ex) != FileStoreException:
            trace = ': ' + get_stacktrace_info(ex)
        logger.error('Max retries exceeded for %s%s', sha256, trace)
        dupq.delete(dup_prefix + scan_key)
    elif expired(current_time - seconds(notice.get('ts', current_time)), 0):
        logger.info('No point retrying expired submission for %s', sha256)
        dupq.delete(dup_prefix + scan_key)  # df pull delete
    else:
        logger.info('Requeuing %s (%s)', sha256, ex or 'unknown')
        notice.set('retries', retries)
        notice.set('retry_at', now(retry_delay))

        retryq.push(notice.raw)  # df push push
def _get_version_map(self):
    self.engine_map = {}
    engine_list = []
    newest_dat = 0
    oldest_dat = now()

    url = self.cfg.get('BASE_URL') + "stat/engines"
    try:
        r = self.session.get(url=url, timeout=self.timeout)
    except requests.exceptions.Timeout:
        raise Exception("Metadefender service timeout.")

    engines = r.json()

    for engine in engines:
        if self.cfg.get("MD_VERSION") == 4:
            name = self._format_engine_name(engine["eng_name"])
            version = engine['eng_ver']
            def_time = engine['def_time']
            etype = engine['engine_type']
        elif self.cfg.get("MD_VERSION") == 3:
            name = self._format_engine_name(engine["eng_name"]).replace("scanengine", "")
            version = engine['eng_ver']
            def_time = engine['def_time'].replace(" AM", "").replace(" PM", "") \
                .replace("/", "-").replace(" ", "T")
            def_time = def_time[6:10] + "-" + def_time[:5] + def_time[10:] + "Z"
            etype = engine['eng_type']
        else:
            raise Exception("Unknown metadefender version")

        # Compute newest DAT
        dat_epoch = iso_to_epoch(def_time)
        if dat_epoch > newest_dat:
            newest_dat = dat_epoch

        if dat_epoch < oldest_dat and dat_epoch != 0 and etype in ["av", "Bundled engine"]:
            oldest_dat = dat_epoch

        self.engine_map[name] = {
            'version': version,
            'def_time': iso_to_local(def_time)[:19]
        }
        engine_list.append(name)
        engine_list.append(version)
        engine_list.append(def_time)

    self.newest_dat = epoch_to_local(newest_dat)[:19]
    self.oldest_dat = epoch_to_local(oldest_dat)[:19]
    self.dat_hash = hashlib.md5("".join(engine_list)).hexdigest()
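# Worked example of the MD v3 def_time munging above, assuming an input such
# as "04/27/2017 10:03:00 AM": stripping " AM"/" PM" and swapping separators
# yields "04-27-2017T10:03:00"; the re-slice [6:10] + "-" + [:5] + [10:] + "Z"
# then produces the ISO form "2017-04-27T10:03:00Z" that iso_to_epoch expects.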
def create_fake_result(svc_id, classification, srl, hours_to_live):
    start = now()

    # Generate a random configuration key
    length = int(random.random() * 32) + 32
    conf_bytes = "".join([chr(int(random.random() * 256)) for _ in xrange(length)])
    conf_key = hashlib.md5(conf_bytes).hexdigest()[:7]

    # Update result object with random values
    res_obj = copy.deepcopy(RESULT_TEMPLATE)
    res_obj['__expiry_ts__'] = now_as_iso(hours_to_live * 60 * 60)
    res_obj['created'] = now_as_iso()
    res_obj['response']['service_name'] %= svc_id
    res_obj['classification'] = res_obj['result']['classification'] = classification
    res_obj['srl'] = srl

    # Create result sections
    for _ in xrange(int(random.random() * 4) + 1):
        section = copy.deepcopy(RESULT_SECTION_TEMPLATE)
        section['classification'] = classification
        section['body'] = generate_random_words(int(random.random() * 1024) + 32)
        section['title_text'] = generate_random_words(int(random.random() * 14) + 2)
        res_obj['result']['sections'].append(section)

    # Create tags
    for _ in xrange(int(random.random() * 29) + 1):
        tag = copy.deepcopy(RESULT_TAG_TEMPLATE)
        tag['classification'] = classification
        tag['type'] = generate_random_words(1).upper()
        tag['value'] = generate_random_words(int(random.random() * 7) + 1)
        res_obj['result']['tags'].append(tag)

    # Update milestones
    res_obj['response']['milestones']['service_started'] = start
    res_obj['response']['milestones']['service_completed'] = now()

    return res_obj, conf_key
def handle_timeouts(self):
    timeouts = []
    while self.sleep(0 if timeouts else 3):
        cpu_mark = time.process_time()
        time_mark = time.time()

        # Start of ingest message
        if self.apm_client:
            self.apm_client.begin_transaction('ingest_timeouts')

        timeouts = self.timeout_queue.dequeue_range(upper_limit=isotime.now(), num=100)

        for scan_key in timeouts:
            # noinspection PyBroadException
            try:
                actual_timeout = False

                # Remove the entry from the hash of submissions in progress.
                entry = self.scanning.pop(scan_key)
                if entry:
                    actual_timeout = True
                    self.log.error("Submission timed out for %s: %s", scan_key, str(entry))

                dup = self.duplicate_queue.pop(_dup_prefix + scan_key, blocking=False)
                if dup:
                    actual_timeout = True

                while dup:
                    self.log.error("Submission timed out for %s: %s", scan_key, str(dup))
                    dup = self.duplicate_queue.pop(_dup_prefix + scan_key, blocking=False)

                if actual_timeout:
                    self.counter.increment('timed_out')
            except Exception:
                self.log.exception("Problem timing out %s:", scan_key)

        # End of ingest message (success)
        if self.apm_client:
            elasticapm.label(timeouts=len(timeouts))
            self.apm_client.end_transaction('ingest_timeouts', 'success')

        self.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
        self.counter.increment_execution_time('busy_seconds', time.time() - time_mark)
def submit(client, notice):
    priority = notice.get('priority')
    sha256 = notice.get('sha256')

    hdr = notice.parse(description=': '.join((default_prefix, sha256 or '')), **defaults)

    user = hdr.pop('submitter')
    hdr.pop('priority', None)

    path = notice.get('filename', None) or sha256
    client.submit(sha256, path, priority, user, **hdr)

    with timeouts_lock:
        timeouts.append(Timeout(now(max_time), notice.get('scan_key')))
def get_logged_in_user(self):
    auto_auth_uname = self.auto_auth_check()
    if auto_auth_uname is not None:
        return auto_auth_uname

    session_id = flsk_session.get("session_id", None)

    if not session_id:
        current_app.logger.debug('session_id cookie not found')
        abort(401)

    session = KV_SESSION.get(session_id)

    if not session:
        current_app.logger.debug(f'[{session_id}] session_id not found in redis')
        abort(401)
    else:
        cur_time = now()
        if session.get('expire_at', 0) < cur_time:
            KV_SESSION.pop(session_id)
            current_app.logger.debug(f'[{session_id}] session has expired '
                                     f'{session.get("expire_at", 0)} < {cur_time}')
            abort(401)
        else:
            session['expire_at'] = cur_time + session.get('duration', 3600)

    if config.ui.validate_session_ip and \
            request.headers.get("X-Forwarded-For", request.remote_addr) != session.get('ip', None):
        current_app.logger.debug(f'[{session_id}] X-Forwarded-For does not match session IP '
                                 f'{request.headers.get("X-Forwarded-For", None)} != {session.get("ip", None)}')
        abort(401)

    if config.ui.validate_session_useragent and \
            request.headers.get("User-Agent", None) != session.get('user_agent', None):
        current_app.logger.debug(f'[{session_id}] User-Agent does not match session user_agent '
                                 f'{request.headers.get("User-Agent", None)} != {session.get("user_agent", None)}')
        abort(401)

    KV_SESSION.set(session_id, session)

    self.extra_session_checks(session)

    return session.get("username", None)
def retry(self, task, scan_key, ex):
    current_time = now()

    retries = task.retries + 1

    if retries > _max_retries:
        trace = ''
        if ex:
            trace = ': ' + get_stacktrace_info(ex)
        self.log.error(f'[{task.ingest_id} :: {task.sha256}] Max retries exceeded {trace}')
        self.duplicate_queue.delete(_dup_prefix + scan_key)
    elif self.expired(current_time - task.ingest_time.timestamp(), 0):
        self.log.info(f'[{task.ingest_id} :: {task.sha256}] No point retrying expired submission')
        self.duplicate_queue.delete(_dup_prefix + scan_key)
    else:
        self.log.info(f'[{task.ingest_id} :: {task.sha256}] Requeuing ({ex or "unknown"})')
        task.retries = retries
        self.retry_queue.push(int(now(_retry_delay)), task.json())
def process_retries():  # df node def
    while running:
        raw = retryq.pop(timeout=1)  # df pull pop
        if not raw:
            continue

        retry_at = raw['retry_at']
        delay = retry_at - now()

        if delay >= 0.125:
            retryq.unpop(raw)
            time.sleep(min(delay, 1))
            continue

        ingestq.push(raw)  # df push push
def check(self, task: IngestTask,
          count_miss=True) -> Tuple[Optional[str], Optional[str], Optional[float], str]:
    key = self.stamp_filescore_key(task)

    with self.cache_lock:
        result = self.cache.get(key, None)

    if result:
        self.counter.increment('cache_hit_local')
        self.log.info(f'[{task.ingest_id} :: {task.sha256}] Local cache hit')
    else:
        result = self.datastore.filescore.get_if_exists(key)
        if result:
            self.counter.increment('cache_hit')
            self.log.info(f'[{task.ingest_id} :: {task.sha256}] Remote cache hit')
        else:
            if count_miss:
                self.counter.increment('cache_miss')
            return None, None, None, key

        with self.cache_lock:
            self.cache[key] = result

    current_time = now()
    age = current_time - result.time
    errors = result.errors

    if self.expired(age, errors):
        self.log.info(f"[{task.ingest_id} :: {task.sha256}] Cache hit dropped, cache has expired")
        self.counter.increment('cache_expired')
        self.cache.pop(key, None)
        self.datastore.filescore.delete(key)
        return None, None, None, key
    elif self.stale(age, errors):
        self.log.info(f"[{task.ingest_id} :: {task.sha256}] Cache hit dropped, cache is stale")
        self.counter.increment('cache_stale')
        return None, None, result.score, key

    return result.psid, result.sid, result.score, key
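# check() delegates its drop decisions to self.expired() and self.stale(),
# which fall outside this excerpt. A plausible sketch of the policy, inferred
# only from the call sites (an age in seconds plus an error count); the
# threshold names and values below are assumptions, not real config keys:
EXPIRE_AFTER = 15 * 24 * 60 * 60   # assumed: evict cached scores after 15 days
INCOMPLETE_EXPIRE_AFTER = 60 * 60  # assumed: errored runs are evicted sooner
STALE_AFTER = 24 * 60 * 60         # assumed: rescan, but keep the old score
INCOMPLETE_STALE_AFTER = 30 * 60   # assumed

def expired(age: float, errors: int) -> bool:
    # Expired entries are removed from both the local and remote caches.
    return age >= (INCOMPLETE_EXPIRE_AFTER if errors else EXPIRE_AFTER)

def stale(age: float, errors: int) -> bool:
    # Stale entries still report their old score but trigger reprocessing.
    return age >= (INCOMPLETE_STALE_AFTER if errors else STALE_AFTER)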
def send_heartbeat():
    t = now()

    up_hours = (t - start_time) / (60.0 * 60.0)

    queues = {}
    drop_p = {}

    for level in ('low', 'medium', 'critical', 'high'):
        queues[level] = uniqueq.count(*priority_range[level])
        threshold = sample_threshold[level]
        drop_p[level] = 1 - max(0, drop_chance(queues[level], threshold))

    heartbeat = {
        'hostinfo': hostinfo,
        'inflight': scanning.length(),
        'ingest': ingestq.length(),
        'ingesting': drop_p,
        'queues': queues,
        'shard': shard,
        'up_hours': up_hours,
        'waiting': submissionq.length(),

        'ingest.bytes_completed': 0,
        'ingest.bytes_ingested': 0,
        'ingest.duplicates': 0,
        'ingest.files_completed': 0,
        'ingest.skipped': 0,
        'ingest.submissions_completed': 0,
        'ingest.submissions_ingested': 0,
        'ingest.timed_out': 0,
        'ingest.whitelisted': 0,
    }

    # Send ingester stats.
    exported = ingester_counts.export()

    # Add ingester stats to our heartbeat.
    heartbeat.update(exported)

    # Send our heartbeat.
    raw = message.Message(to="*", sender='middleman',
                          mtype=message.MT_INGESTHEARTBEAT,
                          body=heartbeat).as_dict()
    statusq.publish(raw)

    # Send whitelister stats.
    whitelister_counts.export()
def process_retries(self) -> int:
    # Start of ingest message
    if self.apm_client:
        self.apm_client.begin_transaction('ingest_msg')

    tasks = self.ingester.retry_queue.dequeue_range(upper_limit=isotime.now(), num=10)

    for task in tasks:
        self.ingester.ingest_queue.push(task)

    # End of ingest message (success)
    if self.apm_client:
        elasticapm.tag(retries=len(tasks))
        self.apm_client.end_transaction('ingest_retries', 'success')

    return len(tasks)
def rescan(self, submission: Submission, results: Dict[str, Result], file_infos: Dict[str, FileInfo],
           file_tree, errors: List[str], rescan_services: List[str]):
    """
    Rescan a submission started on another system.
    """
    # Reset submission processing data
    submission['times'].pop('completed')
    submission['state'] = 'submitted'

    # Set the list of services to rescan
    submission['params']['services']['rescan'] = rescan_services

    # Create the submission object
    submission_obj = Submission(submission)

    if len(submission_obj.files) == 0:
        raise SubmissionException("No files found to submit.")

    for f in submission_obj.files:
        if not self.datastore.file.exists(f.sha256):
            raise SubmissionException(f"File {f.sha256} does not exist, cannot continue submission.")

    # Set the new expiry
    if submission_obj.params.ttl:
        submission_obj.expiry_ts = epoch_to_iso(now() + submission_obj.params.ttl * 24 * 60 * 60)

    # Clearing runtime_excluded on initial submit or resubmit
    submission_obj.params.services.runtime_excluded = []

    # Save the submission
    self.datastore.submission.save(submission_obj.sid, submission_obj)

    # Dispatch the submission
    self.log.debug("Submission complete. Dispatching: %s", submission_obj.sid)
    self.dispatcher.dispatch_bundle(submission_obj, results, file_infos, file_tree, errors)

    return submission
def process_timeouts():  # df node def
    global timeouts  # pylint:disable=W0603

    with timeouts_lock:
        current_time = now()
        index = 0

        for t in timeouts:
            if t.time >= current_time:
                break
            index += 1
            try:
                timed_out(t.scan_key)  # df push calls
            except:  # pylint: disable=W0702
                logger.exception("Problem timing out %s:", t.scan_key)

        timeouts = timeouts[index:]
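# Timeout records are created as Timeout(now(max_time), scan_key) in submit()
# and consumed via t.time / t.scan_key above. Their definition is not part of
# this excerpt, but it presumably amounts to something like:
from collections import namedtuple

Timeout = namedtuple('Timeout', ['time', 'scan_key'])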
def process_timeouts(self):
    # Start of ingest message
    if self.apm_client:
        self.apm_client.begin_transaction('ingest_msg')

    ingester = self.ingester
    timeouts = ingester.timeout_queue.dequeue_range(upper_limit=isotime.now(), num=10)

    for scan_key in timeouts:
        try:
            actual_timeout = False

            # Remove the entry from the hash of submissions in progress.
            entry = ingester.scanning.pop(scan_key)
            if entry:
                actual_timeout = True
                self.log.error("Submission timed out for %s: %s", scan_key, str(entry))

            dup = ingester.duplicate_queue.pop(_dup_prefix + scan_key, blocking=False)
            if dup:
                actual_timeout = True

            while dup:
                self.log.error("Submission timed out for %s: %s", scan_key, str(dup))
                dup = ingester.duplicate_queue.pop(_dup_prefix + scan_key, blocking=False)

            if actual_timeout:
                ingester.counter.increment('ingest_timeout')
        except Exception:
            self.log.exception("Problem timing out %s:", scan_key)

    # End of ingest message (success)
    if self.apm_client:
        elasticapm.tag(timeouts=len(timeouts))
        self.apm_client.end_transaction('ingest_timeouts', 'success')

    return len(timeouts)
def check(datastore, notice):
    key = stamp_filescore_key(notice)

    with cache_lock:
        result = cache.get(key, None)

    counter_name = 'ingest.cache_hit_local'

    if result:
        logger.info('Local cache hit')
    else:
        counter_name = 'ingest.cache_hit'

        result = datastore.get_filescore(key)
        if result:
            logger.info('Remote cache hit')
        else:
            ingester_counts.increment('ingest.cache_miss')
            return None, False, None, key

        add(key, result.get('psid', None), result['sid'], result['score'],
            result.get('errors', 0), result['time'])

    current_time = now()
    delta = current_time - result.get('time', current_time)
    errors = result.get('errors', 0)

    if expired(delta, errors):
        ingester_counts.increment('ingest.cache_expired')
        with cache_lock:
            cache.pop(key, None)
        datastore.delete_filescore(key)
        return None, False, None, key
    elif stale(delta, errors):
        ingester_counts.increment('ingest.cache_stale')
        return None, False, result['score'], key

    ingester_counts.increment(counter_name)

    return result.get('psid', None), result['sid'], result['score'], key
def _get_version_map(self, node: str) -> None:
    """
    Get the versions of all engines running on a given node

    :param node: The IP of the MetaDefender node
    :return: None
    """
    newest_dat = 0
    oldest_dat = now()
    engine_list = []
    active_engines = 0
    failed_states = ["removed", "temporary failed", "permanently failed"]
    url = urljoin(node, 'stat/engines')

    try:
        self.log.debug(f"_get_version_map: GET {url}")
        r = self.session.get(url=url, timeout=self.timeout)
        engines = r.json()

        for engine in engines:
            if engine['active'] and engine["state"] not in failed_states:
                active_engines += 1

            if self.config.get("md_version") == 4:
                name = self._format_engine_name(engine["eng_name"])
                version = engine['eng_ver']
                def_time = engine['def_time']
                etype = engine['engine_type']
            elif self.config.get("md_version") == 3:
                name = self._format_engine_name(engine["eng_name"]).replace("scanengine", "")
                version = engine['eng_ver']
                def_time = engine['def_time'].replace(" AM", "").replace(" PM", "") \
                    .replace("/", "-").replace(" ", "T")
                def_time = def_time[6:10] + "-" + def_time[:5] + def_time[10:] + "Z"
                etype = engine['eng_type']
            else:
                raise Exception("Unknown version of MetaDefender")

            # Compute newest DAT
            dat_epoch = iso_to_epoch(def_time)
            if dat_epoch > newest_dat:
                newest_dat = dat_epoch

            if dat_epoch < oldest_dat and dat_epoch != 0 and etype in ["av", "Bundled engine"]:
                oldest_dat = dat_epoch

            self.nodes[node]['engine_map'][name] = {
                'version': version,
                'def_time': iso_to_local(def_time)[:19]
            }
            engine_list.append(name)
            engine_list.append(version)
            engine_list.append(def_time)

        self.nodes[node]['engine_count'] = active_engines
        self.nodes[node]['newest_dat'] = epoch_to_local(newest_dat)[:19]
        self.nodes[node]['oldest_dat'] = epoch_to_local(oldest_dat)[:19]
        self.nodes[node]['engine_list'] = "".join(engine_list)
    except exceptions.Timeout:
        raise Exception(f"Node ({node}) timed out after {self.timeout}s "
                        f"while trying to get engine version map")
    except ConnectionError:
        raise Exception(f"Unable to connect to node ({node}) "
                        f"while trying to get engine version map")
def login(**_):
    """
    Log the user into the system

    Variables:
    None

    Arguments:
    None

    Data Block:
    {
     "user": <UID>,
     "password": <ENCRYPTED_PASSWORD>,
     "otp": <OTP_TOKEN>,
     "apikey": <ENCRYPTED_APIKEY>,
     "u2f_response": <RESPONSE_TO_CHALLENGE_FROM_U2F_TOKEN>
    }

    Result example:
    {
     "username": <Logged in user>,  # Username for the logged in user
     "privileges": ["R", "W"],      # Different privileges that the user will get for this session
     "session_duration": 60         # Time after which this session becomes invalid
                                    # Note: The timer resets after each call
    }
    """
    data = request.json
    if not data:
        data = request.values

    user = data.get('user', None)
    password = data.get('password', None)
    apikey = data.get('apikey', None)
    u2f_response = data.get('u2f_response', None)

    if config.auth.get('encrypted_login', True):
        private_key = load_async_key(STORAGE.get_blob('id_rsa'), use_pkcs=True)

        if password and private_key:
            password = private_key.decrypt(base64.b64decode(password), "ERROR")

        if apikey and private_key:
            apikey = private_key.decrypt(base64.b64decode(apikey), "ERROR")

    try:
        otp = int(data.get('otp', 0) or 0)
    except Exception:
        raise AuthenticationException('Invalid OTP token')

    if request.environ.get("HTTP_X_REMOTE_CERT_VERIFIED", "FAILURE") == "SUCCESS":
        dn = request.environ.get("HTTP_X_REMOTE_DN")
    else:
        dn = False

    if (user and password) or dn or (user and apikey):
        auth = {
            'username': user,
            'password': password,
            'otp': otp,
            'u2f_response': u2f_response,
            'dn': dn,
            'apikey': apikey
        }

        try:
            logged_in_uname, priv = default_authenticator(auth, request, flsk_session, STORAGE)

            session_duration = config.ui.get('session_duration', 3600)
            cur_time = now()
            xsrf_token = generate_random_secret()
            current_session = {
                'duration': session_duration,
                'ip': request.headers.get("X-Forwarded-For", request.remote_addr),
                'privileges': priv,
                'time': int(cur_time) - (int(cur_time) % session_duration),
                'user_agent': request.headers.get("User-Agent", "Unknown user agent"),
                'username': logged_in_uname,
                'xsrf_token': xsrf_token
            }

            session_id = hashlib.sha512(str(current_session)).hexdigest()
            current_session['expire_at'] = cur_time + session_duration

            flsk_session['session_id'] = session_id

            KV_SESSION.add(session_id, current_session)

            return make_api_response({
                "username": logged_in_uname,
                "privileges": priv,
                "session_duration": config.ui.get('session_duration', 3600)
            }, cookies={'XSRF-TOKEN': xsrf_token})
        except AuthenticationException as wpe:
            return make_api_response("", wpe.message, 401)

    return make_api_response("", "Not enough information to proceed with authentication", 401)
def completed(self, sub):
    """Invoked when notified that a submission has completed."""
    # There is only one file in the submissions we have made
    sha256 = sub.files[0].sha256
    scan_key = sub.params.create_filescore_key(sha256)
    raw = self.scanning.pop(scan_key)

    psid = sub.params.psid
    score = sub.max_score
    sid = sub.sid

    if not raw:
        # Some other worker has already popped the scanning queue?
        self.log.warning(f"[{sub.metadata.get('ingest_id', 'unknown')} :: {sha256}] "
                         f"Submission completed twice")
        return scan_key

    task = IngestTask(raw)
    task.submission.sid = sid

    errors = sub.error_count
    file_count = sub.file_count
    self.counter.increment('submissions_completed')
    self.counter.increment('files_completed', increment_by=file_count)
    self.counter.increment('bytes_completed', increment_by=task.file_size)

    with self.cache_lock:
        fs = self.cache[scan_key] = FileScore({
            'expiry_ts': now(self.config.core.ingester.cache_dtl * 24 * 60 * 60),
            'errors': errors,
            'psid': psid,
            'score': score,
            'sid': sid,
            'time': now(),
        })
    self.datastore.filescore.save(scan_key, fs)

    self.finalize(psid, sid, score, task)

    def exhaust() -> Iterable[IngestTask]:
        while True:
            res = self.duplicate_queue.pop(_dup_prefix + scan_key, blocking=False)
            if res is None:
                break
            res = IngestTask(res)
            res.submission.sid = sid
            yield res

    # You may be tempted to remove the assignment to dups and use the
    # value directly in the for loop below. That would be a mistake.
    # The function finalize may push on the duplicate queue which we
    # are pulling off and so condensing those two lines creates a
    # potential infinite loop.
    dups = [dup for dup in exhaust()]
    for dup in dups:
        self.finalize(psid, sid, score, dup)

    return scan_key
def login(**_):
    """
    Log the user into the system

    Variables:
    None

    Arguments:
    None

    Data Block:
    {
     "user": <UID>,
     "password": <ENCRYPTED_PASSWORD>,
     "otp": <OTP_TOKEN>,
     "apikey": <ENCRYPTED_APIKEY>,
     "webauthn_auth_resp": <RESPONSE_TO_CHALLENGE_FROM_WEBAUTHN>
    }

    Result example:
    {
     "username": <Logged in user>,  # Username for the logged in user
     "privileges": ["R", "W"],      # Different privileges that the user will get for this session
     "session_duration": 60         # Time after which this session becomes invalid
                                    # Note: The timer resets after each call
    }
    """
    data = request.json
    if not data:
        data = request.values

    user = data.get('user', None)
    password = data.get('password', None)
    apikey = data.get('apikey', None)
    webauthn_auth_resp = data.get('webauthn_auth_resp', None)
    oauth_provider = data.get('oauth_provider', None)
    oauth_token = data.get('oauth_token', None)

    if config.auth.oauth.enabled and oauth_provider:
        oauth = current_app.extensions.get('authlib.integrations.flask_client')
        provider = oauth.create_client(oauth_provider)

        if provider:
            redirect_uri = f'https://{request.host}/login.html?provider={oauth_provider}'
            return provider.authorize_redirect(redirect_uri=redirect_uri)

    try:
        otp = int(data.get('otp', 0) or 0)
    except Exception:
        raise AuthenticationException('Invalid OTP token')

    if (user and password) or (user and apikey) or (user and oauth_token):
        auth = {
            'username': user,
            'password': password,
            'otp': otp,
            'webauthn_auth_resp': webauthn_auth_resp,
            'apikey': apikey,
            'oauth_token': oauth_token
        }

        logged_in_uname = None
        ip = request.headers.get("X-Forwarded-For", request.remote_addr)
        try:
            logged_in_uname, priv = default_authenticator(auth, request, flsk_session, STORAGE)

            session_duration = config.ui.session_duration
            cur_time = now()
            xsrf_token = generate_random_secret()
            current_session = {
                'duration': session_duration,
                'ip': ip,
                'privileges': priv,
                'time': int(cur_time) - (int(cur_time) % session_duration),
                'user_agent': request.headers.get("User-Agent", None),
                'username': logged_in_uname,
                'xsrf_token': xsrf_token
            }

            session_id = hashlib.sha512(str(current_session).encode("UTF-8")).hexdigest()
            current_session['expire_at'] = cur_time + session_duration

            flsk_session['session_id'] = session_id

            KV_SESSION.add(session_id, current_session)

            return make_api_response({
                "username": logged_in_uname,
                "privileges": priv,
                "session_duration": session_duration
            }, cookies={'XSRF-TOKEN': xsrf_token})
        except AuthenticationException as wpe:
            uname = auth.get('username', '(None)')
            LOGGER.warning(f"Authentication failure. (U:{uname} - IP:{ip}) [{wpe}]")
            return make_api_response("", err=str(wpe), status_code=401)
        finally:
            if logged_in_uname:
                LOGGER.info(f"Login successful. (U:{logged_in_uname} - IP:{ip})")

    return make_api_response("", "Not enough information to proceed with authentication", 401)
def ingest(self, task: IngestTask):
    self.log.info(f"[{task.ingest_id} :: {task.sha256}] Task received for processing")

    # Load a snapshot of ingest parameters as of right now.
    max_file_size = self.config.submission.max_file_size
    param = task.params

    self.counter.increment('bytes_ingested', increment_by=task.file_size)
    self.counter.increment('submissions_ingested')

    if any(len(file.sha256) != 64 for file in task.submission.files):
        self.log.error(f"[{task.ingest_id} :: {task.sha256}] Invalid sha256, skipped")
        self.send_notification(task, failure="Invalid sha256", logfunc=self.log.warning)
        return

    # Clean up metadata strings; since we may delete some, iterate on a copy of the keys
    for key in list(task.submission.metadata.keys()):
        value = task.submission.metadata[key]
        meta_size = len(value)
        if meta_size > self.config.submission.max_metadata_length:
            self.log.info(f'[{task.ingest_id} :: {task.sha256}] '
                          f'Removing {key} from metadata because value is too big')
            task.submission.metadata.pop(key)

    if task.file_size > max_file_size and not task.params.ignore_size and not task.params.never_drop:
        task.failure = f"File too large ({task.file_size} > {max_file_size})"
        self._notify_drop(task)
        self.counter.increment('skipped')
        self.log.error(f"[{task.ingest_id} :: {task.sha256}] {task.failure}")
        return

    # Set the groups from the user, if they aren't already set
    if not task.params.groups:
        task.params.groups = self.get_groups_from_user(task.params.submitter)

    # Check if this file is already being processed
    pprevious, previous, score = None, False, None
    if not param.ignore_cache:
        pprevious, previous, score, _ = self.check(task)

    # Assign priority.
    low_priority = self.is_low_priority(task)

    priority = param.priority
    if priority < 0:
        priority = self.priority_value['medium']

        if score is not None:
            priority = self.priority_value['low']
            for level, threshold in self.threshold_value.items():
                if score >= threshold:
                    priority = self.priority_value[level]
                    break
        elif low_priority:
            priority = self.priority_value['low']

    # Reduce the priority by an order of magnitude for very old files.
    current_time = now()
    if priority and self.expired(current_time - task.submission.time.timestamp(), 0):
        priority = (priority // 10) or 1

    param.priority = priority

    # Do this after priority has been assigned.
    # (So we don't end up dropping the resubmission).
    if previous:
        self.counter.increment('duplicates')
        self.finalize(pprevious, previous, score, task)
        return

    if self.drop(task):
        self.log.info(f"[{task.ingest_id} :: {task.sha256}] Dropped")
        return

    if self.is_whitelisted(task):
        self.log.info(f"[{task.ingest_id} :: {task.sha256}] Whitelisted")
        return

    self.unique_queue.push(priority, task.as_primitives())
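# A standalone illustration of the priority assignment in ingest() above.
# The two tables are made-up stand-ins (the real ones come from constants
# and configuration); only the selection logic mirrors the method:
PRIORITY_VALUE = {'low': 100, 'medium': 200, 'high': 300, 'critical': 400}  # assumed
THRESHOLD_VALUE = {'critical': 500, 'high': 100}                            # assumed

def assign_priority(requested: int, score, low_priority: bool) -> int:
    priority = requested
    if priority < 0:  # negative is taken to mean "not set by the submitter"
        priority = PRIORITY_VALUE['medium']
        if score is not None:
            priority = PRIORITY_VALUE['low']
            for level, threshold in THRESHOLD_VALUE.items():
                if score >= threshold:
                    priority = PRIORITY_VALUE[level]
                    break
        elif low_priority:
            priority = PRIORITY_VALUE['low']
    return priority

assert assign_priority(-1, None, False) == 200  # no cache hit: medium
assert assign_priority(-1, 650, False) == 400   # score over critical threshold
assert assign_priority(-1, 10, False) == 100    # cache hit with a low score
assert assign_priority(150, 650, False) == 150  # explicit priority is kept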
def base(*args, **kwargs):
    # Login
    session_id = flsk_session.get("session_id", None)

    if not session_id:
        abort(401)

    session = KV_SESSION.get(session_id)

    if not session:
        abort(401)
    else:
        session = json.loads(session)
        cur_time = now()
        if session.get('expire_at', 0) < cur_time:
            KV_SESSION.pop(session_id)
            abort(401)
        else:
            session['expire_at'] = cur_time + session.get('duration', 3600)

    if request.headers.get("X-Forwarded-For", None) != session.get('ip', None) or \
            request.headers.get("User-Agent", None) != session.get('user_agent', None):
        abort(401)

    KV_SESSION.set(session_id, session)

    logged_in_uname = session.get("username", None)

    if not set(self.required_priv).intersection(set(session.get("privileges", []))):
        raise AccessDeniedException("The method you've used to login does not give you access to this API.")

    if "E" in session.get("privileges", []) and self.check_xsrf_token and \
            session.get('xsrf_token', "") != request.environ.get('HTTP_X_XSRF_TOKEN', ""):
        raise AccessDeniedException("Invalid XSRF token.")

    # Impersonation
    requestor = request.environ.get("HTTP_X_PROXIEDENTITIESCHAIN", None)
    temp_user = login(logged_in_uname)

    # Terms of Service
    if not request.path == "/api/v3/user/tos/%s/" % logged_in_uname:
        if not temp_user.get('agrees_with_tos', False) and config.ui.get("tos", None) is not None:
            raise AccessDeniedException("Agree to Terms of Service before you can make any API calls.")

    if requestor:
        user = None
        if ("C=" in requestor or "c=" in requestor) and dn_parser:
            requestor_chain = [dn_parser(x.replace("<", "").replace(">", ""))
                               for x in requestor.split("><")]
            requestor_chain.reverse()
        else:
            requestor_chain = [requestor]

        impersonator = temp_user
        merged_classification = impersonator['classification']
        for as_uname in requestor_chain:
            user = login(as_uname)
            if not user:
                raise AccessDeniedException("One of the entities in the proxied "
                                            "chain does not exist in our system.")
            user['classification'] = CLASSIFICATION.intersect_user_classification(
                user['classification'], merged_classification)
            merged_classification = user['classification']
            add_access_control(user)

        if user:
            logged_in_uname = "%s(on behalf of %s)" % (impersonator['uname'], user['uname'])
        else:
            raise AccessDeniedException("Invalid proxied entities chain received.")
    else:
        impersonator = {}
        user = temp_user

    if self.require_admin and not user['is_admin']:
        raise AccessDeniedException("API %s requires ADMIN privileges" % request.path)

    #############################################
    # Special username api query validation
    #
    # If an API call requests a username, the username has to match
    # the logged in user or the user has to be ADMIN.
    #
    # APIs that need this special validation need to make sure their
    # variable name for the username is set as an optional parameter
    # inside 'username_key'. Default: 'username'
    if self.username_key in kwargs:
        if kwargs[self.username_key] != user['uname'] \
                and not kwargs[self.username_key] == "__global__" \
                and not kwargs[self.username_key] == "__workflow__" \
                and not kwargs[self.username_key].lower() == "__current__" \
                and not user['is_admin']:
            return make_api_response({}, "Your username does not match requested username", 403)

    if self.audit:
        # noinspection PyBroadException
        try:
            json_blob = request.json
            if not isinstance(json_blob, dict):
                json_blob = {}
        except Exception:
            json_blob = {}

        params_list = list(args) + \
            ["%s=%s" % (k, v) for k, v in kwargs.iteritems() if k in AUDIT_KW_TARGET] + \
            ["%s=%s" % (k, v) for k, v in request.args.iteritems() if k in AUDIT_KW_TARGET] + \
            ["%s=%s" % (k, v) for k, v in json_blob.iteritems() if k in AUDIT_KW_TARGET]

        if len(params_list) != 0:
            AUDIT_LOG.info("%s [%s] :: %s(%s)" % (logged_in_uname,
                                                  user['classification'],
                                                  func.func_name,
                                                  ", ".join(params_list)))

    # Save user credential in user kwarg for future reference
    kwargs['user'] = user

    # Check current user quota
    quota_user = impersonator.get('uname', None) or user['uname']
    quota_id = "%s [%s] => %s" % (quota_user, str(uuid.uuid4()), request.path)
    count = int(RATE_LIMITER.inc(quota_user, track_id=quota_id))
    RATE_LIMITER.inc("__global__", track_id=quota_id)

    flsk_session['quota_user'] = quota_user
    flsk_session['quota_id'] = quota_id
    flsk_session['quota_set'] = True

    quota = user.get('api_quota', 10)
    if count > quota:
        if config.ui.enforce_quota:
            LOGGER.info("User %s was prevented from using the api due to exceeded quota. [%s/%s]" %
                        (quota_user, count, quota))
            raise QuotaExceededException("You've exceeded your maximum quota of %s " % quota)
        else:
            LOGGER.info("Quota exceeded for user %s. [%s/%s]" % (quota_user, count, quota))
    else:
        if DEBUG:
            LOGGER.info("%s's quota is under or equal its limit. [%s/%s]" % (quota_user, count, quota))

    return func(*args, **kwargs)
is_low_priority = forge.get_is_low_priority()

max_priority = config.submissions.max.priority
max_retries = 10
max_time = 2 * 24 * 60 * 60  # Wait 2 days for responses.
max_waiting = int(config.core.dispatcher.max.inflight) / (2 * shards)
min_priority = 1
priority_value = constants.PRIORITIES
retry_delay = 180

retryq = queue.NamedQueue('m-retry-' + shard, **persistent)  # df line queue
running = True
sampling = False
selected_initial = [
    'Antivirus', 'Extraction', 'Filtering', 'Networking', 'Static Analysis'
]
stale_after_seconds = config.core.middleman.stale_after
start_time = now()
submissionq = queue.NamedQueue('m-submission-' + shard, **persistent)  # df line queue
timeouts = []
timeouts_lock = RLock()
whitelist = forge.get_whitelist()
whitelisted = {}
whitelisted_lock = RLock()

dropper_threads = 1
try:
    dropper_threads = int(config.core.middleman.dropper_threads)
except AttributeError:
    logger.warning("No dropper_threads setting. Defaulting to %d.", dropper_threads)
def ingest(datastore, user_groups, raw):  # df node def
    notice = Notice(raw)

    ignore_size = notice.get('ignore_size', False)
    never_drop = notice.get('never_drop', False)
    sha256 = notice.get('sha256')
    size = notice.get('size', 0)

    # Make sure we have a submitter ...
    user = notice.get('submitter', None)
    if user is None:
        user = config.submissions.user
        notice.set('submitter', user)

    # ... and groups.
    groups = notice.get('groups', None)
    if groups is None:
        groups = user_groups.get(user, None)
        if groups is None:
            ruser = datastore.get_user(user)
            if not ruser:
                return
            groups = ruser.get('groups', [])
            user_groups[user] = groups
        notice.set('groups', groups)

    selected = notice.get('selected', None)
    if not selected:
        selected = selected_initial
        notice.set('selected', selected)
        notice.set('resubmit_to', ['Dynamic Analysis'])

    resubmit_to = notice.get('resubmit_to', None)
    if resubmit_to is None:
        notice.set('resubmit_to', [])

    ingester_counts.increment('ingest.bytes_ingested', int(size))
    ingester_counts.increment('ingest.submissions_ingested')

    if not sha256:
        send_notification(notice, failure="Invalid sha256", logfunc=logger.warning)
        return

    c12n = notice.get('classification', '')
    if not Classification.is_valid(c12n):
        send_notification(notice, failure="Invalid classification %s" % c12n,
                          logfunc=logger.warning)
        return

    metadata = notice.get('metadata', {})
    if isinstance(metadata, dict):
        to_delete = []
        # Use a separate name for the metadata value size so the file
        # size in `size` is not clobbered before the check below.
        for k, v in metadata.iteritems():
            meta_size = sys.getsizeof(v, -1)
            if isinstance(v, basestring):
                meta_size = len(v)
            if meta_size > config.core.middleman.max_value_size:
                to_delete.append(k)
            elif meta_size < 0:
                to_delete.append(k)

        if to_delete:
            logger.info('Removing %s from %s', to_delete, notice.raw)
            for k in to_delete:
                metadata.pop(k, None)

    if size > config.submissions.max.size and not ignore_size and not never_drop:
        notice.set('failure',
                   "File too large (%d > %d)" % (size, config.submissions.max.size))
        dropq.push(notice.raw)  # df push push
        ingester_counts.increment('ingest.skipped')
        return

    pprevious, previous, score = None, False, None
    if not notice.get('ignore_cache', False):
        pprevious, previous, score, _ = check(datastore, notice)

    # Assign priority.
    low_priority = is_low_priority(notice)

    priority = notice.get('priority')
    if priority is None:
        priority = priority_value['medium']

        if score is not None:
            priority = priority_value['low']
            for level in ('critical', 'high'):
                if score >= threshold_value[level]:
                    priority = priority_value[level]
                    break
        elif low_priority:
            priority = priority_value['low']

    # Reduce the priority by an order of magnitude for very old files.
    current_time = now()
    if priority and \
            expired(current_time - seconds(notice.get('ts', current_time)), 0):
        priority = (priority / 10) or 1

    notice.set('priority', priority)

    # Do this after priority has been assigned.
    # (So we don't end up dropping the resubmission).
    if previous:
        ingester_counts.increment('ingest.duplicates')
        finalize(pprevious, previous, score, notice)  # df push calls
        return

    if drop(notice):  # df push calls
        return

    if is_whitelisted(notice):  # df push calls
        return

    uniqueq.push(priority, notice.raw)  # df push push
def init():
    datastore = forge.get_datastore()
    datastore.commit_index('submission')

    sids = [
        x['submission.sid'] for x in datastore.stream_search(
            'submission',
            'state:submitted AND times.submitted:[NOW-1DAY TO *] '
            'AND submission.metadata.type:* '
            'AND NOT submission.description:Resubmit*')
    ]

    submissions = {}
    submitted = {}
    for submission in datastore.get_submissions(sids):
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            continue

        if forge.determine_ingest_queue(task.root_sha256) != ingestq_name:
            continue

        scan_key = task.scan_key
        submissions[task.sid] = submission
        submitted[scan_key] = task.sid

    # Outstanding is the set of things Riak believes are being scanned.
    outstanding = set(submitted.keys())

    # Keys is the set of things middleman believes are being scanned.
    keys = set(scanning.keys())

    # Inflight is the set of submissions middleman and Riak agree are inflight.
    inflight = outstanding.intersection(keys)

    # Missing is the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    missing = keys.difference(inflight)

    # Process the set of submissions Riak believes are incomplete but
    # middleman doesn't know about.
    for scan_key in outstanding.difference(inflight):
        sid = submitted.get(scan_key, None)
        if not sid:
            logger.info("Init: No sid found for incomplete")
            continue

        submission = submissions[sid]
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            logger.info("Init: Not root_sha256 or original_selected")
            continue

        if not task.metadata:
            logger.info("Init: Incomplete submission is not one of ours: %s", sid)
            continue

        stype = None
        try:
            stype = task.metadata.get('type', None)
        except:  # pylint: disable=W0702
            logger.exception("Init: Incomplete submission has malformed metadata: %s", sid)

        if not stype:
            logger.info("Init: Incomplete submission missing type: %s", sid)
            continue

        raw = {
            'metadata': task.metadata,
            'overrides': get_submission_overrides(task, overrides),
            'sha256': task.root_sha256,
            'type': stype,
        }
        raw['overrides']['selected'] = task.original_selected

        reinsert(datastore, " (incomplete)", Notice(raw), logger)

    r = redis.StrictRedis(persistent['host'], persistent['port'], persistent['db'])

    # Duplicates is the set of sha256s where a duplicate queue exists.
    duplicates = [x.replace(dup_prefix, '', 1) for x in r.keys(dup_prefix + '*')]

    # Process the set of duplicates where no scanning or riak entry exists.
    for scan_key in set(duplicates).difference(outstanding.union(keys)):
        raw = dupq.pop(dup_prefix + scan_key, blocking=False)
        if not raw:
            logger.warning("Init: Couldn't pop off dup queue (%s)", scan_key)
            dupq.delete(dup_prefix + scan_key)
            continue

        reinsert(datastore, " (missed duplicate)", Notice(raw), logger)

    while True:
        res = completeq.pop(blocking=False)
        if not res:
            break

        scan_key = completed(Task(res))
        try:
            missing.remove(scan_key)
        except:  # pylint: disable=W0702
            pass

    # Process the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    for scan_key in missing:
        raw = scanning.pop(scan_key)
        if raw:
            reinsert(datastore, '', Notice(raw), logger, retry_all=False)

    # Set up time outs for all inflight submissions.
    expiry_time = now(max_time)
    for scan_key in inflight:
        # No need to lock. We're the only thing running at this point.
        timeouts.append(Timeout(expiry_time, scan_key))

    signal.signal(signal.SIGINT, interrupt)
    signal.signal(signal.SIGTERM, interrupt)

    datastore.close()
def test_isotime_epoch():
    epoch_date = now(200)
    assert epoch_date == local_to_epoch(epoch_to_local(epoch_date))
    assert epoch_date == iso_to_epoch(epoch_to_iso(epoch_date))
    assert isinstance(epoch_date, float)
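# A minimal sketch of the iso/epoch helpers this test exercises, assuming
# now(offset) means "current UTC epoch time plus offset, as a float". The
# real implementations live in the isotime module (which also provides the
# local_to_epoch/epoch_to_local pair asserted above); this only illustrates
# the round-trip invariant for the ISO pair:
from datetime import datetime, timezone

def now(offset_seconds=0.0):
    return datetime.now(timezone.utc).timestamp() + offset_seconds

def epoch_to_iso(t):
    return datetime.fromtimestamp(t, timezone.utc).isoformat().replace('+00:00', 'Z')

def iso_to_epoch(ts):
    return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp()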
def completed(task):  # df node def
    sha256 = task.root_sha256
    psid = task.psid
    score = task.score
    sid = task.sid

    scan_key = task.scan_key

    with ScanLock(scan_key):
        # Remove the entry from the hash of submissions in progress.
        raw = scanning.pop(scan_key)  # df pull pop
        if not raw:
            logger.warning("Untracked submission (score=%d) for: %s %s",
                           int(score), sha256, str(task.metadata))

            # Not a result we care about. We are notified for every
            # submission that completes. Some submissions will not be ours.
            if task.metadata:
                stype = None
                try:
                    stype = task.metadata.get('type', None)
                except:  # pylint: disable=W0702
                    logger.exception("Malformed metadata: %s:", sid)

                if not stype:
                    return scan_key

                if (task.description or '').startswith(default_prefix):
                    raw = {
                        'metadata': task.metadata,
                        'overrides': get_submission_overrides(task, overrides),
                        'sha256': sha256,
                        'type': stype,
                    }

                    finalize(psid, sid, score, Notice(raw))
            return scan_key

        errors = task.raw.get('error_count', 0)
        file_count = task.raw.get('file_count', 0)
        ingester_counts.increment('ingest.submissions_completed')
        ingester_counts.increment('ingest.files_completed', file_count)
        ingester_counts.increment('ingest.bytes_completed', int(task.size or 0))

        notice = Notice(raw)

        with cache_lock:
            _add(scan_key, psid, sid, score, errors, now())

        finalize(psid, sid, score, notice)  # df push calls

        def exhaust():
            while True:
                res = dupq.pop(  # df pull pop
                    dup_prefix + scan_key, blocking=False)
                if res is None:
                    break
                yield res

        # You may be tempted to remove the assignment to dups and use the
        # value directly in the for loop below. That would be a mistake.
        # The function finalize may push on the duplicate queue which we
        # are pulling off and so condensing those two lines creates a
        # potential infinite loop.
        dups = [dup for dup in exhaust()]
        for dup in dups:
            finalize(psid, sid, score, Notice(dup))

    return scan_key