def ingester():  # df node def  # pylint:disable=R0912
    datastore = forge.get_datastore()
    user_groups = {}

    # Move from ingest to unique and waiting queues.
    # While there are entries in the ingest queue we consume chunk_size
    # entries at a time and move unique entries to uniqueq / queued and
    # duplicates to their own queues / waiting.
    while running:
        while True:
            result = completeq.pop(blocking=False)  # df pull pop
            if not result:
                break

            completed(Task(result))  # df push calls

        entry = ingestq.pop(timeout=1)  # df pull pop
        if not entry:
            continue

        trafficq.push(entry)  # df push push

        sha256 = entry.get('sha256', '')
        if not sha256 or len(sha256) != 64:
            logger.error("Invalid sha256: %s", entry)
            continue

        entry['md5'] = entry.get('md5', '').lower()
        entry['sha1'] = entry.get('sha1', '').lower()
        entry['sha256'] = sha256.lower()

        ingest(datastore, user_groups, entry)  # df push calls

    datastore.close()

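# For reference, a minimal sketch of an ingest entry that would pass the
# validation in ingester() above. The field values are illustrative
# assumptions, not taken from a real deployment: only sha256 (64 hex
# characters) is mandatory, and md5/sha1 are lowercased if present.
example_entry = {
    'sha256': 'a' * 64,                # placeholder digest
    'md5': 'D41D8CD98F00B204E9800998ECF8427E',
    'sha1': '',
    'metadata': {'type': 'EXAMPLE'},   # hypothetical metadata
}
# ingestq.push(example_entry)  # how a producer would enqueue it
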
def send_raw(self, raw, shards=None):
    if not shards:
        config = forge.get_config()
        shards = config.core.dispatcher.shards

    task = Task(raw)
    self.send(task, shards)

def watch(cls, sid, watch_queue):
    t = Task.watch(**{
        'priority': config.submissions.max.priority,
        'sid': sid,
        'watch_queue': watch_queue,
    })
    n = forge.determine_dispatcher(sid)
    forge.get_control_queue('control-queue-' + str(n)).push(t.raw)

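# A minimal caller-side sketch for watch(). It assumes the caller builds the
# reply queue name with reply_queue_name() and drains it with NamedQueue,
# the same helpers used by _send_control_queue_call below. DispatchClient is
# a hypothetical name for the class defining watch(), and the 60-second poll
# is illustrative.
def watch_example(sid):
    name = reply_queue_name('watch')
    DispatchClient.watch(sid, name)
    nq = NamedQueue(name)
    while True:
        msg = nq.pop(timeout=60)
        if msg is None:
            break
        pprint.pprint(msg)
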
def _send_control_queue_call(cls, shard, state, **kw):
    name = reply_queue_name(state)
    kw.update({
        'state': state,
        'watch_queue': name,
    })
    t = Task({}, **kw)
    forge.get_control_queue('control-queue-' + str(shard)).push(t.raw)
    nq = NamedQueue(name)
    return nq.pop(timeout=5)

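# Sketch of a convenience wrapper built on _send_control_queue_call. The
# 'get_system_time' state mirrors the one _check_time_drift pushes below;
# the wrapper name and default shard are assumptions for illustration.
@classmethod
def get_system_time(cls, shard=0):
    reply = cls._send_control_queue_call(shard, 'get_system_time')
    return reply.get('time') if reply else None
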
def _do_work(self, raw_task):
    """
    Complete an incoming work item.

    Note: This will block while a service is executing the task.
    For some services this could be many seconds or even minutes.
    """
    assert not isinstance(raw_task, list)
    task = Task(raw_task)
    # noinspection PyProtectedMember
    self.service._handle_task(task)
    self.work_count.value += 1

def redispatch(self, name, sid, srl, service, reason, now):
    entry = None
    try:
        entry = self.entries[sid][srl]
    except KeyError:
        return False

    try:
        stage = self.service_manager.stage_by_name(service.name)
        d = getattr(entry, name)[stage]
        c = entry.completed_services[stage]
        if service.name in c or (d and service.name in d):
            return False

        log.info("%s for %s: %s/%s", reason, service.name, sid, srl)
        self.dispatch(service, entry, now)
        return True
    except Exception as ex:  # pylint: disable=W0703
        trace = get_stacktrace_info(ex)
        log.error("Couldn't redispatch to %s for %s/%s: %s",
                  service.name, sid, srl, trace)
        response = Task(deepcopy(entry.task.raw))
        response.watermark(service.name, '')
        response.nonrecoverable_failure(trace)
        self.storage_queue.push({
            'type': 'error',
            'name': service.name,
            'response': response,
        })
        return False

def process(self, msg):
    # Check the message before wrapping it so a None msg is rejected
    # up front instead of being turned into an empty Task.
    if not msg:
        log.warning("Got 'None' msg")
        return

    task = Task.wrap(msg)

    try:
        func = self.__getattribute__(task.state)
    except AttributeError:
        log.warning('Unknown message type: %s', task.state)
        return

    try:
        func(task)
    except Exception as ex:  # pylint: disable=W0703
        trace = get_stacktrace_info(ex)
        log.error('Problem processing %s: %s', pformat(task.raw), trace)

def _check_time_drift(self):
    dispatcher = '0'
    name = reply_queue_name('cli_get_time')
    t = Task({}, **{
        'state': 'get_system_time',
        'watch_queue': name,
    })
    forge.get_control_queue('control-queue-' + dispatcher).push(t.raw)
    nq = NamedQueue(name)
    r = nq.pop(timeout=5)
    if r is None or 'time' not in r:
        self.log.warn("Timed out trying to determine the dispatcher's clock.")
        return

    clock_difference = abs(r['time'] - time.time())
    if clock_difference > 600:
        self.log.info(
            "Dispatcher's clock is %s seconds away from ours. "
            "Clocks are not set correctly.", clock_difference)
    else:
        self.log.debug('Clock drift from dispatcher: %s.', clock_difference)

logger.info("Monitoring the following service queues: %s", threshold) while True: queue_lengths = get_service_queue_lengths() over = { k: v for k, v in queue_lengths.iteritems() if v > (threshold.get(k, 0) or v) } for name, size in over.iteritems(): excess = size - threshold.get(name, size) if excess <= 0: continue for msg in get_queue(name).unpush(excess): # noinspection PyBroadException try: t = Task(msg) t.watermark(name, '') t.nonrecoverable_failure('Service busy.') t.cache_key = store.save_error(name, None, None, t) dispatch_queue.send_raw(t.as_dispatcher_response()) logger.info("%s is too busy to process %s.", name, t.srl) except: # pylint:disable=W0702 logger.exception('Problem sending response:') time.sleep(config.system.update_interval)
def dispatch(self, service, entry, now):
    task = entry.task
    sid = task.sid
    srl = task.srl
    name = service.name

    queue_size = self.queue_size[name] = self.queue_size.get(name, 0) + 1
    entry.retries[name] = entry.retries.get(name, -1) + 1

    if task.profile:
        if entry.retries[name]:
            log.info('%s Graph: "%s" -> "%s/%s" [label=%d];',
                     sid, srl, srl, name, entry.retries[name])
        else:
            log.info('%s Graph: "%s" -> "%s/%s";', sid, srl, srl, name)
            log.info('%s Graph: "%s/%s" [label=%s];', sid, srl, name, name)

    file_count = len(self.entries[sid]) + len(self.completed[sid])

    # Warning: Please do not change the text of the error messages below.
    msg = None
    if self._service_is_down(service, now):
        msg = 'Service down.'
    elif entry.retries[name] > config.core.dispatcher.max.retries:
        msg = 'Max retries exceeded.'
    elif entry.retries[name] >= 1:
        log.debug("Retry sending %s/%s to %s", sid, srl, name)
    elif task.depth > config.core.dispatcher.max.depth:
        msg = 'Max depth exceeded.'
    elif file_count > config.core.dispatcher.max.files:
        msg = 'Max files exceeded.'

    if msg:
        log.debug(' '.join((msg, "Not sending %s/%s to %s." %
                            (sid, srl, name))))
        response = Task(deepcopy(task.raw))
        response.watermark(name, '')
        response.nonrecoverable_failure(msg)
        self.storage_queue.push({
            'type': 'error',
            'name': name,
            'response': response,
        })
        return False

    if service.skip(task):
        response = Task(deepcopy(task.raw))
        response.watermark(name, '')
        response.success()
        q.send_raw(response.as_dispatcher_response())
        return False

    # Set up an ack timeout.
    seconds = min(service.timeout * (queue_size + 5), 7200)
    task.ack_timeout = seconds
    task.sent = now

    service.proxy.execute(task.priority, task.as_service_request(name))

    # Add the timeout to the end of its respective list.
    ack_timeout = self.ack_timeout
    lst = ack_timeout.get(seconds, [])
    lst.append(Timeout(sid, srl, name, now + seconds))
    ack_timeout[seconds] = lst

    return True

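# Worked example of the ack timeout computed in dispatch() above: with a
# hypothetical service.timeout of 60 seconds and a queue_size of 10, the
# dispatcher waits min(60 * (10 + 5), 7200) = 900 seconds for an ack. The
# 7200-second cap bounds the wait no matter how deep the queue gets.
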
def _drain(self):
    with self._current_work_items_lock:
        if not self._current_work_items:
            self.log.info('EXIT_DRAIN:0')
            return

        result_store = forge.get_datastore()
        dispatch_queue = forge.get_dispatch_queue()
        self.log.info('EXIT_DRAIN:%s', len(self._current_work_items))
        for item in self._current_work_items:
            work = Task(item)
            task = Task({})
            task.sid = work.sid
            task.srl = work.srl
            task.dispatch_queue = work.dispatch_queue
            task.classification = work.classification
            self.log.info("DRAIN: %s/%s", task.sid, task.srl)
            task.watermark(self.service_cls.SERVICE_NAME, None)
            task.recoverable_failure(
                'Task was pre-empted (shutdown, vm revert or cull)')
            task.cache_key = result_store.save_error(
                self.service_cls.SERVICE_NAME, None, None, task)
            dispatch_queue.send_raw(task.as_dispatcher_response())

def scan_file(svc_class, sha256, **kwargs):
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    # Don't use srl normalization for filenames (i.e. 1/2/3/4/1234mysha256).

    # We use mocks for the dispatcher, result store, etc. that will inject
    # the results into these lists.
    dispatch_result_collector = mocks.MockDispatchCollector()
    result_store_good = {}
    result_store_bad = {}
    children = []
    supplementary = []

    cfg = forge.get_datastore().get_service(svc_class.SERVICE_NAME).get(
        "config", {})

    import functools
    forge.get_filestore = functools.partial(mocks.get_local_transport, '.')
    forge.get_submit_client = functools.partial(
        mocks.get_mock_submit_client, children, supplementary)
    forge.get_dispatch_queue = lambda: dispatch_result_collector
    forge.get_datastore = functools.partial(
        mocks.get_mock_result_store, result_store_good, result_store_bad)

    service = svc_class(cfg)
    service.start_service()

    # Run all inputs through the service. Children will end up in the
    # children list, results will end up in the results list. Actual
    # fleshed out service results will be in riak.
    task = Task.create(srl=sha256,
                       ignore_cache=True,
                       submitter='local_soak_test',
                       **kwargs)
    start = time.time()
    if service.BATCH_SERVICE:
        service._handle_task_batch([task])
    else:
        service._handle_task(task)
    end = time.time()
    duration = end - start
    print 'Duration: %s' % duration

    (serviced_ok,
     serviced_fail_recover,
     serviced_fail_nonrecover) = dispatch_result_collector.get_serviced_results()
    for response in chain(serviced_ok, serviced_fail_recover,
                          serviced_fail_nonrecover):
        # TODO: we should be able to find it by key in our result_store_good
        if 'response' in response and 'cache_key' in response['response']:
            if response['response']['cache_key'] not in result_store_good:
                print "Appear to be missing result in result store"
                pprint.pprint(response)

    for (_key, full_result) in result_store_good.items():
        if full_result and 'result' in full_result:
            pprint.pprint(full_result)
            json.dumps(full_result, ensure_ascii=True).encode('utf-8')

    service.stop_service()

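# Typical invocation sketch for scan_file(). MyService and the keyword
# values are illustrative assumptions; the sample must already be
# retrievable through the local '.' transport the mocks install above.
#
#     from my_service import MyService  # hypothetical service module
#     scan_file(MyService, '0' * 64, classification='UNRESTRICTED')
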
def init():
    datastore = forge.get_datastore()
    datastore.commit_index('submission')

    sids = [
        x['submission.sid'] for x in datastore.stream_search(
            'submission',
            'state:submitted AND times.submitted:[NOW-1DAY TO *] '
            'AND submission.metadata.type:* '
            'AND NOT submission.description:Resubmit*')
    ]

    submissions = {}
    submitted = {}
    for submission in datastore.get_submissions(sids):
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            continue

        if forge.determine_ingest_queue(task.root_sha256) != ingestq_name:
            continue

        scan_key = task.scan_key
        submissions[task.sid] = submission
        submitted[scan_key] = task.sid

    # Outstanding is the set of things Riak believes are being scanned.
    outstanding = set(submitted.keys())

    # Keys is the set of things middleman believes are being scanned.
    keys = set(scanning.keys())

    # Inflight is the set of submissions middleman and Riak agree are inflight.
    inflight = outstanding.intersection(keys)

    # Missing is the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    missing = keys.difference(inflight)

    # Process the set of submissions Riak believes are incomplete but
    # middleman doesn't know about.
    for scan_key in outstanding.difference(inflight):
        sid = submitted.get(scan_key, None)
        if not sid:
            logger.info("Init: No sid found for incomplete")
            continue

        # Wrap the submission before checking its fields so we don't
        # test the task left over from the previous iteration.
        submission = submissions[sid]
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            logger.info("Init: Not root_sha256 or original_selected")
            continue

        if not task.metadata:
            logger.info("Init: Incomplete submission is not one of ours: %s", sid)
            continue

        stype = None
        try:
            stype = task.metadata.get('type', None)
        except:  # pylint: disable=W0702
            logger.exception(
                "Init: Incomplete submission has malformed metadata: %s", sid)

        if not stype:
            logger.info("Init: Incomplete submission missing type: %s", sid)
            continue

        raw = {
            'metadata': task.metadata,
            'overrides': get_submission_overrides(task, overrides),
            'sha256': task.root_sha256,
            'type': stype,
        }
        raw['overrides']['selected'] = task.original_selected

        reinsert(datastore, " (incomplete)", Notice(raw), logger)

    r = redis.StrictRedis(persistent['host'],
                          persistent['port'],
                          persistent['db'])

    # Duplicates is the set of sha256s for which a duplicate queue exists.
    duplicates = [
        x.replace(dup_prefix, '', 1) for x in r.keys(dup_prefix + '*')
    ]

    # Process the set of duplicates where no scanning or riak entry exists.
    for scan_key in set(duplicates).difference(outstanding.union(keys)):
        raw = dupq.pop(dup_prefix + scan_key, blocking=False)
        if not raw:
            logger.warning("Init: Couldn't pop off dup queue (%s)", scan_key)
            dupq.delete(dup_prefix + scan_key)
            continue

        reinsert(datastore, " (missed duplicate)", Notice(raw), logger)

    while True:
        res = completeq.pop(blocking=False)
        if not res:
            break

        scan_key = completed(Task(res))
        try:
            missing.remove(scan_key)
        except:  # pylint: disable=W0702
            pass

    # Process the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    for scan_key in missing:
        raw = scanning.pop(scan_key)
        if raw:
            reinsert(datastore, '', Notice(raw), logger, retry_all=False)

    # Set up timeouts for all inflight submissions.
    expiry_time = now(max_time)
    for scan_key in inflight:
        # No need to lock. We're the only thing running at this point.
        timeouts.append(Timeout(scan_key, expiry_time))

    signal.signal(signal.SIGINT, interrupt)
    signal.signal(signal.SIGTERM, interrupt)

    datastore.close()

def _do_work(self, work):
    tasks = [Task(raw) for raw in work]
    # noinspection PyProtectedMember
    self.service._handle_task_batch(tasks)

def submit(cls, transport, storage, sha256, path, priority, submitter, **kw):
    """ Execute a submit.

    Any kw are passed along in the dispatched request.
    """
    assert_valid_sha256(sha256)
    queue = forge.get_dispatch_queue()

    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    # By the time submit is called, either the file was in our cache
    # and we freshened its ttl or the client has successfully transferred
    # the file to us.
    local_path = transport.local_path(sha256)

    if not transport.exists(sha256):
        raise SubmissionException('File specified is not on server: %s %s.'
                                  % (sha256, str(transport)))

    root_sha256 = sha256
    temporary_path = massaged_path = None
    try:
        if not local_path:
            temporary_path = tempfile.mktemp(prefix="submission.submit")
            transport.download(sha256, temporary_path)
            local_path = temporary_path

        fileinfo = identify.fileinfo(local_path)
        if fileinfo['sha256'] != sha256:
            raise CorruptedFileStoreException(
                'SHA256 mismatch between received and calculated '
                'sha256. %s != %s' % (sha256, fileinfo['sha256']))
        storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        decode_file = forge.get_decode_file()
        massaged_path, _, fileinfo, al_meta = decode_file(local_path, fileinfo)

        if massaged_path:
            local_path = massaged_path
            sha256 = fileinfo['sha256']

            transport.put(local_path, sha256)
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        if fileinfo['size'] > max_size and not ignore_size:
            msg = "File too large (%d > %d). Submission failed" % \
                  (fileinfo['size'], max_size)
            raise SubmissionException(msg)

        # We'll just merge the mandatory arguments, fileinfo, and any
        # optional kw and pass those all on to the dispatch callback.
        task_args = fileinfo
        task_args.update(kw)
        task_args.update({
            'original_selected': kw.get('selected', []),
            'root_sha256': root_sha256,
            'srl': sha256,
            'sha256': sha256,
            'priority': priority,
            'submitter': submitter,
            'path': safe_str(path)})

        if 'metadata' in task_args:
            task_args['metadata'].update(al_meta)
        else:
            task_args['metadata'] = al_meta

        submit_task = Task.create(**task_args)
        if submit_task.is_initial():
            storage.create_submission(
                submit_task.sid,
                submit_task.as_submission_record(),
                [(os.path.basename(path), submit_task.srl)])
        log.debug("Submission complete. Dispatching: %s", submit_task)

        queue.send(submit_task, shards=SHARDS)

        return submit_task.raw
    finally:
        if massaged_path:
            try:
                os.unlink(massaged_path)
            except:  # pylint: disable=W0702
                pass

        if temporary_path:
            try:
                os.unlink(temporary_path)
            except:  # pylint: disable=W0702
                pass

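# A minimal caller sketch for submit(), assuming the file has already been
# transferred to the filestore under its sha256. SubmissionWrapper is a
# hypothetical name for the class defining submit(), and the priority,
# classification and selected values are placeholders.
def submit_example(sha256, path):
    transport = forge.get_filestore()
    storage = forge.get_datastore()
    return SubmissionWrapper.submit(
        transport, storage, sha256, path,
        priority=1000,
        submitter='admin',
        classification='UNRESTRICTED',
        selected=['Extract'])
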
def submit_inline(cls, storage, transport, file_paths, **kw):
    """ Submit local samples to the submission service.

    submit_inline can be used when the sample to submit is already
    local to the submission service. It does the presubmit, filestore
    upload and submit.

    Any kw are passed to the Task created to dispatch this submission.
    """
    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)
    submissions = []
    file_tuples = []
    dispatch_request = None

    # Generate static fileinfo data for each file.
    for file_path in file_paths:
        file_name = os.path.basename(file_path)
        fileinfo = identify.fileinfo(file_path)

        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        if fileinfo['size'] > max_size and not ignore_size:
            msg = "File too large (%d > %d). Submission failed" % \
                  (fileinfo['size'], max_size)
            raise SubmissionException(msg)

        decode_file = forge.get_decode_file()
        temp_path, original_name, fileinfo, al_meta = \
            decode_file(file_path, fileinfo)

        if temp_path:
            file_path = temp_path
            if not original_name:
                original_name = os.path.splitext(file_name)[0]
            file_name = original_name

        sha256 = fileinfo['sha256']

        storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        file_tuples.append((file_name, sha256))

        if not transport.exists(sha256):
            log.debug('File not on remote filestore. Uploading %s', sha256)
            transport.put(file_path, sha256, location='near')

        if temp_path:
            os.remove(temp_path)

        # We'll just merge the mandatory arguments, fileinfo, and any
        # optional kw and pass those all on to the dispatch callback.
        task_args = fileinfo
        task_args['priority'] = 0  # Just a default.
        task_args.update(kw)
        task_args['srl'] = sha256
        task_args['original_filename'] = file_name
        task_args['path'] = file_name

        if 'metadata' in task_args:
            task_args['metadata'].update(al_meta)
        else:
            task_args['metadata'] = al_meta

        dispatch_request = Task.create(**task_args)
        submissions.append(dispatch_request)

    storage.create_submission(
        dispatch_request.sid,
        dispatch_request.as_submission_record(),
        file_tuples)

    dispatch_queue = forge.get_dispatch_queue()
    for submission in submissions:
        dispatch_queue.submit(submission)

    log.debug("Submission complete. Dispatched: %s", dispatch_request)

    # Ugly - fighting with task to give the UI something that makes sense.
    file_result_tuples = \
        zip(file_paths, [dispatch_request.raw for dispatch_request in submissions])
    result = submissions[0].raw.copy()
    fileinfos = []
    # Use a distinct loop variable so we don't clobber the copied result.
    for filename, submission_raw in file_result_tuples:
        finfo = submission_raw['fileinfo']
        finfo['original_filename'] = os.path.basename(filename)
        finfo['path'] = finfo['original_filename']
        fileinfos.append(finfo)

    result['fileinfo'] = fileinfos
    return result

def submit_multi(cls, storage, transport, files, **kw):
    """ Submit all files into one submission

    submit_multi can be used when all the files are already present in the
    file storage.

    files is an array of (name, sha256) tuples

    Any kw are passed to the Task created to dispatch this submission.
    """
    sid = str(uuid.uuid4())
    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    submissions = []
    temporary_path = None
    dispatch_request = None

    # Generate static fileinfo data for each file.
    for name, sha256 in files:
        local_path = transport.local_path(sha256)

        if not transport.exists(sha256):
            raise SubmissionException('File specified is not on server: %s %s.'
                                      % (sha256, str(transport)))

        try:
            if not local_path:
                temporary_path = tempfile.mktemp(prefix="submission.submit_multi")
                transport.download(sha256, temporary_path)
                local_path = temporary_path

            fileinfo = identify.fileinfo(local_path)
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            decode_file = forge.get_decode_file()
            massaged_path, new_name, fileinfo, al_meta = \
                decode_file(local_path, fileinfo)

            if massaged_path:
                name = new_name
                local_path = massaged_path
                sha256 = fileinfo['sha256']

                if not transport.exists(sha256):
                    transport.put(local_path, sha256)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            ignore_size = kw.get('ignore_size', False)
            max_size = config.submissions.max.size
            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission failed" % \
                      (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args['priority'] = 0  # Just a default.
            task_args.update(kw)
            task_args['srl'] = sha256
            task_args['original_filename'] = name
            task_args['sid'] = sid
            task_args['path'] = name

            if 'metadata' in task_args:
                task_args['metadata'].update(al_meta)
            else:
                task_args['metadata'] = al_meta

            dispatch_request = Task.create(**task_args)
            submissions.append(dispatch_request)
        finally:
            if temporary_path:
                try:
                    os.unlink(temporary_path)
                except:  # pylint: disable=W0702
                    pass

    storage.create_submission(
        dispatch_request.sid,
        dispatch_request.as_submission_record(),
        files)

    dispatch_queue = forge.get_dispatch_queue()
    for submission in submissions:
        dispatch_queue.submit(submission)

    log.debug("Submission complete. Dispatched: %s", dispatch_request)

    return submissions[0].raw.copy()

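# Usage sketch for submit_multi(), assuming both sha256s already exist in
# the filestore. SubmissionWrapper is a hypothetical name for the defining
# class, and the file names, digests and keyword values are illustrative.
def submit_multi_example(storage, transport):
    files = [
        ('sample_a.bin', 'a' * 64),
        ('sample_b.bin', 'b' * 64),
    ]
    return SubmissionWrapper.submit_multi(
        storage, transport, files,
        submitter='admin',
        classification='UNRESTRICTED')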