def queue_bulk_threats(self, atom_list, payload):
    """Post atoms as bulk threat tasks, batch by batch, and gather the created hashkeys.

    The atoms are split with ``split_list`` into batches of ``self._batch_size()``.
    Once ``OCD_DTL_MAX_BULK_THREATS_IN_FLIGHT`` submitted tasks are still unchecked,
    the oldest one is checked before the next batch is posted. ``payload`` is
    modified in place: its ``'atom_values'`` entry is overwritten for every batch.

    Returns the set of hashkeys reported as created by the checked tasks.
    """
    created_hashkeys = []
    pending_task_uuids = []  # submitted bulk tasks whose outcome has not been checked yet

    for batch in split_list(atom_list, self._batch_size()):
        # Throttle: resolve the oldest pending task before submitting another one
        if len(pending_task_uuids) >= self.OCD_DTL_MAX_BULK_THREATS_IN_FLIGHT:
            oldest_task_uuid = pending_task_uuids.pop(0)
            created_hashkeys.extend(self.check_bulk_threats_added(oldest_task_uuid))

        payload['atom_values'] = '\n'.join(batch)  # Raw csv expected
        response = self.datalake_requests(self.url, 'post', self._post_headers(), payload)

        if response.get('task_uuid'):
            pending_task_uuids.append(response['task_uuid'])
            continue
        logger.warning(f'batch of threats from {batch[0]} to {batch[-1]} failed to be created')

    # Check whatever tasks are still pending once every batch has been submitted
    for pending_task_uuid in pending_task_uuids:
        created_hashkeys.extend(self.check_bulk_threats_added(pending_task_uuid))

    nb_threats = len(created_hashkeys)
    if nb_threats > 0:
        status_badge = '\x1b[0;30;42m' + ' OK ' + '\x1b[0m'
        summary = f'Created {nb_threats} threats'
    else:
        status_badge = '\x1b[0;30;41m' + ' KO ' + '\x1b[0m'
        summary = 'Failed to create any threats'
    logger.info(summary.ljust(self.terminal_size - 6, ' ') + status_badge)

    return set(created_hashkeys)
def check_bulk_threats_added(self, bulk_threat_task_uuid) -> list:
    """Return the hashkeys created by a bulk manual threat submission task.

    The task is followed through ``handle_bulk_task`` until its state is DONE or
    CANCELLED, or until ``OCD_DTL_MAX_BULK_THREATS_TIME`` elapses (a
    ``TimeoutError`` is treated as an empty response). Any outcome other than a
    DONE task carrying hashkeys is logged as a failed batch and yields an empty list.
    """

    def is_completed_task(json_response):
        return json_response['state'] in ('DONE', 'CANCELLED')

    endpoint_url = self._build_url_for_endpoint('retrieve-threats-manual-bulk')
    try:
        task_result = self.handle_bulk_task(
            bulk_threat_task_uuid,
            endpoint_url,
            timeout=self.OCD_DTL_MAX_BULK_THREATS_TIME,
            additional_checks=[is_completed_task],
        )
    except TimeoutError:
        task_result = {}

    hashkeys = task_result.get('hashkeys')
    atom_values = task_result.get('atom_values')

    # Success path: anything other than state DONE counts as a failed batch
    if hashkeys and task_result.get('state', 'CANCELLED') == 'DONE':
        return list(hashkeys)

    # Placeholders keep the warning printable when the json lacks some fields
    hashkeys = hashkeys or ['<missing value>']
    atom_values = atom_values or ['<missing value>']
    logger.warning(f'batch of threats from {atom_values[0]}({hashkeys[0]}) to {atom_values[-1]}({hashkeys[-1]})'
                   f' failed to be created during task {bulk_threat_task_uuid}')
    return []
def defang_threats(threats, atom_type):
    """Return the threats with common defanging notations undone.

    For every threat, ``[.]`` and ``(.)`` are turned back into ``.``. When
    ``atom_type`` is ``'url'`` the scheme is also normalised: a leading ``hxxp``
    or ``ftp`` becomes ``http``, a leading ``sftp`` becomes ``https``, and a
    threat starting with none of these (nor ``http``) gets ``http://`` prepended.
    A url that still matches neither url pattern afterwards is logged and skipped.

    Only the leading scheme is rewritten; occurrences of ``hxxp``/``ftp``/``sftp``
    further inside the url (host name, path, query) are left untouched.

    :param threats: iterable of atom values (strings)
    :param atom_type: atom type of the values; only ``'url'`` triggers scheme handling
    :return: list of cleaned threats, in input order, without the skipped urls
    """
    # matches urls like http://www.website.com:444/file.html
    standard_url_regex = re.compile(r'^(https?:\/\/)[a-z0-9]+([\-\.][a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$')
    # matches urls like http://185.25.5.3:8080/result.php (ipv4 or ipv6)
    ip_url_regex = re.compile(r'^(https?:\/\/)[0-9a-zA-Z]{1,4}([\.:][0-9a-zA-Z]{1,4}){3,7}(:[0-9]{1,5})?(\/.*)?$')
    # (prefix found on the threat, scheme prefix written instead)
    scheme_rewrites = (('hxxp', 'http'), ('ftp', 'http'), ('sftp', 'https'))

    defanged = []
    for unmodified_threat in threats:
        threat = unmodified_threat.replace('[.]', '.').replace('(.)', '.')

        if atom_type == 'url':
            if not threat.startswith('http'):
                for obfuscated_prefix, scheme_prefix in scheme_rewrites:
                    if threat.startswith(obfuscated_prefix):
                        # Swap the prefix only: str.replace() would also rewrite
                        # matches located in the host or the path.
                        threat = scheme_prefix + threat[len(obfuscated_prefix):]
                        break
                else:
                    threat = 'http://' + threat
            if not standard_url_regex.match(threat) and not ip_url_regex.match(threat):
                logger.warning(f'\'{unmodified_threat}\' has been modified as \'{threat}\' but is still not recognized'
                               f' as an url. Skipping this line')
                continue

        if unmodified_threat != threat:
            logger.info(f'\'{unmodified_threat}\' has been modified as \'{threat}\'')
        defanged.append(threat)
    return defanged
def datalake_requests(self, url: str, method: str, headers: dict, post_body: dict = None):
    """
    Send a request to the API and return the decoded response.

    ``headers`` is stored on ``self.headers``; when it carries no
    'Authorization' entry, a token pair is fetched, kept in ``self.tokens`` and
    the access token is written into ``headers``. The request is then sent
    repeatedly: 401 and 422 answers trigger ``_token_update``, other non 2xx
    answers and undecodable 2xx answers are only logged. Each unsuccessful
    attempt consumes one of the ``SET_MAX_RETRY`` tries; when none is left an
    empty dict is returned.
    """
    self.headers = headers
    attempts_left = self.SET_MAX_RETRY

    request_dump = self._pretty_debug_request(url, method, post_body, headers, self.tokens)
    logger.debug(request_dump)

    # No credentials supplied by the caller: fetch a token pair and use the access token
    if not headers.get('Authorization'):
        token_pair = self.token_generator.get_token()
        access_token = f'Token {token_pair["access_token"]}'
        refresh_token = f'Token {token_pair["refresh_token"]}'
        self.tokens = [access_token, refresh_token]
        headers['Authorization'] = self.tokens[0]

    while True:
        response = self._send_request(url, method, headers, post_body)
        logger.debug(f'API response:\n{str(response.text)}')

        status = response.status_code
        if 200 <= status <= 299:
            try:
                return self._load_response(response)
            except JSONDecodeError:
                logger.error('Request unexpectedly returned non dict value. Retrying')
        elif status == 401:
            logger.warning('Token expired or Missing authorization header. Updating token')
            self._token_update(self._load_response(response))
        elif status == 422:
            logger.warning('Bad authorization header. Updating token')
            logger.debug(f'422 HTTP code: {response.text}')
            self._token_update(self._load_response(response))
        else:
            logger.error(f'API returned non 2xx response code : {response.status_code}\n{response.text}'
                         f'\n Retrying')

        # Reaching this point means the attempt did not produce a usable answer
        attempts_left -= 1
        if attempts_left <= 0:
            logger.error('Request failed: Will return nothing for this request')
            return {}
def post_new_score_from_list(self, hashkeys: list, scores: Dict[str, int], override_type: str = 'temporary') -> list:
    """
    Post the new scores for every hashkey and report the outcome of each post.

    A response carrying a 'message' is logged as a warning and reported as
    '<hashkey>: <message>'; any other response is logged as info and reported
    as '<hashkey>: OK'. The reports are returned in iteration order.
    """
    outcomes = []
    for hashkey in hashkeys:
        api_answer = self._post_new_score(hashkey, scores, override_type)
        message = api_answer.get('message')
        if not message:
            outcomes.append(hashkey + ': OK')
            logger.info('\x1b[6;30;42m' + hashkey + ': OK\x1b[0m')
            continue
        logger.warning('\x1b[6;30;41m' + hashkey + ': ' + message + '\x1b[0m')
        outcomes.append(hashkey + ': ' + message)
    return outcomes
def post_tags(self, hashkeys: Set[str], tags: List[str], *, public=True) -> list:
    """
    Post the tags on every threat hashkey and report the outcome of each post.

    Tags are posted with 'public' visibility when ``public`` is true, with
    'organization' visibility otherwise. A response carrying a 'message' is
    logged as a warning and reported as '<hashkey>: <message>'; any other
    response is logged as info and reported as '<hashkey>: OK'.
    """
    if public:
        visibility = 'public'
    else:
        visibility = 'organization'

    outcomes = []
    for hashkey in hashkeys:
        api_answer = self._post_tags_to_hashkey(hashkey, tags, visibility)
        message = api_answer.get('message')
        if not message:
            outcomes.append(hashkey + ': OK')
            logger.info('\x1b[6;30;42m' + hashkey + ': OK\x1b[0m')
            continue
        logger.warning('\x1b[6;30;41m' + hashkey + ': ' + message + '\x1b[0m')
        outcomes.append(hashkey + ': ' + message)
    return outcomes
def post_comments_and_tags_from_list(self, hashkeys: Set[str], content: str, tags: list, *, public=True) -> list:
    """
    Post the comment and the tags on every threat hashkey and report each outcome.

    Posts are made with 'public' visibility when ``public`` is true, with
    'organization' visibility otherwise. A response carrying a 'message' is
    logged as a warning and reported as '<hashkey>: <message>'; any other
    response is logged as info and reported as '<hashkey>: OK'.
    """
    if public:
        visibility = 'public'
    else:
        visibility = 'organization'

    outcomes = []
    for hashkey in hashkeys:
        api_answer = self._post_comments_and_tags(hashkey, content, tags, visibility)
        message = api_answer.get('message')
        if not message:
            outcomes.append(hashkey + ': OK')
            logger.info('\x1b[6;30;42m' + hashkey + ': OK\x1b[0m')
            continue
        logger.warning('\x1b[6;30;41m' + hashkey + ': ' + message + '\x1b[0m')
        outcomes.append(hashkey + ': ' + message)
    return outcomes
def datalake_requests(self, url: str, method: str, headers: dict, post_body: dict = None):
    """
    Use it to request the API.

    When ``headers`` carries no 'Authorization' entry, a token pair is fetched,
    kept in ``self.tokens`` and the access token is written into ``headers``
    (the caller's dict is modified in place).

    The request is then sent until ``_token_update`` accepts the decoded
    response, which is returned. Every attempt that raises consumes one of the
    ``SET_MAX_RETRY`` tries and is followed by a 5 seconds pause; once no try is
    left, a warning is logged and an empty dict is returned.

    :param url: endpoint to request
    :param method: HTTP method, forwarded to ``_send_request``
    :param headers: request headers, completed with a token when needed
    :param post_body: optional request body, forwarded to ``_send_request``
    :return: the decoded response, or ``{}`` when the request could not be completed
    """
    tries_left = self.SET_MAX_RETRY

    logger.debug(self._pretty_debug_request(url, method, post_body, headers, self.tokens))

    if not headers.get('Authorization'):
        fresh_tokens = self.token_generator.get_token()
        self.tokens = [
            f'Token {fresh_tokens["access_token"]}',
            f'Token {fresh_tokens["refresh_token"]}',
        ]
        headers['Authorization'] = self.tokens[0]

    while tries_left > 0:
        try:
            response = self._send_request(url, method, headers, post_body)
            dict_response = self._load_response(response)
            # NOTE(review): a falsy result sends the request again without
            # consuming a try — presumably the token was just refreshed; confirm
            # that _token_update cannot stay falsy forever.
            if self._token_update(dict_response):
                return dict_response
        except Exception as err:
            # Exception rather than a bare except: KeyboardInterrupt and
            # SystemExit must stop the script instead of being retried.
            tries_left -= 1
            if tries_left <= 0:
                logger.warning('Request failed: Will return nothing for this request')
                return {}
            logger.debug('ERROR : Something has gone wrong with requests ...')
            logger.debug(f'cause: {err!r}')
            logger.debug('sleep 5 seconds')
            time.sleep(5)

    # Only reached when SET_MAX_RETRY allows no attempt at all: fail like the
    # exhausted-retries path instead of implicitly returning None.
    return {}
def main(override_args=None):
    """Method to start the script.

    Parses the command line (or ``override_args`` when it is given and not
    empty), gathers atoms from the typed flags, the positional untyped atoms
    and the input files, asks the API to type the untyped ones, then runs the
    bulk lookup, prints the result and saves it when an output is requested.

    Exits through ``parser.error`` when no atom source is provided, and with
    status 1 when the requested output type is rejected.
    """
    # Load initial args
    starter = BaseScripts()
    parser = starter.start('Gets threats or hashkeys from given atom types and atom values.')
    supported_atom_types = parser.add_argument_group('Supported Atom Types')

    parser.add_argument(
        'untyped_atoms',
        help='untyped atom values to lookup. Useful when you do not know what is the atom type',
        nargs='*',
    )
    for atom_type in ATOM_TYPES_FLAGS:
        supported_atom_types.add_argument(
            f'--{atom_type}',
            action='append',
            help=f'set a single {atom_type} atom type with its value',
        )
    parser.add_argument(
        '-ad',
        '--atom-details',
        dest='hashkey_only',
        default=True,
        action='store_false',
        help='returns threats full details',
    )
    parser.add_argument(
        '-i',
        '--input',
        action='append',
        help='read threats to add from FILE. [atomtype:path/to/file.txt]',
    )
    parser.add_argument(
        '-ot',
        '--output-type',
        help='set to the output type desired {json,csv}. Default is json',
    )

    # A missing or empty override falls back to the process command line
    args = parser.parse_args(override_args or None)
    logger.debug('START: bulk_lookup_threats.py')

    # create output type header
    accept_header = {'Accept': None}
    if args.output_type:
        try:
            accept_header['Accept'] = BaseEngine.output_type2header(args.output_type)
        except ParserError as e:
            logger.exception(f'Exception raised while getting output type headers # {str(e)}', exc_info=False)
            # SystemExit directly: the exit() builtin is injected by the site
            # module for interactive use and is not guaranteed to exist.
            raise SystemExit(1)

    # to gather all typed atoms passed by arguments and input files
    typed_atoms = {}
    for flag in ATOM_TYPES_FLAGS:
        atom_values = getattr(args, flag)
        if atom_values is not None:
            typed_atoms[flag] = atom_values

    # set validations flags regarding the presence or absence of cli arguments
    has_file = args.input is not None
    has_flag = bool(typed_atoms)

    # validate that at least there is one untyped atom or one atom or one input file
    if (not has_flag and not has_file and not args.untyped_atoms) or (SUBCOMMAND_NAME in args.untyped_atoms):
        parser.error('you must provide at least one of following: untyped atom, atom type, input file.')

    # process input files
    if has_file:
        for input_file in args.input:
            file_atom_type, filename = get_atom_type_from_filename(input_file)
            logger.debug(f'file {filename} was recognized as {file_atom_type}')

            if file_atom_type == UNTYPED_ATOM_TYPE:
                args.untyped_atoms += starter._load_list(filename)
            else:
                typed_atoms.setdefault(file_atom_type, []).extend(starter._load_list(filename))

    # load api_endpoints and tokens
    endpoints_config, _, tokens = starter.load_config(args)
    post_engine_bulk_lookup_threats = BulkLookupThreats(endpoints_config, args.env, tokens)
    post_engine_atom_values_extractor = AtomValuesExtractor(endpoints_config, args.env, tokens)

    # lookup for atom types
    if args.untyped_atoms:
        atoms_values_extractor_response = post_engine_atom_values_extractor.atom_values_extract(args.untyped_atoms)
        if atoms_values_extractor_response['found'] > 0:
            typed_atoms = join_dicts(typed_atoms, atoms_values_extractor_response['results'])
        else:
            logger.warning('none of your untyped atoms could be typed')

        # find out what atoms couldn't be typed for printing them
        if atoms_values_extractor_response['not_found'] > 0:
            for typed_values in atoms_values_extractor_response['results'].values():
                args.untyped_atoms = [
                    untyped_atom for untyped_atom in args.untyped_atoms
                    if untyped_atom not in typed_values
                ]

            logger.warning(f'\x1b[6;37;43m{"#" * 60} UNTYPED ATOMS {"#" * 47}\x1b[0m')
            logger.warning('\n'.join(args.untyped_atoms))
            logger.warning('')

    response = post_engine_bulk_lookup_threats.bulk_lookup_threats(
        threats=typed_atoms,
        additional_headers=accept_header,
        hashkey_only=args.hashkey_only,
    )
    pretty_print(response, args.output_type)

    if args.output:
        starter.save_output(args.output, response)
        logger.debug(f'Results saved in {args.output}\n')
    # Same script name as the START message (it previously named another script)
    logger.debug('END: bulk_lookup_threats.py')