def main(override_args=None):
    starter = BaseScripts()
    logger.debug('START: get_query_hash.py')

    # Load initial args
    parser = starter.start('Retrieve a query hash from a query body (a json used for the Advanced Search).')
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_body_path',
        help='path to the json file containing the query body',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    with open(args.query_body_path, 'r') as query_body_file:
        query_body = json.load(query_body_file)
    logger.debug(f'Retrieving query hash for query body: {query_body}')

    advanced_search = AdvancedSearch(endpoint_config, args.env, tokens)
    response = advanced_search.get_threats(query_body, limit=0)
    if not response or 'query_hash' not in response:
        logger.error("Couldn't retrieve a query hash, is the query body valid?")
        exit(1)
    query_hash = response['query_hash']
    if args.output:
        with open(args.output, 'w') as output:
            output.write(query_hash)
        logger.info(f'Query hash saved in {args.output}')
    else:
        logger.info(f'Query hash associated: {query_hash}')
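# Usage sketch: the override_args parameter lets other code call this script
# without touching sys.argv. A minimal example, assuming a '--output' flag is
# registered by BaseScripts.start() and that 'my_query_body.json' exists (both
# are assumptions made for illustration):
def _example_get_query_hash():
    # Equivalent to: python get_query_hash.py my_query_body.json --output hash.txt
    main(['my_query_body.json', '--output', 'hash.txt'])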
def _handle_bulk_search_task(self, task_uuid):
    retrieve_bulk_result_url = self._build_url_for_endpoint('retrieve-bulk-search')
    retrieve_bulk_result_url = retrieve_bulk_result_url.format(task_uuid=task_uuid)
    start_time = time()
    back_off_time = 10
    json_response = None
    while not json_response:
        response = requests.get(
            url=retrieve_bulk_result_url,
            headers={'Authorization': self.tokens[0]},
        )
        if response.status_code == 200:
            json_response = response.json()
        elif response.status_code == 401:
            logger.debug('Refreshing expired Token')
            self._token_update(response.json())
        elif time() - start_time + back_off_time < self.OCD_DTL_MAX_BULK_SEARCH_TIME:
            sleep(back_off_time)
            back_off_time = min(back_off_time * 2, self.OCD_DTL_MAX_BACK_OFF_TIME)
        else:
            logger.error(f'Bulk search task timed out, task_uuid: "{task_uuid}"')
            raise TimeoutError(
                f'No bulk search result after waiting {self.OCD_DTL_MAX_BULK_SEARCH_TIME / 60:.0f} mins\n'
                f'task_uuid: "{task_uuid}"'
            )
    return json_response
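# A minimal, standalone sketch of the capped exponential back-off used by the
# polling loop above. The 120s cap stands in for OCD_DTL_MAX_BACK_OFF_TIME and
# the 600s budget for OCD_DTL_MAX_BULK_SEARCH_TIME; both values are assumptions
# chosen purely for illustration.
def _example_back_off_delays(max_back_off: int = 120, total_budget: int = 600) -> list:
    """Return the successive sleep durations the polling loop would use."""
    delays, back_off, waited = [], 10, 0
    while waited + back_off < total_budget:
        delays.append(back_off)
        waited += back_off
        back_off = min(back_off * 2, max_back_off)
    return delays  # [10, 20, 40, 80, 120, 120, 120]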
def refresh_token(self, refresh_token: str):
    """
    Refresh the current token

    :param refresh_token: str
    """
    logger.debug('Token will be refreshed')
    return self.retrieve_token({'Authorization': refresh_token}, True)
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    logger.debug('START: get_threats_from_query_hash.py')

    # Load initial args
    parser = starter.start('Retrieve a list of responses from a given query hash.')
    parser.add_argument(
        '--query_fields',
        help='fields to be retrieved from the threat (default: only the hashkey)\n'
             'If an atom detail isn\'t present in a particular atom, an empty string is returned.',
        nargs='+',
        default=['threat_hashkey'],
    )
    parser.add_argument(
        '--list',
        help='turn the output into a list (requires query_fields to be a single element)',
        action='store_true',
    )
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_hash',
        help='the query hash from which to retrieve the response hashkeys',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    if len(args.query_fields) > 1 and args.list:
        parser.error("List output format is only available if a single element is queried (via query_fields)")

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    logger.debug(f'Start to search for threats from the query hash: {args.query_hash}')

    bulk_search = BulkSearch(endpoint_config, args.env, tokens)
    response = bulk_search.get_threats(args.query_hash, args.query_fields)
    original_count = response.get('count', 0)
    logger.info(f'Number of threats retrieved: {original_count}')

    formatted_output = format_output(response, args.list)
    if args.output:
        with open(args.output, 'w') as output:
            output.write(formatted_output)
        logger.info(f'Threats saved in {args.output}')
    else:
        logger.info(formatted_output)
        logger.info('Done')
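# Usage sketch: retrieving only atom values as a flat list, one per line. This
# requires --list together with a single --query_fields element. The query hash
# value below is hypothetical.
def _example_get_threats_from_query_hash():
    main([
        '0e5c744eb57fdf4b0db4fd5e99c1f8ee',  # hypothetical query hash
        '--query_fields', 'atom_value',
        '--list',
    ])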
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    # Load initial args
    parser = starter.start('Add tags and/or comments to a specified list of hashkeys.')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to add tags and/or comments to',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line',
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '--tags',
        nargs='+',
        help='add a list of tags',
        required=True,
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    post_engine_add_tags = TagsPost(endpoint_config, args.env, tokens)
    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")
    hashkeys = set(args.hashkeys) if args.hashkeys else set()
    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)

    response_dict = post_engine_add_tags.post_tags(
        hashkeys,
        args.tags,
        public=args.public,
    )
    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_tags.py')
def _post_comment(self, hashkey: str, comment: str, visibility: str = 'organization') -> dict: """ Post comment on threat hashkey """ payload = { 'content': comment, 'visibility': visibility, } url = self.url.format(hashkey=hashkey) logger.debug(url) return self.datalake_requests(url, 'post', self._post_headers(), payload)
def handle_bulk_task(self, task_uuid, retrieve_bulk_result_url, *, timeout,
                     additional_checks: List[Check] = None) -> Json:
    """
    Handle a generic bulk task, blocking until the task is done or the timeout is reached

    :param task_uuid: uuid of the bulk task
    :param retrieve_bulk_result_url: endpoint to query, must contain a task_uuid field
    :param timeout: timeout after which a TimeoutError is raised
    :param additional_checks: functions to call on a potential json; if all checks return True, the json is returned
    :return: a json returned on HTTP 200 validating all additional_checks
    """
    retrieve_bulk_result_url = retrieve_bulk_result_url.format(task_uuid=task_uuid)
    spinner = None
    if logger.isEnabledFor(logging.INFO):
        spinner = Halo(text=f'Waiting for bulk task {task_uuid} response', spinner='dots')
        spinner.start()
    start_time = time()
    back_off_time = 10
    json_response = None
    while not json_response:
        response = requests.get(
            url=retrieve_bulk_result_url,
            headers={'Authorization': self.tokens[0]},
            verify=self.requests_ssl_verify
        )
        if response.status_code == 200:
            potential_json_response = response.json()
            if additional_checks and not all(check(potential_json_response) for check in additional_checks):
                sleep(back_off_time)  # the json isn't valid yet, back off before polling again
                continue
            if spinner:
                spinner.succeed(f'bulk task {task_uuid} done')
            json_response = potential_json_response
        elif response.status_code == 401:
            logger.debug('Refreshing expired Token')
            self._token_update(response.json())
        elif time() - start_time + back_off_time < timeout:
            sleep(back_off_time)
            back_off_time = min(back_off_time * 2, self.OCD_DTL_MAX_BACK_OFF_TIME)
        else:
            if spinner:
                spinner.fail(f'bulk task {task_uuid} timeout')
            logger.error(f'Bulk task {task_uuid} timed out after {timeout / 60:.0f} mins')
            raise TimeoutError(
                f'No bulk result after waiting {timeout / 60:.0f} mins\n'
                f'task_uuid: "{task_uuid}"'
            )
    if spinner:
        spinner.stop()
    return json_response
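# Sketch of an additional_checks callable: each check receives the candidate
# json and returns a bool, and the result is only accepted once every check
# passes. The 'state'/'DONE' field tested here is an assumption made for the
# example, not a documented field of the bulk task response.
def _bulk_task_is_done(potential_json: dict) -> bool:
    return potential_json.get('state') == 'DONE'

# e.g. engine.handle_bulk_task(task_uuid, url, timeout=600, additional_checks=[_bulk_task_is_done])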
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    logger.debug('START: get_threats_by_hashkey.py')

    # Load initial args
    parser = starter.start('Retrieve threats (as Json) from a list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to retrieve',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='list of threat ids (hashkeys) that need to be retrieved',
    )
    parser.add_argument(
        '--lost',
        help='file to save the hashkeys that were not found',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")
    threats_list = starter._load_csv(args.input_file) if args.input_file else args.hashkeys

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    logger.debug(f'TOTAL: {len(threats_list)} threats found')
    url_threats = main_url + endpoint_url['endpoints']['threats']
    search_engine_threats = ThreatsSearch(url_threats, main_url, tokens)
    list_threats, list_lost_hashes = search_engine_threats.get_json(threats_list)

    if args.output:
        starter.save_output(args.output, list_threats)
        logger.debug(f'Threats JSON saved in {args.output}\n')
    if args.lost:
        starter.save_output(args.lost, list_lost_hashes)
        logger.debug(f'Lost threats saved in {args.lost}\n')
    logger.debug('END: get_threats_by_hashkey.py')
def _post_comments_and_tags(self, hashkey: str, content: str, tags: list,
                            visibility: str = 'organization') -> dict:
    """
    Post a comment and tags on a threat hashkey
    """
    payload = {
        'content': content,
        'tags': tags,
        'visibility': visibility,
    }
    logger.debug(f'{self.url}{hashkey}/comments/')
    return self.datalake_requests(f'{self.url}{hashkey}/comments/', 'post', self._post_headers(), payload)
def add_threats(self, atom_list: list, atom_type: str, is_whitelist: bool,
                threats_score: Dict[str, int], is_public: bool, tags: list,
                links: list, override_type: str) -> dict:
    """
    Use it to add a list of threats to the API.

    :param atom_list: atoms that need to be added
    :param atom_type: must be one of the _authorized_atom_value
    :param is_whitelist: if True the score will be set to 0
    :param threats_score: a dict that maps threat_type -> score
    :param is_public: if True the added threats will be public, else they are restricted to the organization
    :param tags: a list of tags to add
    :param links: external_analysis_link to include with each atom
    :param override_type: either 'permanent' or 'temporary'. 'permanent' doesn't allow future automatic score changes
    """
    payload = {
        'override_type': override_type,
        'public': is_public,
        'threat_data': {
            'content': {},
            'scores': [],
            'threat_types': [],
            'tags': tags
        }
    }
    if is_whitelist:
        for threat in self.authorized_threats_value:
            payload['threat_data']['scores'].append({'score': {'risk': 0}, 'threat_type': threat})
            payload['threat_data']['threat_types'].append(threat)
    else:
        for threat, score in threats_score.items():
            payload['threat_data']['scores'].append({'score': {'risk': score}, 'threat_type': threat})
            payload['threat_data']['threat_types'].append(threat)

    return_value = {'results': []}
    for atom in atom_list:
        if not atom:  # empty value
            logger.info(f'EMPTY ATOM {atom.ljust(self.terminal_size - 6, " ")} \x1b[0;30;41m KO \x1b[0m')
            continue
        response_dict = self._add_new_atom(atom, atom_type, payload, links)
        if response_dict.get('atom_value'):
            logger.info(atom.ljust(self.terminal_size - 6, ' ') + '\x1b[0;30;42m' + ' OK ' + '\x1b[0m')
            return_value['results'].append(response_dict)
        else:
            logger.info(atom.ljust(self.terminal_size - 6, ' ') + '\x1b[0;30;41m' + ' KO ' + '\x1b[0m')
            logger.debug(response_dict)
    return return_value
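# For reference, the payload built by add_threats() for a non-whitelist call
# with threats_score={'ddos': 50}, tags=['my-tag'], is_public=False and
# override_type='temporary' would look like this (values are illustrative):
EXAMPLE_ADD_THREATS_PAYLOAD = {
    'override_type': 'temporary',
    'public': False,
    'threat_data': {
        'content': {},
        'scores': [{'score': {'risk': 50}, 'threat_type': 'ddos'}],
        'threat_types': ['ddos'],
        'tags': ['my-tag'],
    },
}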
def _post_tags_to_hashkey(self, hashkey: str, tags: List[str], visibility: str = 'organization') -> dict: """ Post tags on a single threat hashkey """ tags_payload = [] for tag in tags: tags_payload.append( { 'name': tag, 'visibility': visibility, } ) payload = { 'tags': tags_payload, } url = self.url.format(hashkey=hashkey) logger.debug(url) return self.datalake_requests(url, 'post', self._post_headers(), payload)
def _post_new_score(self, hashkey: str, scores: Dict[str, int], override_type: str = 'temporary') -> dict: """ Post new score to the API """ payload = {'override_type': override_type, 'scores': []} for threat_type, score in scores.items(): if score is None: return {'message': 'No score to modify'} payload['scores'].append({ 'threat_type': threat_type, 'score': { 'risk': score } }) logger.debug('url : ' + repr(self.url)) return self.datalake_requests(f'{self.url}{hashkey}/scoring-edits/', 'post', self._post_headers(), payload)
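# Illustrative payload built by _post_new_score() for scores={'ddos': 0, 'scam': 20}
# with the default 'temporary' override_type (the values are made up for the example):
EXAMPLE_SCORING_PAYLOAD = {
    'override_type': 'temporary',
    'scores': [
        {'threat_type': 'ddos', 'score': {'risk': 0}},
        {'threat_type': 'scam', 'score': {'risk': 20}},
    ],
}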
def refresh_token(self, refresh_token: str):
    """
    Refresh the current token

    :param refresh_token: str
    """
    logger.debug('Token will be refreshed')
    headers = {'Authorization': refresh_token}
    response = requests.post(url=self.url_refresh, headers=headers, verify=self.requests_ssl_verify)
    json_response = json.loads(response.text)
    if response.status_code == 401 and json_response.get('msg') == 'Token has expired':
        logger.debug('Refreshing the refresh token')
        # The refresh token is also expired, we need to restart the authentication from scratch
        return self.get_token()
    elif 'access_token' in json_response:
        return json_response
    # else an error occurred
    logger.error(f'An error occurred while refreshing the token, for URL: {self.url_refresh}\n'
                 f'response of the API: {response.text}')
    exit(1)
def retrieve_token(self, data: dict, refresh_token: bool):
    """
    Retrieve a token. If refresh_token is True, data is the refresh token
    (passed as headers) and the current token is refreshed; otherwise data is
    the request body used to create a new token.

    :param data: dict
    :param refresh_token: bool
    :return dict
    """
    if refresh_token:
        raw_res = requests.post(url=self.url_refresh, headers=data)
    else:
        raw_res = requests.post(url=self.url_token, json=data)
    api_response = json.loads(raw_res.text)
    if 'access_token' in api_response:
        return api_response
    # else an error occurred
    logger.debug('ERROR: the request failed, please refer to the API documentation')
    logger.debug(f'for URL: {self.url_refresh if refresh_token else self.url_token}\n')
    logger.debug(raw_res.text)
    return
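# Sketch of the two call paths into retrieve_token(): a fresh login posts data
# as the json body to url_token, while a refresh posts the refresh token as an
# Authorization header to url_refresh (mirroring refresh_token() above). The
# credential keys shown for the login body are an assumption, not confirmed by
# this file.
def _example_retrieve_token(generator):
    fresh = generator.retrieve_token({'email': 'user@example.com', 'password': 'secret'}, False)
    refreshed = generator.retrieve_token({'Authorization': f'Token {fresh["refresh_token"]}'}, True)
    return refreshed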
def _send_request(self, url: str, method: str, headers: dict, data: dict):
    """
    Send the correct http request to url from method [get, post, delete, patch, put].
    Raise a TypeError 'Unknown method to requests {method}' when the method is not one of the above.

    :param url: str
    :param method: str
    :param data: dict
    :param headers: dict
    :return: Response
    """
    common_kwargs = {
        'url': url,
        'headers': headers,
        'verify': self.requests_ssl_verify
    }
    if method == 'get':
        api_response = requests.get(**common_kwargs)
    elif method == 'post':
        api_response = requests.post(**common_kwargs, data=json.dumps(data))
    elif method == 'delete':
        api_response = requests.delete(**common_kwargs, data=json.dumps(data))
    elif method == 'patch':
        api_response = requests.patch(**common_kwargs, data=json.dumps(data))
    elif method == 'put':
        api_response = requests.put(**common_kwargs, data=json.dumps(data))
    else:
        logger.debug('ERROR: unsupported method, please only use one of [get, post, put, patch, delete]')
        raise TypeError(f'Unknown method to requests {method}')
    return api_response
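# Design note: the if/elif ladder above could be collapsed via getattr, since
# requests exposes get/post/delete/patch/put as module-level functions. A
# behavior-equivalent sketch (GET is kept body-less, matching the ladder above):
def _send_request_via_getattr(url: str, method: str, headers: dict, data: dict,
                              verify: bool = True):
    if method not in ('get', 'post', 'delete', 'patch', 'put'):
        raise TypeError(f'Unknown method to requests {method}')
    body = json.dumps(data) if method != 'get' else None
    return getattr(requests, method)(url=url, headers=headers, verify=verify, data=body)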
def datalake_requests(self, url: str, method: str, headers: dict, post_body: dict = None):
    """
    Use it to request the API
    """
    self.headers = headers
    tries_left = self.SET_MAX_RETRY
    logger.debug(self._pretty_debug_request(url, method, post_body, headers, self.tokens))
    if not headers.get('Authorization'):
        fresh_tokens = self.token_generator.get_token()
        self.tokens = [
            f'Token {fresh_tokens["access_token"]}',
            f'Token {fresh_tokens["refresh_token"]}'
        ]
        headers['Authorization'] = self.tokens[0]
    while True:
        response = self._send_request(url, method, headers, post_body)
        logger.debug(f'API response:\n{str(response.text)}')
        if response.status_code == 401:
            logger.warning('Token expired or missing authorization header. Updating token')
            self._token_update(self._load_response(response))
        elif response.status_code == 422:
            logger.warning('Bad authorization header. Updating token')
            logger.debug(f'422 HTTP code: {response.text}')
            self._token_update(self._load_response(response))
        elif response.status_code < 200 or response.status_code > 299:
            logger.error(f'API returned non 2xx response code: {response.status_code}\n{response.text}'
                         f'\nRetrying')
        else:
            try:
                dict_response = self._load_response(response)
                return dict_response
            except JSONDecodeError:
                logger.error('Request unexpectedly returned a non-json value. Retrying')
        tries_left -= 1
        if tries_left <= 0:
            logger.error('Request failed: will return nothing for this request')
            return {}
def datalake_requests(self, url: str, method: str, headers: dict, post_body: dict = None):
    """
    Use it to request the API.
    """
    self.headers = headers  # kept on the instance so _token_update can refresh the Authorization header
    tries_left = self.SET_MAX_RETRY
    api_response = None
    logger.debug(self._pretty_debug_request(url, method, post_body, headers, self.tokens))
    if not headers.get('Authorization'):
        fresh_tokens = self.token_generator.get_token()
        self.tokens = [
            f'Token {fresh_tokens["access_token"]}',
            f'Token {fresh_tokens["refresh_token"]}'
        ]
        headers['Authorization'] = self.tokens[0]
    while tries_left > 0:
        try:
            api_response = self._send_request(url, method, headers, post_body)
            dict_response = self._load_response(api_response)
            if self._token_update(dict_response):
                return dict_response
        except Exception:
            tries_left -= 1
            if tries_left <= 0:
                logger.warning('Request failed: will return nothing for this request')
                return {}
            elif not api_response:
                logger.debug('ERROR: something has gone wrong with requests ...')
                logger.debug('sleep 5 seconds')
                time.sleep(5)
            else:
                logger.warning('ERROR: the request failed, please refer to the API documentation')
                logger.warning(f'for URL: {url}\nwith:\nheaders:{headers}\nbody:{post_body}\n')
                logger.warning(api_response.text)
class BaseEngine:
    OCD_DTL_QUOTA_TIME = int(os.getenv('OCD_DTL_QUOTA_TIME', 1))
    OCD_DTL_REQUESTS_PER_QUOTA_TIME = int(os.getenv('OCD_DTL_REQUESTS_PER_QUOTA_TIME', 5))
    logger.debug(f'Throttle selected: {OCD_DTL_REQUESTS_PER_QUOTA_TIME} queries per {OCD_DTL_QUOTA_TIME}s')

    SET_MAX_RETRY = 3

    def __init__(self, url: str, token_url: str, tokens: list):
        self.url = url
        self.token_url = token_url
        self.tokens = tokens
        self.terminal_size = self._get_size_terminal()
        self.token_generator = TokenGenerator(token_url)
        self.headers = None

    def _get_size_terminal(self) -> int:
        """
        Return the terminal size for pretty print
        """
        stty_sizes = os.popen('stty size', 'r').read().split()
        if len(stty_sizes) >= 2:
            return int(stty_sizes[1])
        else:  # Return default terminal size
            return 80

    @throttle(
        period=OCD_DTL_QUOTA_TIME,
        call_per_period=OCD_DTL_REQUESTS_PER_QUOTA_TIME,
    )
    def datalake_requests(self, url: str, method: str, headers: dict, post_body: dict = None):
        """
        Use it to request the API.
        """
        self.headers = headers  # kept on the instance so _token_update can refresh the Authorization header
        tries_left = self.SET_MAX_RETRY
        api_response = None
        logger.debug(self._pretty_debug_request(url, method, post_body, headers, self.tokens))
        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            headers['Authorization'] = self.tokens[0]
        while tries_left > 0:
            try:
                api_response = self._send_request(url, method, headers, post_body)
                dict_response = self._load_response(api_response)
                if self._token_update(dict_response):
                    return dict_response
            except Exception:
                tries_left -= 1
                if tries_left <= 0:
                    logger.warning('Request failed: will return nothing for this request')
                    return {}
                elif not api_response:
                    logger.debug('ERROR: something has gone wrong with requests ...')
                    logger.debug('sleep 5 seconds')
                    time.sleep(5)
                else:
                    logger.warning('ERROR: the request failed, please refer to the API documentation')
                    logger.warning(f'for URL: {url}\nwith:\nheaders:{headers}\nbody:{post_body}\n')
                    logger.warning(api_response.text)

    def _send_request(self, url: str, method: str, headers: dict, data: dict):
        """
        Send the correct http request to url from method [get, post, delete, patch, put].
        Raise a TypeError 'Unknown method to requests {method}' when the method is not one of the above.

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :return: Response
        """
        if method == 'get':
            api_response = requests.get(url=url, headers=headers)
        elif method == 'post':
            api_response = requests.post(url=url, headers=headers, data=json.dumps(data))
        elif method == 'delete':
            api_response = requests.delete(url=url, headers=headers, data=json.dumps(data))
        elif method == 'patch':
            api_response = requests.patch(url=url, headers=headers, data=json.dumps(data))
        elif method == 'put':
            api_response = requests.put(url=url, headers=headers, data=json.dumps(data))
        else:
            logger.debug('ERROR: unsupported method, please only use one of [get, post, put, patch, delete]')
            raise TypeError(f'Unknown method to requests {method}')
        return api_response

    def _load_response(self, api_response: Response):
        """
        Load the API response from JSON format to dict.
        The endpoint for events is a bit special: json.loads() doesn't work on the
        format returned by the API. For this special case we return a dict containing
        the length of the response, i.e. if the length of the response == 3 then there are no events.

        :param api_response: Response
        :return: dict_response
        """
        if api_response.text.startswith('[') and api_response.text.endswith(']\n'):
            # This condition is for the date-histogram endpoints
            dict_response = {'response_length': len(api_response.text)}
        else:
            dict_response = json.loads(api_response.text)
        return dict_response

    def _token_update(self, dict_response: dict):
        """
        Update the token when the API response is either 'Missing Authorization Header'
        or 'Token has expired'. Return False if the token has been regenerated.

        :param dict_response: dict
        :return: bool
        """
        if dict_response.get('msg') == 'Missing Authorization Header':
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            self.headers['Authorization'] = self.tokens[0]
            return False
        elif dict_response.get('msg') == 'Token has expired':
            fresh_token = self.token_generator.refresh_token(self.tokens[1])
            self.tokens = [f'Token {fresh_token["access_token"]}', self.tokens[1]]
            self.headers['Authorization'] = self.tokens[0]
            return False
        return True

    def _pretty_debug_request(self, url: str, method: str, data: dict, headers: dict, tokens: list):
        """
        Return a pretty debug string

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :param tokens: list
        :return: str
        """
        debug = ('-' * self.terminal_size +
                 'DEBUG - datalake_requests:\n' +
                 f' - url: \n{url}\n' +
                 f' - method: \n{method}\n' +
                 f' - headers: \n{headers}\n' +
                 f' - data: \n{data}\n' +
                 f' - token: \n{tokens[0]}\n' +
                 f' - refresh_token: \n{tokens[1]}\n' +
                 '-' * self.terminal_size)
        return debug
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    # Load initial args
    parser = starter.start('Edit scores of a specified list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to edit the score of',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
    )
    parser.add_argument(
        '--permanent',
        help='''Permanent: all values will override any values provided by both newer and older IOCs.
        Newer IOCs with override_type permanent can still override old permanent changes.
        Temporary: all values should override any values provided by older IOCs, but not newer ones.''',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: edit_score.py')

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")
    if not args.threat_types or len(args.threat_types) % 2 != 0:
        parser.error("threat_types invalid! Should look like: ddos 50 scam 15")
    parsed_threat_type = AddThreatsPost.parse_threat_types(args.threat_types)

    hashkeys = set(args.hashkeys) if args.hashkeys else set()
    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    url_threats = main_url + endpoint_url['endpoints']['threats']
    post_engine_edit_score = ThreatsScoringPost(url_threats, main_url, tokens)

    response_dict = post_engine_edit_score.post_new_score_from_list(
        hashkeys,
        parsed_threat_type,
        'permanent' if args.permanent else 'temporary',
    )
    if args.output:
        starter.save_output(args.output, response_dict)
        logger.info(f'Results saved in {args.output}\n')
    logger.debug('END: edit_score.py')
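# Sketch of the dict shape parse_threat_types is expected to produce from the
# flat CLI list (the real implementation lives elsewhere; the dict shape is an
# assumption based on how threat scores are consumed downstream):
def _example_parse_threat_types(flat: list) -> dict:
    # ['ddos', '50', 'scam', '15'] -> {'ddos': 50, 'scam': 15}
    return {flat[i]: int(flat[i + 1]) for i in range(0, len(flat), 2)}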
def main(override_args=None):
    """Method to start the script"""
    # Load initial args
    starter = BaseScripts()
    parser = starter.start('Gets threats or hashkeys from given atom types and atom values.')
    supported_atom_types = parser.add_argument_group('Supported Atom Types')

    parser.add_argument(
        'untyped_atoms',
        help='untyped atom values to look up. Useful when you do not know the atom type',
        nargs='*',
    )
    for atom_type in ATOM_TYPES_FLAGS:
        supported_atom_types.add_argument(
            f'--{atom_type}',
            action='append',
            help=f'set a single {atom_type} atom type with its value',
        )
    parser.add_argument(
        '-ad',
        '--atom-details',
        dest='hashkey_only',
        default=True,
        action='store_false',
        help='return full threat details',
    )
    parser.add_argument(
        '-i',
        '--input',
        action='append',
        help='read threats to look up from FILE. [atomtype:path/to/file.txt]',
    )
    parser.add_argument(
        '-ot',
        '--output-type',
        help='set to the output type desired {json,csv}. Default is json',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: bulk_lookup_threats.py')

    # create output type header
    accept_header = {'Accept': None}
    if args.output_type:
        try:
            accept_header['Accept'] = BaseEngine.output_type2header(args.output_type)
        except ParserError as e:
            logger.exception(f'Exception raised while getting output type headers # {str(e)}', exc_info=False)
            exit(1)

    # to gather all typed atoms passed by arguments and input files
    typed_atoms = {}

    # set validation flags regarding the presence or absence of cli arguments
    has_file = args.input is not None
    has_flag = False
    for flag in ATOM_TYPES_FLAGS:
        atom_values = getattr(args, flag)
        if atom_values is not None:
            typed_atoms[flag] = atom_values
            has_flag = True

    # validate that there is at least one untyped atom, one typed atom or one input file
    if (not has_flag and not has_file and not args.untyped_atoms) or (SUBCOMMAND_NAME in args.untyped_atoms):
        parser.error("you must provide at least one of the following: untyped atom, atom type, input file.")

    # process input files
    if has_file:
        for input_file in args.input:
            file_atom_type, filename = get_atom_type_from_filename(input_file)
            logger.debug(f'file {filename} was recognized as {file_atom_type}')

            if file_atom_type == UNTYPED_ATOM_TYPE:
                args.untyped_atoms += starter._load_list(filename)
            else:
                typed_atoms.setdefault(file_atom_type, []).extend(starter._load_list(filename))

    # load api_endpoints and tokens
    endpoints_config, main_url, tokens = starter.load_config(args)
    post_engine_bulk_lookup_threats = BulkLookupThreats(endpoints_config, args.env, tokens)
    post_engine_atom_values_extractor = AtomValuesExtractor(endpoints_config, args.env, tokens)

    # lookup for atom types
    if args.untyped_atoms:
        atoms_values_extractor_response = post_engine_atom_values_extractor.atom_values_extract(args.untyped_atoms)
        if atoms_values_extractor_response['found'] > 0:
            typed_atoms = join_dicts(typed_atoms, atoms_values_extractor_response['results'])
        else:
            logger.warning('none of your untyped atoms could be typed')

        # figure out which atoms couldn't be typed, in order to print them
        if atoms_values_extractor_response['not_found'] > 0:
            for atom_type, atom_list in atoms_values_extractor_response['results'].items():
                args.untyped_atoms = [
                    untyped_atom for untyped_atom in args.untyped_atoms
                    if untyped_atom not in atom_list
                ]
            logger.warning(f'\x1b[6;37;43m{"#" * 60} UNTYPED ATOMS {"#" * 47}\x1b[0m')
            logger.warning('\n'.join(args.untyped_atoms))
            logger.warning('')

    response = post_engine_bulk_lookup_threats.bulk_lookup_threats(
        threats=typed_atoms,
        additional_headers=accept_header,
        hashkey_only=args.hashkey_only)

    pretty_print(response, args.output_type)

    if args.output:
        starter.save_output(args.output, response)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: bulk_lookup_threats.py')
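# Usage sketch: mixing untyped atoms, an explicitly typed atom flag and a typed
# input file. '--domain' assumes 'domain' is in ATOM_TYPES_FLAGS, and
# 'ip:ips.txt' is a hypothetical [atomtype:path] input; both are assumptions.
def _example_bulk_lookup_threats():
    main([
        'evil.example.com', '1.2.3.4',   # untyped atoms, typed via the extractor
        '--domain', 'bad.example.org',   # explicitly typed atom
        '-i', 'ip:ips.txt',              # typed input file
        '-ot', 'csv',                    # CSV output
        '-ad',                           # full threat details instead of hashkeys only
    ])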
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    # Load initial args
    parser = starter.start('Lookup a list of threats in Datalake, from a list of values or a file')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    parser.add_argument(
        'threats',
        help='threats to lookup',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input',
        help='read threats to lookup from FILE',
    )
    parser.add_argument(
        '-td',
        '--threat_details',
        action='store_true',
        help='set if you also want access to the threat details',
    )
    parser.add_argument(
        '-ot',
        '--output_type',
        default='json',
        help='set to the output type desired {json,csv}. Default is json if not specified',
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select the column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: lookup_threats.py')

    if not args.threats and not args.input:
        parser.error("either a threat or an input_file is required")
    if args.atom_type not in PostEngine.authorized_atom_value:
        parser.error("atom type must be in {}".format(','.join(PostEngine.authorized_atom_value)))

    args.output_type = output_type2header(args.output_type, parser)
    hashkey_only = not args.threat_details
    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    get_engine_lookup_threats = LookupThreats(endpoint_config, args.env, tokens)
    list_threats = list(args.threats) if args.threats else []
    if args.input:
        if args.is_csv:
            try:
                list_threats = list_threats + starter._load_csv(args.input, args.delimiter, args.column - 1)
            except ValueError as ve:
                logger.error(ve)
                exit()
        else:
            list_threats = list_threats + starter._load_list(args.input)
    list_threats = list(OrderedDict.fromkeys(list_threats))  # removing duplicates while preserving order
    response_dict = get_engine_lookup_threats.lookup_threats(
        list_threats, args.atom_type, hashkey_only, args.output_type)

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: lookup_threats.py')
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    # Load initial args
    parser = starter.start('Submit a new threat to Datalake from a file')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    required_named.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
        required=True,
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select the column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='set it to define the added threats as whitelist',
        action='store_true',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
    )
    parser.add_argument(
        '--tag',
        nargs='+',
        help='add a list of tags',
        default=[],
    )
    parser.add_argument(
        '--link',
        help='add link as external_analysis_link',
        nargs='+',
    )
    parser.add_argument(
        '--permanent',
        help='sets override_type to permanent. Scores won\'t be updated by the algorithm. Default is temporary',
        action='store_true',
    )
    parser.add_argument(
        '--no-bulk',
        help='force an API call for each threat, useful to retrieve the details of the created threats',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: add_new_threats.py')

    if not args.threat_types and not args.whitelist:
        parser.error("threat types are required if the atom is not for whitelisting")
    permanent = 'permanent' if args.permanent else 'temporary'

    if args.is_csv:
        try:
            list_new_threats = starter._load_csv(args.input, args.delimiter, args.column - 1)
        except ValueError as ve:
            logger.error(ve)
            exit()
    else:
        list_new_threats = starter._load_list(args.input)
    list_new_threats = defang_threats(list_new_threats, args.atom_type)
    list_new_threats = list(OrderedDict.fromkeys(list_new_threats))  # removing duplicates while preserving order
    threat_types = ThreatsPost.parse_threat_types(args.threat_types) or []

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    if args.no_bulk:
        post_engine_add_threats = ThreatsPost(endpoint_config, args.env, tokens)
        response_dict = post_engine_add_threats.add_threats(
            list_new_threats, args.atom_type, args.whitelist, threat_types,
            args.public, args.tag, args.link, permanent)
    else:
        post_engine_add_threats = BulkThreatsPost(endpoint_config, args.env, tokens)
        hashkeys = post_engine_add_threats.add_bulk_threats(
            list_new_threats, args.atom_type, args.whitelist, threat_types,
            args.public, args.tag, args.link, permanent)
        response_dict = {'hashkeys': list(hashkeys)}

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_new_threats.py')
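# Usage sketch for the default bulk path (file name and scores are made up):
# adds the domains found in the second column of a CSV as public ddos/scam
# threats with a permanent score override.
def _example_add_new_threats():
    main([
        '-i', 'threats.csv', '-a', 'domain',
        '--is_csv', '-c', '2',
        '-t', 'ddos', '50', 'scam', '15',
        '--tag', 'campaign-x',
        '--public', '--permanent',
    ])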
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    # Load initial args
    parser = starter.start('Submit a new threat to Datalake from a file')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    required_named.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
        required=True,
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select the column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='set it to define the added threats as whitelist',
        action='store_true',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
    )
    parser.add_argument(
        '--tag',
        nargs='+',
        help='add a list of tags',
        default=[],
    )
    parser.add_argument(
        '--link',
        help='add link as external_analysis_link',
        nargs='+',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: add_new_threats.py')

    if not args.threat_types and not args.whitelist:
        parser.error("threat types are required if the atom is not for whitelisting")

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    url_manual_threats = main_url + endpoint_url['endpoints']['threats-manual']
    post_engine_add_threats = AddThreatsPost(url_manual_threats, main_url, tokens)
    if args.is_csv:
        list_new_threats = starter._load_csv(args.input, args.delimiter, args.column - 1)
    else:
        list_new_threats = starter._load_list(args.input)
    threat_types = AddThreatsPost.parse_threat_types(args.threat_types) or []
    response_dict = post_engine_add_threats.add_threats(
        list_new_threats,
        args.atom_type,
        args.whitelist,
        threat_types,
        args.public,
        args.tag,
        args.link,
    )
    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_new_threats.py')
class BaseEngine:
    OCD_DTL_QUOTA_TIME = int(os.getenv('OCD_DTL_QUOTA_TIME', 1))
    OCD_DTL_REQUESTS_PER_QUOTA_TIME = int(os.getenv('OCD_DTL_REQUESTS_PER_QUOTA_TIME', 5))
    logger.debug(f'Throttle selected: {OCD_DTL_REQUESTS_PER_QUOTA_TIME} queries per {OCD_DTL_QUOTA_TIME}s')

    Json = Union[dict, list]  # json-like object that can be a dict or a root level array

    SET_MAX_RETRY = 3

    def __init__(self, endpoint_config: dict, environment: str, tokens: list):
        self.endpoint_config = endpoint_config
        self.environment = environment
        self.requests_ssl_verify = suppress_insecure_request_warns(environment)
        self.url = self._build_url(endpoint_config, environment)
        self.tokens = tokens
        self.terminal_size = self._get_size_terminal()
        self.token_generator = TokenGenerator(endpoint_config, environment=environment)
        self.headers = None

    def _get_size_terminal(self) -> int:
        """
        Return the terminal size for pretty print
        """
        stty_sizes = os.popen('stty size', 'r').read().split()
        if len(stty_sizes) >= 2:
            return int(stty_sizes[1])
        else:  # Return default terminal size
            return 80

    @throttle(
        period=OCD_DTL_QUOTA_TIME,
        call_per_period=OCD_DTL_REQUESTS_PER_QUOTA_TIME,
    )
    def datalake_requests(self, url: str, method: str, headers: dict, post_body: dict = None):
        """
        Use it to request the API
        """
        self.headers = headers
        tries_left = self.SET_MAX_RETRY
        logger.debug(self._pretty_debug_request(url, method, post_body, headers, self.tokens))
        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.replace_tokens(fresh_tokens)
        while True:
            response = self._send_request(url, method, headers, post_body)
            logger.debug(f'API response:\n{str(response.text)}')
            if response.status_code == 401:
                logger.warning('Token expired or missing authorization header. Updating token')
                self._token_update(self._load_response(response))
            elif response.status_code == 422:
                logger.warning('Bad authorization header. Updating token')
                logger.debug(f'422 HTTP code: {response.text}')
                self._token_update(self._load_response(response))
            elif response.status_code < 200 or response.status_code > 299:
                logger.error(f'API returned non 2xx response code: {response.status_code}\n{response.text}'
                             f'\nRetrying')
            else:
                try:
                    dict_response = self._load_response(response)
                    return dict_response
                except JSONDecodeError:
                    logger.error('Request unexpectedly returned a non-json value. Retrying')
            tries_left -= 1
            if tries_left <= 0:
                logger.error('Request failed: will return nothing for this request')
                return {}

    def _send_request(self, url: str, method: str, headers: dict, data: dict):
        """
        Send the correct http request to url from method [get, post, delete, patch, put].
        Raise a TypeError 'Unknown method to requests {method}' when the method is not one of the above.

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :return: Response
        """
        common_kwargs = {
            'url': url,
            'headers': headers,
            'verify': self.requests_ssl_verify
        }
        if method == 'get':
            api_response = requests.get(**common_kwargs)
        elif method == 'post':
            api_response = requests.post(**common_kwargs, data=json.dumps(data))
        elif method == 'delete':
            api_response = requests.delete(**common_kwargs, data=json.dumps(data))
        elif method == 'patch':
            api_response = requests.patch(**common_kwargs, data=json.dumps(data))
        elif method == 'put':
            api_response = requests.put(**common_kwargs, data=json.dumps(data))
        else:
            logger.debug('ERROR: unsupported method, please only use one of [get, post, put, patch, delete]')
            raise TypeError(f'Unknown method to requests {method}')
        return api_response

    def _load_response(self, api_response: Response):
        """
        Load the API response from JSON format to dict.
        The endpoint for events is a bit special: json.loads() doesn't work on the
        format returned by the API. For this special case we return a dict containing
        the length of the response, i.e. if the length of the response == 3 then there are no events.

        :param api_response: Response
        :return: dict_response
        """
        if api_response.text.startswith('[') and api_response.text.endswith(']\n'):
            # This condition is for the date-histogram endpoints
            dict_response = {'response_length': len(api_response.text)}
        else:
            dict_response = json.loads(api_response.text)
        return dict_response

    def _token_update(self, dict_response: dict):
        """
        Update the token when the API response is either 'Missing Authorization Header'
        or 'Token has expired'. Return False if the token has been regenerated.

        :param dict_response: dict
        :return: bool
        """
        if dict_response.get('msg') == 'Missing Authorization Header':
            fresh_tokens = self.token_generator.get_token()
            self.replace_tokens(fresh_tokens)
            return False
        elif dict_response.get('msg') == 'Bad Authorization header. Expected value \'Token <JWT>\'':
            fresh_tokens = self.token_generator.get_token()
            self.replace_tokens(fresh_tokens)
            return False
        elif dict_response.get('msg') == 'Token has expired':
            fresh_tokens = self.token_generator.refresh_token(self.tokens[1])
            self.replace_tokens(fresh_tokens)
            return False
        return True

    def replace_tokens(self, fresh_tokens: dict):
        access_token = fresh_tokens["access_token"]
        # Updating the refresh token is optional
        refresh_token = fresh_tokens.get('refresh_token', self.tokens[1].replace('Token ', ''))
        self.tokens = [f'Token {access_token}', f'Token {refresh_token}']
        self.headers['Authorization'] = self.tokens[0]

    def _pretty_debug_request(self, url: str, method: str, data: dict, headers: dict, tokens: list):
        """
        Return a pretty debug string

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :param tokens: list
        :return: str
        """
        debug = ('-' * self.terminal_size +
                 'DEBUG - datalake_requests:\n' +
                 f' - url: \n{url}\n' +
                 f' - method: \n{method}\n' +
                 f' - headers: \n{headers}\n' +
                 f' - data: \n{data}\n' +
                 f' - token: \n{tokens[0]}\n' +
                 f' - refresh_token: \n{tokens[1]}\n' +
                 '-' * self.terminal_size)
        return debug

    def _build_url(self, endpoint_config: dict, environment: str):
        """To be implemented by each subclass"""
        raise NotImplementedError()

    def _build_url_for_endpoint(self, endpoint_name):
        base_url = urljoin(self.endpoint_config['main'][self.environment], self.endpoint_config['api_version'])
        endpoints = self.endpoint_config['endpoints']
        return urljoin(base_url, endpoints[endpoint_name], allow_fragments=True)
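# Illustrative walk-through of _build_url_for_endpoint()'s urljoin composition
# (relying on the module's existing urljoin import), with a made-up
# endpoint_config shaped like the one the scripts load:
def _example_build_url_for_endpoint() -> str:
    config = {
        'main': {'prod': 'https://datalake.example.com/'},
        'api_version': 'v2/',
        'endpoints': {'retrieve-bulk-search': 'mrti/bulk-search/task/{task_uuid}/'},
    }
    base_url = urljoin(config['main']['prod'], config['api_version'])
    # -> 'https://datalake.example.com/v2/'
    return urljoin(base_url, config['endpoints']['retrieve-bulk-search'])
    # -> 'https://datalake.example.com/v2/mrti/bulk-search/task/{task_uuid}/'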