def main(override_args=None):
    starter = BaseScripts()
    logger.debug('START: get_query_hash.py')

    # Load initial args
    parser = starter.start('Retrieve a query hash from a query body (a json used for the Advanced Search).')
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_body_path',
        help='path to the json file containing the query body',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    with open(args.query_body_path, 'r') as query_body_file:
        query_body = json.load(query_body_file)
    logger.debug(f'Retrieving query hash for query body: {query_body}')

    advanced_search = AdvancedSearch(endpoint_config, args.env, tokens)
    response = advanced_search.get_threats(query_body, limit=0)
    if not response or 'query_hash' not in response:
        logger.error("Couldn't retrieve a query hash, is the query body valid?")
        exit(1)

    query_hash = response['query_hash']
    if args.output:
        with open(args.output, 'w') as output:
            output.write(query_hash)
        logger.info(f'Query hash saved in {args.output}')
    else:
        logger.info(f'Query hash associated: {query_hash}')

def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    logger.debug('START: get_threats_from_query_hash.py')

    # Load initial args
    parser = starter.start('Retrieve a list of responses from a given query hash.')
    parser.add_argument(
        '--query_fields',
        help='fields to be retrieved from the threat (default: only the hashkey)\n'
             'If an atom detail isn\'t present in a particular atom, an empty string is returned.',
        nargs='+',
        default=['threat_hashkey'],
    )
    parser.add_argument(
        '--list',
        help='turn the output into a list (requires query_fields to be a single element)',
        action='store_true',
    )
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_hash',
        help='the query hash from which to retrieve the response hashkeys',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    if len(args.query_fields) > 1 and args.list:
        parser.error("List output format is only available if a single element is queried (via query_fields)")

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    logger.debug(f'Start to search for threats from the query hash: {args.query_hash}')

    bulk_search = BulkSearch(endpoint_config, args.env, tokens)
    response = bulk_search.get_threats(args.query_hash, args.query_fields)
    original_count = response.get('count', 0)
    logger.info(f'Number of threats that have been retrieved: {original_count}')

    formatted_output = format_output(response, args.list)
    if args.output:
        with open(args.output, 'w') as output:
            output.write(formatted_output)
    else:
        logger.info(formatted_output)

    if args.output:
        logger.info(f'Threats saved in {args.output}')
    else:
        logger.info('Done')

def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    logger.debug('START: get_threats_by_hashkey.py')

    # Load initial args
    parser = starter.start('Retrieve threats (as Json) from a list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to retrieve',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='list of threat ids (hashkeys) that need to be retrieved',
    )
    parser.add_argument(
        '--lost',
        help='file path used to save hashkeys that were not found',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")

    threats_list = starter._load_csv(args.input_file) if args.input_file else args.hashkeys

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    logger.debug(f'TOTAL: {len(threats_list)} threats found')
    url_threats = main_url + endpoint_url['endpoints']['threats']
    search_engine_threats = ThreatsSearch(url_threats, main_url, tokens)
    list_threats, list_lost_hashes = search_engine_threats.get_json(threats_list)

    if args.output:
        starter.save_output(args.output, list_threats)
        logger.debug(f'Threats JSON saved in {args.output}\n')
    if args.lost:
        starter.save_output(args.lost, list_lost_hashes)
        logger.debug(f'Lost threats saved in {args.lost}\n')
    logger.debug('END: get_threats_by_hashkey.py')

def main(override_args=None):
    """Method to start the script"""
    # Load initial args
    parser = BaseScripts.start('Add comments to a specified list of hashkeys.')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to comment',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line',
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '--comment',
        help='add the given comment',
        required=True,
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    configure_logging(args.loglevel)

    # Load api_endpoints and tokens
    endpoint_config = Config().load_config()
    token_manager = TokenManager(endpoint_config, environment=args.env)
    post_engine_add_comments = CommentsPost(endpoint_config, args.env, token_manager)

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")

    hashkeys = set(args.hashkeys) if args.hashkeys else set()
    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)

    response_dict = post_engine_add_comments.post_comments(
        hashkeys,
        args.comment,
        public=args.public,
    )

    if args.output:
        save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_comments.py')

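# Illustrative invocations (a sketch, assuming this main() lives in add_comments.py and
# that BaseScripts.start() registers the shared --output/--env/--loglevel options the
# code above reads from args; hashkey and file names are made up):
#
#   $ python add_comments.py <hashkey> --comment "confirmed as C2" -p
#   $ python add_comments.py -i hashkeys.txt --comment "bulk triage note" --output results.json
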
def main(override_args=None):
    """Method to start the script"""
    # Load initial args
    parser = BaseScripts.start('Add tags to a specified list of hashkeys.')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to tag',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line',
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '--tags',
        nargs='+',
        help='add a list of tags',
        required=True,
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")

    hashkeys = set(args.hashkeys) if args.hashkeys else set()
    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)

    dtl = Datalake(env=args.env, log_level=args.loglevel)
    response_dict = post_tags(hashkeys, args.tags, args.public, dtl)

    if args.output:
        save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_tags.py')

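# Illustrative invocations (a sketch, assuming this main() lives in add_tags.py and that
# the shared --env/--loglevel/--output options come from BaseScripts.start(); tag values
# and file names are made up):
#
#   $ python add_tags.py <hashkey> --tags phishing banking -p
#   $ python add_tags.py -i hashkeys.txt --tags reviewed
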
def main(override_args=None):
    """Method to start the script"""
    logger.debug('START: get_threats_by_hashkey.py')

    # Load initial args
    parser = BaseScripts.start('Retrieve threats (as Json) from a list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to retrieve',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='list of threat ids (hashkeys) that need to be retrieved',
    )
    parser.add_argument(
        '--lost',
        help='file path used to save hashkeys that were not found',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    configure_logging(args.loglevel)

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")

    threats_list = load_list(args.input_file) if args.input_file else args.hashkeys
    logger.debug(f'TOTAL: {len(threats_list)} threats found')

    # Load api_endpoints and tokens
    endpoint_config = Config().load_config()
    token_manager = TokenManager(endpoint_config, environment=args.env)
    search_engine_threats = ThreatsSearch(endpoint_config, args.env, token_manager)
    list_threats, list_lost_hashes = search_engine_threats.get_json(threats_list)

    if args.output:
        save_output(args.output, list_threats)
        logger.debug(f'Threats JSON saved in {args.output}\n')
    if args.lost:
        save_output(args.lost, list_lost_hashes)
        logger.debug(f'Lost threats saved in {args.lost}\n')
    logger.debug('END: get_threats_by_hashkey.py')

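# Illustrative invocations (a sketch; file names are made up and the shared
# --output/--env/--loglevel flags are assumed to come from BaseScripts.start()):
#
#   $ python get_threats_by_hashkey.py <hashkey1> <hashkey2> --output threats.json
#   $ python get_threats_by_hashkey.py -i hashkeys.txt --lost not_found.txt
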
def main(override_args=None):
    logger.debug('START: get_query_hash.py')

    # Load initial args
    parser = BaseScripts.start('Retrieve a query hash from a query body (a json used for the Advanced Search).')
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_body_path',
        help='path to the json file containing the query body',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    # Load the query body
    with open(args.query_body_path, 'r') as query_body_file:
        query_body = json.load(query_body_file)
    logger.debug(f'Retrieving query hash for query body: {query_body}')

    dtl = Datalake(env=args.env, log_level=args.loglevel)
    resp = dtl.AdvancedSearch.advanced_search_from_query_body(query_body, limit=0, offset=0, output=Output.JSON)
    if not resp or 'query_hash' not in resp:
        logger.error("Couldn't retrieve a query hash, is the query body valid?")
        exit(1)

    query_hash = resp['query_hash']
    if args.output:
        with open(args.output, 'w') as output:
            output.write(query_hash)
        logger.info(f'Query hash saved in {args.output}')
    else:
        logger.info(f'Query hash associated: {query_hash}')

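# Illustrative invocation (a sketch; the --output flag is assumed to be registered by
# BaseScripts.start()). query_body.json is any Advanced Search query body, e.g. a
# made-up filter shaped like:
#
#   {"AND": [{"field": "atom_type", "multi_values": ["ip"], "type": "filter"}]}
#
#   $ python get_query_hash.py query_body.json --output query_hash.txt
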
def main(override_args=None):
    # Load initial args
    parser = BaseScripts.start('Lookup threats in Datalake')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    parser.add_argument(
        'threats',
        help='threats to lookup',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input',
        help='read threats to lookup from FILE',
    )
    parser.add_argument(
        '-td',
        '--threat_details',
        action='store_true',
        help='set if you also want to have access to the threat details',
    )
    parser.add_argument(
        '-ot',
        '--output_type',
        default='json',
        help='set to the output type desired {json,csv}. Default is json if not specified',
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: lookup_threats.py')

    if not args.threats and not args.input:
        parser.error("either a threat or an input_file is required")

    if args.output_type:
        try:
            args.output_type = BaseEngine.output_type2header(args.output_type)
        except ParserError as e:
            logger.exception(f'Exception raised while getting output type from headers # {str(e)}', exc_info=False)
            exit(1)

    hashkey_only = not args.threat_details
    dtl = Datalake(env=args.env, log_level=args.loglevel)
    list_threats = list(args.threats) if args.threats else []
    if args.input:
        if args.is_csv:
            try:
                list_threats = list_threats + load_csv(args.input, args.delimiter, args.column - 1)
            except ValueError as ve:
                logger.error(ve)
                exit(1)
        else:
            list_threats = list_threats + load_list(args.input)

    full_response = {}
    atom_type = parse_atom_type_or_exit(args.atom_type)
    list_threats = list(OrderedDict.fromkeys(list_threats))  # removing duplicates while preserving order
    for threat in list_threats:
        response = dtl.Threats.lookup(threat, atom_type=atom_type, hashkey_only=hashkey_only)
        found = response.get('threat_found', True)
        text, color = boolean_to_text_and_color[found]
        logger.info('{}{} hashkey:{} {}\x1b[0m'.format(color, threat, response['hashkey'], text))
        full_response[threat] = response

    if args.output:
        if args.output_type == 'text/csv':
            full_response = CsvBuilder.create_look_up_csv(
                full_response,
                args.atom_type,
                has_details=args.threat_details,
            )
        save_output(args.output, full_response)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: lookup_threats.py')

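# Illustrative invocations (a sketch; atom type values, file names and the --output flag
# are assumptions beyond what the parser above defines):
#
#   $ python lookup_threats.py evil.example -a domain -td
#   $ python lookup_threats.py -i iocs.csv --is_csv -d ';' -c 2 -a ip -ot csv --output out.csv
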
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Submit a new threat to Datalake from a file')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    required_named.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
        required=True,
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='set it to define the added threats as whitelist',
        action='store_true',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
    )
    parser.add_argument(
        '--tag',
        nargs='+',
        help='add a list of tags',
        default=[],
    )
    parser.add_argument(
        '--link',
        help='add link as external_analysis_link',
        nargs='+',
    )
    parser.add_argument(
        '--permanent',
        help='sets override_type to permanent. Scores won\'t be updated by the algorithm. Default is temporary',
        action='store_true',
    )
    parser.add_argument(
        '--no-bulk',
        help='force an api call for each threat, useful to retrieve the details of threats created',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: add_new_threats.py')

    if not args.threat_types and not args.whitelist:
        parser.error("threat types is required if the atom is not for whitelisting")

    permanent = 'permanent' if args.permanent else 'temporary'

    if args.is_csv:
        try:
            list_new_threats = starter._load_csv(args.input, args.delimiter, args.column - 1)
        except ValueError as ve:
            logger.error(ve)
            exit(1)
    else:
        list_new_threats = starter._load_list(args.input)
    list_new_threats = defang_threats(list_new_threats, args.atom_type)
    list_new_threats = list(OrderedDict.fromkeys(list_new_threats))  # removing duplicates while preserving order
    threat_types = ThreatsPost.parse_threat_types(args.threat_types) or []

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    if args.no_bulk:
        post_engine_add_threats = ThreatsPost(endpoint_config, args.env, tokens)
        response_dict = post_engine_add_threats.add_threats(
            list_new_threats, args.atom_type, args.whitelist, threat_types,
            args.public, args.tag, args.link, permanent)
    else:
        post_engine_add_threats = BulkThreatsPost(endpoint_config, args.env, tokens)
        hashkeys = post_engine_add_threats.add_bulk_threats(
            list_new_threats, args.atom_type, args.whitelist, threat_types,
            args.public, args.tag, args.link, permanent)
        response_dict = {'hashkeys': list(hashkeys)}

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_new_threats.py')

def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Submit a new threat to Datalake from a file')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    required_named.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
        required=True,
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='set it to define the added threats as whitelist',
        action='store_true',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
    )
    parser.add_argument(
        '--tag',
        nargs='+',
        help='add a list of tags',
        default=[],
    )
    parser.add_argument(
        '--link',
        help='add link as external_analysis_link',
        nargs='+',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: add_new_threats.py')

    if not args.threat_types and not args.whitelist:
        parser.error("threat types is required if the atom is not for whitelisting")

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    url_manual_threats = main_url + endpoint_url['endpoints']['threats-manual']
    post_engine_add_threats = AddThreatsPost(url_manual_threats, main_url, tokens)

    if args.is_csv:
        list_new_threats = starter._load_csv(args.input, args.delimiter, args.column - 1)
    else:
        list_new_threats = starter._load_list(args.input)
    threat_types = AddThreatsPost.parse_threat_types(args.threat_types) or []

    response_dict = post_engine_add_threats.add_threats(
        list_new_threats,
        args.atom_type,
        args.whitelist,
        threat_types,
        args.public,
        args.tag,
        args.link,
    )

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_new_threats.py')

def main(override_args=None):
    """Method to start the script"""
    logger.debug('START: get_threats_from_query_hash.py')

    # Load initial args
    parser = BaseScripts.start('Retrieve a list of responses from a given query hash.')
    parser.add_argument(
        '--query_fields',
        help='fields to be retrieved from the threat (default: only the hashkey)\n'
             'If an atom detail isn\'t present in a particular atom, an empty string is returned.',
        nargs='+',
        default=['threat_hashkey'],
    )
    parser.add_argument(
        '--list',
        help='turn the output into a list (requires query_fields to be a single element)',
        action='store_true',
    )
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_hash',
        help='the query hash from which to retrieve the response hashkeys, or a path to the query body json file',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    configure_logging(args.loglevel)

    if len(args.query_fields) > 1 and args.list:
        parser.error("List output format is only available if a single element is queried (via query_fields)")

    query_body = {}
    query_hash = args.query_hash
    if len(query_hash) != 32 or os.path.exists(query_hash):
        try:
            with open(query_hash, 'r') as query_body_file:
                query_body = json.load(query_body_file)
        except FileNotFoundError:
            logger.error(f"Couldn't understand the given value as a query hash or path to query body: {query_hash}")
            exit(1)

    # Load api_endpoints and tokens
    dtl = Datalake(env=args.env, log_level=args.loglevel)
    logger.debug(f'Start to search for threats from the query hash: {query_hash}')

    spinner = None
    if logger.isEnabledFor(logging.INFO):
        spinner = Halo(text='Creating bulk task', spinner='dots')
        spinner.start()

    task = dtl.BulkSearch.create_task(query_body=query_body, query_hash=query_hash, query_fields=args.query_fields)
    if spinner:
        spinner.text = f'Waiting for bulk task {task.uuid} response'
    response = task.download_sync()
    original_count = response.get('count', 0)
    if spinner:
        spinner.succeed()
        spinner.info(f'Number of threats that have been retrieved: {original_count}')

    formatted_output = format_output(response, args.list)
    if args.output:
        with open(args.output, 'w') as output:
            output.write(formatted_output)
    else:
        logger.info(formatted_output)

    if args.output:
        logger.info(f'Threats saved in {args.output}')
    else:
        logger.info('Done')

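# Illustrative invocations (a sketch; the 32-character query hash and file names are
# made up, and the positional argument accepts either form as the help text above says):
#
#   $ python get_threats_from_query_hash.py <query_hash> --query_fields atom_value --list
#   $ python get_threats_from_query_hash.py query_body.json --query_fields threat_hashkey atom_value
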
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Lookup threats in Datalake')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    parser.add_argument(
        'threats',
        help='threats to lookup',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input',
        help='read threats to lookup from FILE',
    )
    parser.add_argument(
        '-td',
        '--threat_details',
        action='store_true',
        help='set if you also want to have access to the threat details',
    )
    parser.add_argument(
        '-ot',
        '--output_type',
        default='json',
        help='set to the output type desired {json,csv}. Default is json if not specified',
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: lookup_threats.py')

    if not args.threats and not args.input:
        parser.error("either a threat or an input_file is required")
    if args.atom_type not in PostEngine.authorized_atom_value:
        parser.error("atom type must be in {}".format(','.join(PostEngine.authorized_atom_value)))

    args.output_type = output_type2header(args.output_type, parser)
    hashkey_only = not args.threat_details

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    get_engine_lookup_threats = LookupThreats(endpoint_config, args.env, tokens)

    list_threats = list(args.threats) if args.threats else []
    if args.input:
        if args.is_csv:
            try:
                list_threats = list_threats + starter._load_csv(args.input, args.delimiter, args.column - 1)
            except ValueError as ve:
                logger.error(ve)
                exit(1)
        else:
            list_threats = list_threats + starter._load_list(args.input)
    list_threats = list(OrderedDict.fromkeys(list_threats))  # removing duplicates while preserving order

    response_dict = get_engine_lookup_threats.lookup_threats(
        list_threats, args.atom_type, hashkey_only, args.output_type)

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: lookup_threats.py')

def main(override_args=None):
    parser = BaseScripts.start('Gets threats from a given query body or query hash.', output_file_required=True)
    parser.add_argument('-i', '--input', help='read the query body from a json file')
    parser.add_argument('--query-hash', help='sets the query hash for the advanced search')
    parser.add_argument(
        '-l',
        '--limit',
        help='defines how many items will be returned in one page slice. Accepted values: 0 to 5000, default is 20',
        type=int,
        default=20,
    )
    parser.add_argument(
        '--offset',
        help='defines the index of the first requested item. Accepted values: 0 and bigger, default is 0',
        type=int,
        default=0,
    )
    parser.add_argument(
        '-ot',
        '--output-type',
        help='sets the output type desired {json, csv, stix, misp}. Default is json',
        default='json',
    )
    parser.add_argument(
        '--ordering',
        help='threat field to sort on. To sort the results by relevance (if any "search" is applied), just skip '
             'this field. To use the reversed order, use minus, i.e. --ordering="-last_updated" in your command line.',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: advanced_search.py')

    if bool(args.input) == bool(args.query_hash):
        raise ValueError('Either an input file with a query body or a query hash needs to be provided.')

    try:
        output_type = Output[args.output_type.upper()]
    except KeyError:
        logger.error('Unsupported output type, please use json, csv, stix or misp')
        exit(1)

    dtl = Datalake(env=args.env, log_level=args.loglevel)
    if args.input:
        query_body = load_json(args.input)
        resp = dtl.AdvancedSearch.advanced_search_from_query_body(
            query_body, limit=args.limit, offset=args.offset, output=output_type, ordering=args.ordering)
    else:
        resp = dtl.AdvancedSearch.advanced_search_from_query_hash(
            args.query_hash, limit=args.limit, offset=args.offset, output=output_type, ordering=args.ordering)

    save_output(args.output, resp)
    logger.info(f'\x1b[0;30;42m OK: MATCHING THREATS SAVED IN {args.output} \x1b[0m')
    logger.debug('END: advanced_search.py')

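# Illustrative invocations (a sketch; the output file is required here via
# output_file_required=True, and the -o/--output flag spelling is an assumption about
# what BaseScripts.start() registers):
#
#   $ python advanced_search.py -i query_body.json -l 100 -ot csv -o results.csv
#   $ python advanced_search.py --query-hash <query_hash> --ordering="-last_updated" -o results.json
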
def __init__(self):
    args = ConfigArg(loglevel=logging.WARNING, env=OCD_DTL_API_ENV)
    endpoint_config, _, tokens = BaseScripts().load_config(args=args)
    self.threats_api = Threats(endpoint_config, args.env, tokens)

def main(override_args=None):
    """Method to start the script"""
    # Load initial args
    starter = BaseScripts()
    parser = starter.start('Gets threats or hashkeys from given atom types and atom values.')
    supported_atom_types = parser.add_argument_group('Supported Atom Types')

    parser.add_argument(
        'untyped_atoms',
        help='untyped atom values to lookup. Useful when you do not know what the atom type is',
        nargs='*',
    )
    for atom_type in ATOM_TYPES_FLAGS:
        supported_atom_types.add_argument(
            f'--{atom_type}',
            action='append',
            help=f'set a single {atom_type} atom type with its value',
        )
    parser.add_argument(
        '-ad',
        '--atom-details',
        dest='hashkey_only',
        default=True,
        action='store_false',
        help='returns threats full details',
    )
    parser.add_argument(
        '-i',
        '--input',
        action='append',
        help='read threats to add from FILE. [atomtype:path/to/file.txt]',
    )
    parser.add_argument(
        '-ot',
        '--output-type',
        help='set to the output type desired {json,csv}. Default is json',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: bulk_lookup_threats.py')

    # create output type header
    accept_header = {'Accept': None}
    if args.output_type:
        try:
            accept_header['Accept'] = BaseEngine.output_type2header(args.output_type)
        except ParserError as e:
            logger.exception(f'Exception raised while getting output type headers # {str(e)}', exc_info=False)
            exit(1)

    # to gather all typed atoms passed by arguments and input files
    typed_atoms = {}

    # set validation flags regarding the presence or absence of cli arguments
    has_file = args.input is not None
    has_flag = False
    for flag in ATOM_TYPES_FLAGS:
        atom_values = getattr(args, flag)
        if atom_values is not None:
            typed_atoms[flag] = atom_values
            has_flag = True

    # validate that there is at least one untyped atom, one typed atom or one input file
    if (not has_flag and not has_file and not args.untyped_atoms) or (SUBCOMMAND_NAME in args.untyped_atoms):
        parser.error("you must provide at least one of the following: untyped atom, atom type, input file.")

    # process input files
    if has_file:
        for input_file in args.input:
            file_atom_type, filename = get_atom_type_from_filename(input_file)
            logger.debug(f'file {filename} was recognized as {file_atom_type}')
            if file_atom_type == UNTYPED_ATOM_TYPE:
                args.untyped_atoms += starter._load_list(filename)
            else:
                typed_atoms.setdefault(file_atom_type, []).extend(starter._load_list(filename))

    # load api_endpoints and tokens
    endpoints_config, main_url, tokens = starter.load_config(args)
    post_engine_bulk_lookup_threats = BulkLookupThreats(endpoints_config, args.env, tokens)
    post_engine_atom_values_extractor = AtomValuesExtractor(endpoints_config, args.env, tokens)

    # lookup for atom types
    if args.untyped_atoms:
        atoms_values_extractor_response = post_engine_atom_values_extractor.atom_values_extract(args.untyped_atoms)
        if atoms_values_extractor_response['found'] > 0:
            typed_atoms = join_dicts(typed_atoms, atoms_values_extractor_response['results'])
        else:
            logger.warning('none of your untyped atoms could be typed')

        # find out which atoms couldn't be typed in order to print them
        if atoms_values_extractor_response['not_found'] > 0:
            for atom_type, atom_list in atoms_values_extractor_response['results'].items():
                args.untyped_atoms = [
                    untyped_atom for untyped_atom in args.untyped_atoms
                    if untyped_atom not in atoms_values_extractor_response['results'][atom_type]
                ]
            logger.warning(f'\x1b[6;37;43m{"#" * 60} UNTYPED ATOMS {"#" * 47}\x1b[0m')
            logger.warning('\n'.join(args.untyped_atoms))
            logger.warning('')

    response = post_engine_bulk_lookup_threats.bulk_lookup_threats(
        threats=typed_atoms,
        additional_headers=accept_header,
        hashkey_only=args.hashkey_only,
    )

    pretty_print(response, args.output_type)

    if args.output:
        starter.save_output(args.output, response)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: bulk_lookup_threats.py')

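# Illustrative invocations (a sketch; the [atomtype:path] input convention is taken from
# the -i help string above, while atom flag names, file names and the --output flag are
# made-up assumptions):
#
#   $ python bulk_lookup_threats.py 8.8.8.8 evil.example -ad
#   $ python bulk_lookup_threats.py --domain evil.example -i ip:ips.txt -ot csv --output out.csv
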
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Edit scores of a specified list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to edit the score of',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
    )
    parser.add_argument(
        '--permanent',
        help='''Permanent: all values will override any values provided by both newer and older IOCs.
        Newer IOCs with override_type permanent can still override old permanent changes.
        Temporary: all values should override any values provided by older IOCs, but not newer ones.''',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: edit_score.py')

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")
    if not args.threat_types or len(args.threat_types) % 2 != 0:
        parser.error("threat_types invalid! Should be like: ddos 50 scam 15")
    parsed_threat_type = AddThreatsPost.parse_threat_types(args.threat_types)

    hashkeys = set(args.hashkeys) if args.hashkeys else set()
    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    url_threats = main_url + endpoint_url['endpoints']['threats']
    post_engine_edit_score = ThreatsScoringPost(url_threats, main_url, tokens)

    response_dict = post_engine_edit_score.post_new_score_from_list(
        hashkeys,
        parsed_threat_type,
        'permanent' if args.permanent else 'temporary',
    )

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.info(f'Results saved in {args.output}\n')
    logger.debug('END: edit_score.py')

def main(override_args=None):
    """Method to start the script"""
    # Load initial args
    parser = BaseScripts.start('Submit a new threat to Datalake from a file')
    required_named = parser.add_argument_group('required arguments')
    csv_control = parser.add_argument_group('CSV control arguments')
    required_named.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
        required=True,
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_control.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_control.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_control.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='set it to define the added threats as whitelist',
        action='store_true',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
        action='append',
    )
    parser.add_argument(
        '--tag',
        nargs='+',
        help='add a list of tags',
        default=[],
    )
    parser.add_argument(
        '--link',
        help='add link as external_analysis_link',
        nargs='+',
    )
    parser.add_argument(
        '--permanent',
        help='sets override_type to permanent. Scores won\'t be updated by the algorithm. Default is temporary',
        action='store_true',
    )
    parser.add_argument(
        '--lock',
        help='sets override_type to lock. Scores won\'t be updated by the algorithm for three months. '
             'Default is temporary',
        action='store_true',
    )
    parser.add_argument(
        '--no-bulk',
        help='force an api call for each threat, useful to retrieve the details of threats created',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: add_new_threats.py')

    if not args.threat_types and not args.whitelist:
        parser.error("threat types is required if the atom is not for whitelisting")
    if args.permanent and args.lock:
        parser.error("Only one override type is authorized")

    if args.permanent:
        override_type = OverrideType.PERMANENT
    elif args.lock:
        override_type = OverrideType.LOCK
    else:
        override_type = OverrideType.TEMPORARY

    if args.is_csv:
        try:
            list_new_threats = load_csv(args.input, args.delimiter, args.column - 1)
        except ValueError as ve:
            logger.error(ve)
            exit(1)
    else:
        list_new_threats = load_list(args.input)
    if not list_new_threats:
        parser.error('No atom found in the input file.')
    list_new_threats = defang_threats(list_new_threats, args.atom_type)
    list_new_threats = list(OrderedDict.fromkeys(list_new_threats))  # removing duplicates while preserving order

    args.threat_types = flatten_list(args.threat_types)
    threat_types = parse_threat_types(args.threat_types)
    atom_type = AtomType[args.atom_type.upper()]

    dtl = Datalake(env=args.env, log_level=args.loglevel)
    spinner = Halo(text='Creating threats', spinner='dots')
    spinner.start()
    threat_response = dtl.Threats.add_threats(list_new_threats, atom_type, threat_types, override_type,
                                              args.whitelist, args.public, args.tag, args.link, args.no_bulk)
    spinner.stop()

    terminal_size = Endpoint._get_terminal_size()
    if args.no_bulk:
        for threat in threat_response:
            logger.info(f'{threat["hashkey"].ljust(terminal_size - 6, " ")} \x1b[0;30;42m OK \x1b[0m')
    else:
        failed = []
        failed_counter = 0
        created_counter = 0
        for batch_res in threat_response:
            failed.extend(batch_res['failed'])
            for success in batch_res['success']:
                for val_created in success['created_atom_values']:
                    created_counter += 1
                    logger.info(f'{val_created.ljust(terminal_size - 6, " ")} \x1b[0;30;42m OK \x1b[0m')
        for failed_obj in failed:
            for failed_atom_val in failed_obj['failed_atom_values']:
                failed_counter += 1
                logger.info(f'Creation failed for value {failed_atom_val.ljust(terminal_size - 6, " ")} '
                            f'\x1b[0;30;41m KO \x1b[0m')
        logger.info(f'Number of batches: {len(threat_response)}\n'
                    f'Created threats: {created_counter}\n'
                    f'Failed threat creation: {failed_counter}')

    if args.output:
        save_output(args.output, threat_response)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug('END: add_new_threats.py')

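# Illustrative invocations (a sketch; atom type and threat type names mirror the help
# strings above, file and tag names are made up):
#
#   $ python add_new_threats.py -i iocs.txt -a ip -t ddos 50 scam 15 --tag campaign_x --permanent
#   $ python add_new_threats.py -i iocs.csv --is_csv -d ';' -c 2 -a url -w --no-bulk
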
def main(override_args=None):
    """Method to start the script"""
    # Load initial args
    parser = BaseScripts.start('Edit scores of a specified list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to edit the score of',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
        action='append',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='whitelist the input, equivalent to setting all threat types at 0',
        action='store_true',
    )
    parser.add_argument(
        '--permanent',
        help='''Permanent: all values will override any values provided by both newer and older IOCs.
        Newer IOCs with override_type permanent can still override old permanent changes.
        Temporary: all values should override any values provided by older IOCs, but not newer ones.''',
        action='store_true',
    )
    parser.add_argument(
        '--lock',
        help='sets override_type to lock. Scores won\'t be updated by the algorithm for three months. '
             'Default is temporary',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: edit_score.py')

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")
    if args.permanent and args.lock:
        parser.error("Only one override type is authorized")

    if args.permanent:
        override_type = OverrideType.PERMANENT
    elif args.lock:
        override_type = OverrideType.LOCK
    else:
        override_type = OverrideType.TEMPORARY

    if args.whitelist:
        parsed_threat_type = get_whitelist_threat_types()
    else:
        args.threat_types = flatten_list(args.threat_types)
        if not args.threat_types or len(args.threat_types) % 2 != 0:
            parser.error("threat_types invalid! Should be like: ddos 50 scam 15")
        parsed_threat_type = parse_threat_types(args.threat_types)

    hashkeys = args.hashkeys
    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)
        if not hashkeys:
            parser.error('No hashkey found in the input file.')

    # removing duplicates while preserving order, then splitting into batches of 100
    hashkeys_chunks = list(split_list(list(OrderedDict.fromkeys(hashkeys)) if hashkeys else [], 100))

    dtl = Datalake(env=args.env, log_level=args.loglevel)
    response_list = []
    for index, hashkeys_chunk in enumerate(hashkeys_chunks):
        try:
            dtl.Threats.edit_score_by_hashkeys(hashkeys_chunk, parsed_threat_type, override_type)
        except ValueError as e:
            logger.warning(f'\x1b[6;30;41mBATCH {index + 1}/{len(hashkeys_chunks)}: FAILED\x1b[0m')
            for hashkey in hashkeys_chunk:
                response_list.append(hashkey + ': FAILED')
                logger.warning(f'\x1b[6;30;41m{hashkey}: FAILED\x1b[0m')
            logger.warning(e)
        else:
            logger.info(f'\x1b[6;30;42mBATCH {index + 1}/{len(hashkeys_chunks)}: OK\x1b[0m')
            for hashkey in hashkeys_chunk:
                response_list.append(hashkey + ': OK')

    if args.output:
        save_output(args.output, response_list)
        logger.info(f'Results saved in {args.output}\n')
    logger.debug('END: edit_score.py')

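# Illustrative invocations (a sketch; hashkeys and file names are made up, all flags are
# the ones defined above):
#
#   $ python edit_score.py <hashkey> -t ddos 50 scam 15 --lock
#   $ python edit_score.py -i hashkeys.txt -w --permanent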