Example #1
def main():

    # keep a record of key data items so we can log what we've done
    auditor = Auditor(JOB_NAME, r'../../config/va_auditor.yaml')
    auditor.commencement_time = datetime.datetime.today()

    # make sure the temp file isn't there from a previous run
    if os.path.exists(TEMP_FILE):
        os.remove(TEMP_FILE)

    # get details of qids from KB files
    qids = {}
    for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                     KB_SOURCE_BLOB):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            try:
                qids.update(extract_kb(line))

            except Exception as ex:
                print(f"Exception in processing KB entries: {ex}")
                print(line)
                sys.exit(1)

    # main loop
    for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                     SOURCE_BLOB):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            auditor.records_read += 1
            try:
                records = extract_details(line, qids)

            except Exception as ex:
                print(f"Exception in extracting QVM record: {ex}")
                print(line)
                sys.exit(1)

            for record in records:
                try:
                    dt.write_json_line(record, TEMP_FILE)
                    auditor.records_written += 1

                except Exception as ex:
                    print(f"Exception in writing json line: {ex}")
                    print(record)
                    sys.exit(1)

    dt.save_file_to_bucket(TEMP_FILE, TARGET_PROJECT, TARGET_BUCKET,
                           TARGET_BLOB)

    # clean up the temp file
    if os.path.exists(TEMP_FILE):
        os.remove(TEMP_FILE)

    auditor.completion_time = datetime.datetime.today()
    auditor.log_event()
Example #2
def main(run_date):
    
    # keep a record of key data items so we can log what we've done
    with Auditor(data_set=JOB_NAME) as auditor:
        
        graph = load_cmdb_graph(JOB_NAME, SOURCE_PROJECT, SOURCE_BUCKET, CMDB_GRAPH_BLOB)

        # set up a temp file for saving to
        # set the auditor to automatically track the written records 
        temp_file = dt.temp_file(JOB_NAME, auditor)
        
        # the main processing loop
        for blob in dt.get_list_of_blobs(
                SOURCE_PROJECT, SOURCE_BUCKET,
                VM_FINDINGS_BLOB + '.*' + run_date.strftime('%Y-%m-%d')):
            
            print(blob.name)
            
            for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET, blob.name):
                auditor.records_read += 1
                vm_finding = json.loads(line)
                by_ip = find_record_in_graph(graph, vm_finding.get('IP'), vm_finding.get('NETBIOS'))
                merged = {**vm_finding, **by_ip}
                temp_file.write_json_line(merged)
        
        blob_name = TARGET_BLOB.replace('%date', '%Y-%m-%d')
        blob_name = run_date.strftime(blob_name)
    
        temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET, blob_name)
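The two-step blob naming above (swap a '%date' placeholder for a strftime pattern, then format with the run date) recurs in several of these jobs. A small worked example, using a hypothetical TARGET_BLOB value purely for illustration:

import datetime

TARGET_BLOB = 'reports/vm_findings_%date.json'  # hypothetical value for illustration
run_date = datetime.date(2021, 3, 7)

blob_name = TARGET_BLOB.replace('%date', '%Y-%m-%d')  # 'reports/vm_findings_%Y-%m-%d.json'
blob_name = run_date.strftime(blob_name)              # 'reports/vm_findings_2021-03-07.json'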
Example #3
def main(run_date):

    # keep a record of key data items so we can log what we've done
    with Auditor(JOB_NAME, r'../../config/va_auditor.yaml') as auditor:

        # set up a temp file to save the records to
        temp_file = dt.temp_file(JOB_NAME, auditor)

        records = {}

        # the main loop
        for blob in dt.get_list_of_blobs(
                SOURCE_PROJECT, SOURCE_BUCKET,
                SOURCE_BLOB + '.*' + run_date.strftime('%Y-%m-%d')):
            for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                           blob.name):
                details = extract_details(line)
                if details[0] in records:
                    records[details[0]].append(details[1])
                else:
                    records[details[0]] = [details[1]]

        for record in records:
            json_line = {"QID": record, "CVES": records[record]}
            temp_file.write_json_line(json_line)

        blob_name = TARGET_BLOB.replace('%date', '%Y-%m-%d')
        blob_name = run_date.strftime(blob_name)
        temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET, blob_name)
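The index built above maps each QID to the list of CVEs seen for it. The same shape can also be expressed with collections.defaultdict, which removes the membership test; a minimal sketch with the (QID, CVE) pairs made up for illustration:

from collections import defaultdict

records = defaultdict(list)
# stand-ins for the pairs that extract_details returns
for qid, cve in [('90001', 'CVE-2021-0001'), ('90001', 'CVE-2021-0002')]:
    records[qid].append(cve)

for qid, cves in records.items():
    print({"QID": qid, "CVES": cves})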
Example #4
def load_cmdb_graph(job_name, source_project, source_bucket, source_blob):
    graph_file = dt.temp_file(job_name + '_GRAPH')
    # copy the graph file locally (the pattern is expected to match a single
    # GraphML blob; each further match would overwrite the local copy)
    for blob in dt.get_list_of_blobs(source_project, source_bucket, source_blob):
        with open(graph_file.file_name, 'wb') as file:
            blob.download_to_file(file)
    # open the graph file
    graph = nx.read_graphml(graph_file.file_name)
    print(f"Status: {len(graph.nodes)} nodes and {len(graph.edges)} edges")
    return graph
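find_record_in_graph, called against this graph in Example #2, is not shown in these listings. A minimal sketch of one way it could work, assuming nodes in the GraphML file carry 'IP' and 'NETBIOS' attributes — the attribute names, the lookup order and the empty-dict fallback are assumptions, not the actual implementation:

def find_record_in_graph(graph, ip, netbios):
    # prefer an exact IP match, then fall back to the NETBIOS name
    for _, attrs in graph.nodes(data=True):
        if ip and attrs.get('IP') == ip:
            return attrs
    for _, attrs in graph.nodes(data=True):
        if netbios and attrs.get('NETBIOS') == netbios:
            return attrs
    return {}  # no match: the caller's dict merge then adds nothing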
Example #5
def process_job(job_name, source_blob, target_blob):

    # keep a record of key data items so we can log what we've done
    with Auditor(data_set=job_name) as auditor:
        # set up a temp file for saving to
        # set the auditor to automatically track the written records
        temp_file = dt.temp_file(job_name, auditor)

        # we can't be sure today's files will be present, so look for the latest files
        for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                         source_blob):
            # we want the whole file, so download it all at once.
            payload = blob.download_as_string()
            json_block = json.loads(payload)
            for json_record in json_block:
                auditor.records_read += 1
                temp_file.write_json_line(json_record)

        temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET, target_blob)
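dt.temp_file and its auditor hook are internal helpers that are only called in these listings, never defined. A rough sketch of what such a wrapper might look like, assuming the google-cloud-storage client and an auditor exposing a records_written counter — the class name and behaviour are assumptions based solely on how the helper is used above:

import json
import os
import tempfile

from google.cloud import storage


class TempJsonLinesFile:
    """Hypothetical stand-in for dt.temp_file: write JSON lines locally, then upload."""

    def __init__(self, job_name, auditor=None):
        self.auditor = auditor
        self.file_name = os.path.join(tempfile.gettempdir(), f'{job_name}.jsonl')

    def write_json_line(self, record):
        with open(self.file_name, 'a') as handle:
            handle.write(json.dumps(record) + '\n')
        if self.auditor is not None:
            self.auditor.records_written += 1  # the automatic tracking the comments mention

    def save_to_bucket(self, project, bucket_name, blob_name):
        client = storage.Client(project=project)
        client.bucket(bucket_name).blob(blob_name).upload_from_filename(self.file_name)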
Example #6
def main(run_date):

    # keep a record of key data items so we can log what we've done
    auditor = Auditor(JOB_NAME, r'../../config/va_auditor.yaml')
    auditor.commencement_time = datetime.datetime.today()

    # set up a temp file for saving to
    # set the auditor to automatically track the written records
    temp_file = dt.temp_file(JOB_NAME, auditor)

    # Create the QVM "all" report.
    #
    # Inputs:
    #   - CVE summaries (CVE id, CVSS data, MFL/exploit data), keyed by CVE id
    #   - the QID -> CVE map, a many-to-many relationship searchable by QID or CVE
    #   - asset findings (Qualys machine scan results merged with CMDB data), keyed by QID and IP address
    #   - QID summaries (Qualys descriptions and the like), keyed by QID
    #
    # For each asset finding (which carries an IP, some CMDB data and a QID), look up its CVEs
    # via the QID -> CVE map, pull the matching CVE summary data, derive a triage rating from
    # the combined data using the existing triage algorithm, add any QID description needed,
    # and write the enriched record out as a JSON line.

    # Get CVE summary data
    CVESummaries = {}
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET,
            CVE_SUMMARY_SOURCE_BLOB_PATH + '.*' + run_date.strftime('%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            data_record = json.loads(line)
            CVESummaries[data_record['CVE']] = data_record

    # Likewise QID summaries (will have the QID verbose description on it)
    QIDSummaries = {}
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET,
            QID_SUMMARY_SOURCE_BLOB_PATH + '.*' + run_date.strftime('%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            data_record = json.loads(line)
            QIDSummaries[data_record['QID']] = data_record

    # And finally likewise the QID -> CVE map data. This is many <-> many, so collect it as sets of CVE Ids
    # which are keyed by the QID in question, as it will be searched by QID.
    CVEsForAllQIDs = {}
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET,
            QID_CVE_SOURCE_BLOB_PATH + '.*' + run_date.strftime('%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            data_record = json.loads(line)
            if data_record['QID'] in CVEsForAllQIDs:
                # Add to existing set
                CVEsForAllQIDs[data_record['QID']].add(data_record['CVE'])
            else:
                # New item on dict creating a new set
                CVEsForAllQIDs[data_record['QID']] = {data_record['CVE']}

    # Now, parse the whole finding set retrieving the enrichment data from the existing indices
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET,
            ASSET_FINDINGS_SOURCE_BLOB_PATH + '.*' + run_date.strftime('%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            finding = json.loads(line)

            # Do some column renames where appropriate to match VSM reporting names
            finding['VulnID'] = finding.pop('QID')
            finding['ScanScore'] = finding.pop('SEVERITY')

            environment = (finding.get('ENVIRONMENT') or '').upper()
            serverIsProduction = environment.startswith('PROD')

            if 'CBP' in finding:
                CBP = getMaxCBP(finding['CBP'])
                # Homogenise the values
                if 'NONE' in CBP.upper():
                    CBP = ''
            else:
                CBP = ''  # Presumes no CBP if no data returned. May need to revisit

            # Return the CBP value to the findings dict so that its duplicates are eliminated
            finding['CBP'] = CBP

            # Add various keys that are missing in some cases with empty values to the
            # finding so that the output data is consistent in the fields it presents

            for field, default in (('PORT', ''), ('SOX', 'false'), ('STEWARD', ''),
                                   ('CMDB_OS', ''), ('CMDB_OS_VERSION', '')):
                if finding.get(field) is None:
                    finding[field] = default

            # Retrieve the QID summary for the finding
            if finding['VulnID'] in QIDSummaries:
                qidSummary = QIDSummaries[finding['VulnID']]
            else:
                # Got a QID with no summary, so build a dummy one. Should really not happen.
                qidSummary = {
                    'QID': finding['VulnID'],
                    'Patchable': 'Unknown',
                    'Published_Date': 'Unknown',
                    'baseScore': 0,
                    'availabilityImpact': 'NONE',
                    'confidentialityImpact': 'NONE',
                    'integrityImpact': 'NONE',
                    'VulnerabilityName': '',
                    'Category': '',
                    'Solution': '',
                    'VendorNotes': ''
                }

            # Get all the CVEs associated with the finding (may be more than one)
            if finding['VulnID'] in CVEsForAllQIDs:
                # Code to generate triage based upon matching CVE data
                CVEIdsForQID = CVEsForAllQIDs[finding['VulnID']]

                # Get all the summaries. The dictionary comprehension below builds a new
                # dict from the entries of CVESummaries whose key appears in CVEIdsForQID.
                CVESummariesForQID = {
                    k: v
                    for (k, v) in CVESummaries.items() if k in CVEIdsForQID
                }

                # Get a single line rollup of all the CVE data for the QID that can then be used for both triage and return data.
                cveSummaryForQID = CVESummaryForQID(CVESummariesForQID)

                # The triage will rely on the highest/worst values for any of the CVEs returned, so pass the generator for those into
                # a routine to derive that.
                TriageString = Triage(cveSummaryForQID['MFL'],
                                      cveSummaryForQID['BaseScore'],
                                      cveSummaryForQID['Exploit_Known'],
                                      cveSummaryForQID['UserInteraction'],
                                      serverIsProduction, CBP,
                                      cveSummaryForQID['Confidentiality'],
                                      cveSummaryForQID['Integrity'],
                                      cveSummaryForQID['Availability'])

                # Finally, bundle the whole lot together as a dict of output data.
                data_out = dict(finding, **cveSummaryForQID)  # merge the two dicts

            else:  # QID has no matching CVE/CVSS data. Generate triage based off Qualys data. TODO Find correct Algo for this
                # Prepare a dict to look like the CVSS one. Score and vectors are taken from the QID summary
                # UI is presumed to be false, as this data is not available for QID findings (and QID findings tend
                # to be stuff like unpatched software which require no UI anyway)
                fakeCVESummary = {
                    'CVE': '',
                    'Confidentiality':
                    qidSummary['confidentialityImpact'].upper(),
                    'Integrity': qidSummary['integrityImpact'].upper(),
                    'Availability': qidSummary['availabilityImpact'].upper(),
                    'UserInteraction': False,
                    'BaseScore': float(qidSummary['baseScore']),
                    'MFL': False,
                    'Exploit_Known': False,
                    'MFLCVEs': '',
                    'MFLCount': 0
                }

                # Prepare a Triage string based upon the QID data as loaded into the fake CVE summary above
                TriageString = Triage(fakeCVESummary['MFL'],
                                      fakeCVESummary['BaseScore'],
                                      fakeCVESummary['Exploit_Known'],
                                      fakeCVESummary['UserInteraction'],
                                      serverIsProduction, CBP,
                                      fakeCVESummary['Confidentiality'],
                                      fakeCVESummary['Integrity'],
                                      fakeCVESummary['Availability'])

                # And create the output record much as before
                data_out = dict(finding, **fakeCVESummary)  # merge the two dicts

            # Add the required fields from the QID summary to the output
            data_out['Patchable'] = qidSummary['Patchable']
            data_out['Published_Date'] = qidSummary['Published_Date']
            data_out['VulnerabilityName'] = qidSummary.get('VulnerabilityName') or ''
            data_out['Category'] = qidSummary.get('Category') or ''
            data_out['Solution'] = qidSummary.get('Solution') or ''
            data_out['VendorReferences'] = qidSummary.get('VendorReferences') or ''

            # Add the triage string
            data_out['TriagedRating'] = TriageString

            # Derive the ScanType from the supplied ASSET_TYPE if it is present
            if 'ASSET_TYPE' not in finding or finding['ASSET_TYPE'] is None:
                data_out['ScanType'] = ''  # Don't set this if there is no ASSET_TYPE. May change.
            elif finding['ASSET_TYPE'] == 'server':
                data_out['ScanType'] = 'I'  # Internal
            elif finding['ASSET_TYPE'] == 'workstation':
                data_out['ScanType'] = 'E'  # Endpoint
            else:
                data_out['ScanType'] = ''  # Should never be hit, but assures that a value of some sort is returned

            # Add the derived date-based data
            data_out['ReportDate'] = datetime.datetime.now().strftime(
                '%Y-%m-%dT%H:%M:%SZ')
            data_out['Cycle'] = datetime.datetime.now().strftime('%m %Y')

            firstFoundDate = datetime.datetime.strptime(
                finding['FIRST_FOUND_DATETIME'], '%Y-%m-%dT%H:%M:%SZ')
            delta = datetime.datetime.now() - firstFoundDate
            data_out['DaysSinceFirstFound'] = delta.days

            if 'High' in TriageString:
                targetRemediationDate = firstFoundDate + timedelta(weeks=4)
            elif 'Medium' in TriageString:
                # 6 months is a variable time. Pick a good approximation
                targetRemediationDate = firstFoundDate + timedelta(days=183)
            else:  # Low
                # as is one year (think leap years). Again, approximate
                targetRemediationDate = firstFoundDate + timedelta(days=365)
            data_out['RemediationDue'] = targetRemediationDate.strftime(
                '%Y-%m-%dT%H:%M:%SZ')

            data_out['TargetBreached'] = targetRemediationDate < datetime.datetime.now()

            # Other fields
            data_out['Concat'] = finding['ID'] + '-' + finding['VulnID']

            # Write out line to temp file (calls json.dumps to write string out)
            temp_file.write_json_line(data_out)

    # finally write out the temp file to the bucket after incorporating the run_date
    preFormat = TARGET_BLOB.replace('%date', '%Y-%m-%d')
    destinationFile = run_date.strftime(preFormat)
    temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET, destinationFile)

    # No need to explicitly remove the local file. temp_file class has a destructor that will do that.
    temp_file = None
    auditor.completion_time = datetime.datetime.today()
    auditor.log_event()
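The remediation deadline above depends only on the triage string and the first-found date, so it can be lifted into a small pure function that is easy to unit test; a sketch of that refactoring, mirroring the rules in the loop:

import datetime
from datetime import timedelta


def remediation_due(triage_string, first_found_date):
    """Target remediation date: 4 weeks for High, ~6 months for Medium, ~1 year for Low."""
    if 'High' in triage_string:
        return first_found_date + timedelta(weeks=4)
    if 'Medium' in triage_string:
        return first_found_date + timedelta(days=183)
    return first_found_date + timedelta(days=365)


# e.g. a 'Medium' finding first seen on 1 Jan 2021 falls due around 3 Jul 2021
print(remediation_due('Medium', datetime.datetime(2021, 1, 1)))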
Example #7
def main():

    # keep a record of key data items so we can log what we've done
    with Auditor(JOB_NAME, r'../../config/va_auditor.yaml') as auditor:

        # set up a temp file for saving to
        # set the auditor to automatically track the written records
        temp_file = dt.temp_file(JOB_NAME, auditor)

        # create a list of the CVEs in these two sets
        mfl_blob = dt.select_file_records(SOURCE_PROJECT, SOURCE_BUCKET,
                                          MFL_LIST_BLOB)
        mfl_index = set(jl.create_index(mfl_blob, 'CVE'))
        edb_blob = dt.select_file_records(SOURCE_PROJECT, SOURCE_BUCKET,
                                          CVES_WITH_EXPLOITS_BLOB)
        edb_index = set(jl.create_index(edb_blob, 'CVE'))

        # the main loop
        for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                         NVD_CVE_SUMMARY_BLOB):
            for nvd_cve_summary_line in dt.read_blob_lines(
                    SOURCE_PROJECT, SOURCE_BUCKET, blob.name):
                record = json.loads(nvd_cve_summary_line)

                result = {}
                result['CVE'] = record.get('CVE')

                if record['v2.0'] != {}:
                    result['Confidentiality'] = record['v2.0'].get(
                        'confidentialityImpact')
                    result['Integrity'] = record['v2.0'].get('integrityImpact')
                    result['Availability'] = record['v2.0'].get(
                        'availabilityImpact')
                    result['UserInteraction'] = record['v2.0'].get(
                        'userInteractionRequired')
                    result['BaseScore'] = record['v2.0'].get('baseScore')
                elif record['v3.0'] != {}:
                    result['Confidentiality'] = record['v3.0'].get(
                        'confidentialityImpact')
                    result['Integrity'] = record['v3.0'].get('integrityImpact')
                    result['Availability'] = record['v3.0'].get(
                        'availabilityImpact')
                    result['UserInteraction'] = record['v3.0'].get(
                        'userInteraction')
                    result['BaseScore'] = record['v3.0'].get('baseScore')
                else:
                    result['Confidentiality'] = ''
                    result['Integrity'] = ''
                    result['Availability'] = ''
                    result['UserInteraction'] = ''
                    result['BaseScore'] = ''

                # could have also implemented by adding an MFL=True
                # column to the MFL set and joined on CVE
                result = jl.set_value(result, 'MFL',
                                      lambda x: x.get('CVE') in mfl_index)
                result = jl.set_value(result, 'Exploit_Known',
                                      lambda x: x.get('CVE') in edb_index)
                temp_file.write_json_line(result)

        # save the temp file to the bucket
        temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET, TARGET_BLOB)
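The comment in the loop above notes an alternative to the membership-test lambdas: carry an explicit MFL=True column on the MFL records and join on CVE. A small sketch of that variant in plain dict terms, with the record values made up for illustration:

mfl_index = {'CVE-2020-0001', 'CVE-2020-0002'}            # CVEs on the MFL list
mfl_columns = {cve: {'MFL': True} for cve in mfl_index}   # the MFL=True "column"

result = {'CVE': 'CVE-2020-0001', 'BaseScore': 9.8}
result.update(mfl_columns.get(result['CVE'], {'MFL': False}))  # join on CVE
print(result)  # {'CVE': 'CVE-2020-0001', 'BaseScore': 9.8, 'MFL': True}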
Example #8
def isIP(IP):
    # dotted-quad IPv4
    if IP.count(".") == 3 and all(isIPv4_part(i) for i in IP.split(".")):
        return True
    # fully expanded IPv6 only (eight colon-separated groups; '::' compression is not handled)
    if IP.count(":") == 7 and all(isIPv6_part(i) for i in IP.split(":")):
        return True
    return False
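# The colon-count test above only matches fully expanded IPv6 addresses; compressed
# forms such as '::1' are rejected. If that strictness isn't required, the standard
# library's ipaddress module covers both families. A sketch of a broader check (an
# alternative, not what this job uses):
import ipaddress


def is_ip(value):
    """Return True if value parses as an IPv4 or IPv6 address."""
    try:
        ipaddress.ip_address(value)
        return True
    except ValueError:
        return False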


cmdb_graph = nx.DiGraph()

errorbin = dt.temp_file(JOB_NAME + '-errorbin')

counter = 0
print('adding relationships')
for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                 RELATIONSHIP_BLOB):
    for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET, blob.name):
        try:
            record = json.loads(line)
            counter += 1

            # pre-defined edges - most aren't needed but we can't tell at this point
            parent_sys_id = extract_sysid(record.get('parent_link'))
            child_sys_id = extract_sysid(record.get('child_link'))

            # predefined relationships cover both directions, so
            # split the type and build the reciprocal relationship
            relationship = record.get('type_display_value',
                                      'unknown by::unknown to').split('::')
            cmdb_graph.add_edge(child_sys_id,
                                parent_sys_id,