Python get_dlp_job示例，google.cloud.dlp.get_dlp_job Python示例

示例#1

0

显示文件

文件： risk.py 项目： GoogleCloudPlatform/python-docs-samples

    def callback(message):
        """Print l-diversity results once the matching job notification arrives.

        A message whose ``DlpJobName`` attribute differs from
        ``operation.name`` is dropped. A matching message is acknowledged,
        the job is fetched with ``dlp.get_dlp_job`` and every
        sensitive-value frequency histogram bucket is printed. Finally
        ``subscription.set_result(None)`` is called.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes['DlpJobName'] != operation.name:
            # This is not the message we're looking for.
            message.drop()
            return

        # This is the message we're looking for, so acknowledge it.
        message.ack()

        # Now that the job is done, fetch the results and print them.
        job = dlp.get_dlp_job(operation.name)
        histogram_buckets = (
            job.risk_details
               .l_diversity_result
               .sensitive_value_frequency_histogram_buckets)
        # Print bucket stats
        for i, bucket in enumerate(histogram_buckets):
            print('Bucket {}:'.format(i))
            print('   Bucket size range: [{}, {}]'.format(
                bucket.sensitive_value_frequency_lower_bound,
                bucket.sensitive_value_frequency_upper_bound))
            for value_bucket in bucket.bucket_values:
                # Materialize the map: on Python 3 a bare map object would
                # be printed as "<map object at ...>" instead of the values.
                print('   Quasi-ID values: {}'.format(
                    list(map(get_values, value_bucket.quasi_ids_values))))
                print('   Class size: {}'.format(
                    value_bucket.equivalence_class_size))
                for value in value_bucket.top_sensitive_values:
                    print('   Sensitive value {} occurs {} time(s)'
                          .format(value.value, value.count))
        subscription.set_result(None)

示例#2

0

显示文件

文件： risk.py 项目： GoogleCloudPlatform/python-docs-samples

    def callback(message):
        """Print categorical-stats results for the matching job notification.

        Messages whose ``DlpJobName`` attribute is not ``operation.name``
        are dropped; the matching one is acknowledged, the job is fetched
        and its value-frequency histogram buckets are printed before
        ``subscription.set_result(None)`` is called.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes['DlpJobName'] != operation.name:
            # Some other job's notification: ignore it.
            message.drop()
            return

        # Our job has finished; acknowledge the notification.
        message.ack()

        # Fetch the finished job and walk its histogram.
        finished_job = dlp.get_dlp_job(operation.name)
        categorical_result = finished_job.risk_details.categorical_stats_result
        buckets = categorical_result.value_frequency_histogram_buckets
        for index, histogram_bucket in enumerate(buckets):
            print('Bucket {}:'.format(index))
            most_common = histogram_bucket.value_frequency_upper_bound
            print('   Most common value occurs {} time(s)'.format(most_common))
            least_common = histogram_bucket.value_frequency_lower_bound
            print('   Least common value occurs {} time(s)'.format(
                least_common))
            print('   {} unique values total.'.format(
                histogram_bucket.bucket_size))
            for entry in histogram_bucket.bucket_values:
                print('   Value {} occurs {} time(s)'.format(
                    entry.value.integer_value, entry.count))
        subscription.set_result(None)

示例#3

0

显示文件

    def callback(message):
        """Print categorical-stats results, then signal ``job_done``.

        Non-matching messages (``DlpJobName`` differs from
        ``operation.name``) are dropped. Any exception is printed and then
        re-raised.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        try:
            if message.attributes['DlpJobName'] != operation.name:
                # Some other job's notification: ignore it.
                message.drop()
                return

            # Our job has finished; acknowledge the notification.
            message.ack()

            # Fetch the finished job and walk its histogram.
            finished_job = dlp.get_dlp_job(operation.name)
            categorical_result = (
                finished_job.risk_details.categorical_stats_result)
            buckets = categorical_result.value_frequency_histogram_buckets
            for index, histogram_bucket in enumerate(buckets):
                print('Bucket {}:'.format(index))
                most_common = histogram_bucket.value_frequency_upper_bound
                print('   Most common value occurs {} time(s)'.format(
                    most_common))
                least_common = histogram_bucket.value_frequency_lower_bound
                print('   Least common value occurs {} time(s)'.format(
                    least_common))
                print('   {} unique values total.'.format(
                    histogram_bucket.bucket_size))
                for entry in histogram_bucket.bucket_values:
                    print('   Value {} occurs {} time(s)'.format(
                        entry.value.integer_value, entry.count))
            # Let the main thread know it may exit.
            job_done.set()
        except Exception as exc:
            # This runs in a thread, so the exception would go unnoticed
            # unless it is printed here.
            print(exc)
            raise

示例#4

0

显示文件

    def callback(message):
        """Print per-info-type finding counts, then signal ``job_done``.

        Non-matching messages (``DlpJobName`` differs from
        ``operation.name``) are dropped. Any exception is printed and then
        re-raised.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        try:
            if message.attributes["DlpJobName"] != operation.name:
                # Some other job's notification: ignore it.
                message.drop()
                return

            # Our job has finished; acknowledge the notification.
            message.ack()

            # Fetch the finished job and report what it found.
            finished_job = dlp.get_dlp_job(operation.name)
            stats = finished_job.inspect_details.result.info_type_stats
            if not stats:
                print("No findings.")
            else:
                for stat in stats:
                    summary = "Info type: {}; Count: {}".format(
                        stat.info_type.name, stat.count
                    )
                    print(summary)

            # Let the main thread know it may exit.
            job_done.set()
        except Exception as exc:
            # This runs in a thread, so the exception would go unnoticed
            # unless it is printed here.
            print(exc)
            raise

示例#5

0

显示文件

文件： inspect_content.py 项目： GoogleCloudPlatform/python-docs-samples

    def callback(message):
        """Print per-info-type finding counts, then signal ``job_done``.

        Non-matching messages (``DlpJobName`` differs from
        ``operation.name``) are dropped. Any exception is printed and then
        re-raised.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        try:
            if message.attributes['DlpJobName'] != operation.name:
                # Some other job's notification: ignore it.
                message.drop()
                return

            # Our job has finished; acknowledge the notification.
            message.ack()

            # Fetch the finished job and report what it found.
            finished_job = dlp.get_dlp_job(operation.name)
            stats = finished_job.inspect_details.result.info_type_stats
            if not stats:
                print('No findings.')
            else:
                for stat in stats:
                    summary = 'Info type: {}; Count: {}'.format(
                        stat.info_type.name, stat.count)
                    print(summary)

            # Let the main thread know it may exit.
            job_done.set()
        except Exception as exc:
            # This runs in a thread, so the exception would go unnoticed
            # unless it is printed here.
            print(exc)
            raise

示例#6

0

显示文件

文件： risk.py 项目： chappers00/teslacam-analyser

    def callback(message):
        """Print k-anonymity results once the matching job notification arrives.

        A message whose ``DlpJobName`` attribute differs from
        ``operation.name`` is dropped. A matching message is acknowledged,
        the job is fetched with ``dlp.get_dlp_job`` and its equivalence
        class histogram buckets are printed. Finally
        ``subscription.set_result(None)`` is called.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes["DlpJobName"] != operation.name:
            # This is not the message we're looking for.
            message.drop()
            return

        # This is the message we're looking for, so acknowledge it.
        message.ack()

        # Now that the job is done, fetch the results and print them.
        job = dlp.get_dlp_job(operation.name)
        histogram_buckets = (
            job.risk_details.k_anonymity_result.equivalence_class_histogram_buckets
        )
        # Print bucket stats
        for i, bucket in enumerate(histogram_buckets):
            print("Bucket {}:".format(i))
            # Bucket details are only printed when the lower bound is truthy.
            if bucket.equivalence_class_size_lower_bound:
                print(
                    "   Bucket size range: [{}, {}]".format(
                        bucket.equivalence_class_size_lower_bound,
                        bucket.equivalence_class_size_upper_bound,
                    )
                )
                for value_bucket in bucket.bucket_values:
                    # Materialize the map: on Python 3 a bare map object
                    # would be printed as "<map object at ...>".
                    print(
                        "   Quasi-ID values: {}".format(
                            list(map(get_values, value_bucket.quasi_ids_values))
                        )
                    )
                    print(
                        "   Class size: {}".format(
                            value_bucket.equivalence_class_size
                        )
                    )
        subscription.set_result(None)

示例#7

0

显示文件

文件： risk.py 项目： chappers00/teslacam-analyser

    def callback(message):
        """Print k-map estimation results once the matching notification arrives.

        A message whose ``DlpJobName`` attribute differs from
        ``operation.name`` is dropped. A matching message is acknowledged,
        the job is fetched with ``dlp.get_dlp_job`` and its k-map
        estimation histogram is printed. Finally
        ``subscription.set_result(None)`` is called.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes["DlpJobName"] != operation.name:
            # This is not the message we're looking for.
            message.drop()
            return

        # This is the message we're looking for, so acknowledge it.
        message.ack()

        # Now that the job is done, fetch the results and print them.
        job = dlp.get_dlp_job(operation.name)
        histogram_buckets = (
            job.risk_details.k_map_estimation_result.k_map_estimation_histogram
        )
        # Print bucket stats
        for i, bucket in enumerate(histogram_buckets):
            print("Bucket {}:".format(i))
            print(
                "   Anonymity range: [{}, {}]".format(
                    bucket.min_anonymity, bucket.max_anonymity
                )
            )
            print("   Size: {}".format(bucket.bucket_size))
            for value_bucket in bucket.bucket_values:
                # Materialize the map: on Python 3 a bare map object would
                # be printed as "<map object at ...>" instead of the values.
                print(
                    "   Values: {}".format(
                        list(map(get_values, value_bucket.quasi_ids_values))
                    )
                )
                print(
                    "   Estimated k-map anonymity: {}".format(
                        value_bucket.estimated_anonymity
                    )
                )
        subscription.set_result(None)

示例#8

0

显示文件

    def callback(message):
        """Print l-diversity results once the matching job notification arrives.

        A message whose ``DlpJobName`` attribute differs from
        ``operation.name`` is dropped. A matching message is acknowledged,
        the job is fetched with ``dlp.get_dlp_job(request=...)`` and every
        sensitive-value frequency histogram bucket is printed. Finally
        ``subscription.set_result(None)`` is called.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes["DlpJobName"] != operation.name:
            # This is not the message we're looking for.
            message.drop()
            return

        # This is the message we're looking for, so acknowledge it.
        message.ack()

        # Now that the job is done, fetch the results and print them.
        job = dlp.get_dlp_job(request={"name": operation.name})
        histogram_buckets = (
            job.risk_details.l_diversity_result.
            sensitive_value_frequency_histogram_buckets  # noqa: E501
        )
        # Print bucket stats
        for i, bucket in enumerate(histogram_buckets):
            print("Bucket {}:".format(i))
            print("   Bucket size range: [{}, {}]".format(
                bucket.sensitive_value_frequency_lower_bound,
                bucket.sensitive_value_frequency_upper_bound,
            ))
            for value_bucket in bucket.bucket_values:
                # Materialize the map: on Python 3 a bare map object would
                # be printed as "<map object at ...>" instead of the values.
                print("   Quasi-ID values: {}".format(
                    list(map(get_values, value_bucket.quasi_ids_values))))
                print("   Class size: {}".format(
                    value_bucket.equivalence_class_size))
                for value in value_bucket.top_sensitive_values:
                    print("   Sensitive value {} occurs {} time(s)".format(
                        value.value, value.count))
        subscription.set_result(None)

示例#9

0

显示文件

文件： risk.py 项目： chappers00/teslacam-analyser

    def callback(message):
        """Print categorical-stats results for the matching job notification.

        Messages whose ``DlpJobName`` attribute is not ``operation.name``
        are dropped; the matching one is acknowledged, the job is fetched
        and its value-frequency histogram buckets are printed before
        ``subscription.set_result(None)`` is called.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes["DlpJobName"] != operation.name:
            # Some other job's notification: ignore it.
            message.drop()
            return

        # Our job has finished; acknowledge the notification.
        message.ack()

        # Fetch the finished job and walk its histogram.
        finished_job = dlp.get_dlp_job(operation.name)
        categorical_result = finished_job.risk_details.categorical_stats_result
        buckets = categorical_result.value_frequency_histogram_buckets
        for index, histogram_bucket in enumerate(buckets):
            print("Bucket {}:".format(index))
            most_common = histogram_bucket.value_frequency_upper_bound
            print("   Most common value occurs {} time(s)".format(most_common))
            least_common = histogram_bucket.value_frequency_lower_bound
            print("   Least common value occurs {} time(s)".format(least_common))
            unique_total = histogram_bucket.bucket_size
            print("   {} unique values total.".format(unique_total))
            for entry in histogram_bucket.bucket_values:
                print("   Value {} occurs {} time(s)".format(
                    entry.value.integer_value, entry.count))
        subscription.set_result(None)

示例#10

0

显示文件

    def callback(message):
        """Print numerical-stats results, then signal ``job_done``.

        Prints the value range and each quantile whose value differs from
        the previously printed one. Non-matching messages are dropped. Any
        exception is printed and then re-raised.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        try:
            if message.attributes['DlpJobName'] != operation.name:
                # Some other job's notification: ignore it.
                message.drop()
                return

            # Our job has finished; acknowledge the notification.
            message.ack()

            # Fetch the finished job and report its numerical statistics.
            finished_job = dlp.get_dlp_job(operation.name)
            stats = finished_job.risk_details.numerical_stats_result
            lowest = stats.min_value.integer_value
            highest = stats.max_value.integer_value
            print('Value Range: [{}, {}]'.format(lowest, highest))

            last_printed = None
            for percent, quantile in enumerate(stats.quantile_values):
                current = quantile.integer_value
                if current == last_printed:
                    # Skip runs of identical quantile values.
                    continue
                print('Value at {}% quantile: {}'.format(percent, current))
                last_printed = current
            # Let the main thread know it may exit.
            job_done.set()
        except Exception as exc:
            # This runs in a thread, so the exception would go unnoticed
            # unless it is printed here.
            print(exc)
            raise

示例#11

0

显示文件

    def callback(message):
        """Print k-map estimation results, then signal ``job_done``.

        A message whose ``DlpJobName`` attribute differs from
        ``operation.name`` is dropped. A matching message is acknowledged,
        the job is fetched with ``dlp.get_dlp_job`` and its k-map
        estimation histogram is printed. Any exception is printed and then
        re-raised.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        try:
            if message.attributes['DlpJobName'] != operation.name:
                # This is not the message we're looking for.
                message.drop()
                return

            # This is the message we're looking for, so acknowledge it.
            message.ack()

            # Now that the job is done, fetch the results and print them.
            job = dlp.get_dlp_job(operation.name)
            histogram_buckets = (job.risk_details
                                    .k_map_estimation_result
                                    .k_map_estimation_histogram)
            # Print bucket stats
            for i, bucket in enumerate(histogram_buckets):
                print('Bucket {}:'.format(i))
                print('   Anonymity range: [{}, {}]'.format(
                    bucket.min_anonymity, bucket.max_anonymity))
                print('   Size: {}'.format(bucket.bucket_size))
                for value_bucket in bucket.bucket_values:
                    # Materialize the map: on Python 3 a bare map object
                    # would be printed as "<map object at ...>".
                    print('   Values: {}'.format(
                        list(map(get_values,
                                 value_bucket.quasi_ids_values))))
                    print('   Estimated k-map anonymity: {}'.format(
                        value_bucket.estimated_anonymity))
            # Signal to the main thread that we can exit.
            job_done.set()
        except Exception as e:
            # Because this is executing in a thread, an exception won't be
            # noted unless we print it manually.
            print(e)
            raise

示例#12

0

显示文件

文件： risk.py 项目： GoogleCloudPlatform/python-docs-samples

    def callback(message):
        """Print k-map estimation results once the matching notification arrives.

        A message whose ``DlpJobName`` attribute differs from
        ``operation.name`` is dropped. A matching message is acknowledged,
        the job is fetched with ``dlp.get_dlp_job`` and its k-map
        estimation histogram is printed. Finally
        ``subscription.set_result(None)`` is called.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes['DlpJobName'] != operation.name:
            # This is not the message we're looking for.
            message.drop()
            return

        # This is the message we're looking for, so acknowledge it.
        message.ack()

        # Now that the job is done, fetch the results and print them.
        job = dlp.get_dlp_job(operation.name)
        histogram_buckets = (job.risk_details
                                .k_map_estimation_result
                                .k_map_estimation_histogram)
        # Print bucket stats
        for i, bucket in enumerate(histogram_buckets):
            print('Bucket {}:'.format(i))
            print('   Anonymity range: [{}, {}]'.format(
                bucket.min_anonymity, bucket.max_anonymity))
            print('   Size: {}'.format(bucket.bucket_size))
            for value_bucket in bucket.bucket_values:
                # Materialize the map: on Python 3 a bare map object would
                # be printed as "<map object at ...>" instead of the values.
                print('   Values: {}'.format(
                    list(map(get_values, value_bucket.quasi_ids_values))))
                print('   Estimated k-map anonymity: {}'.format(
                    value_bucket.estimated_anonymity))
        subscription.set_result(None)

示例#13

0

显示文件

def resolve_DLP(data, context):
    """Move a scanned file out of the staging bucket based on DLP findings.

    Reads the DLP job name from the Pub/Sub event, fetches that job, and
    derives the object name from the Cloud Storage URL stored in the job's
    original configuration. The object is copied to ``SENSITIVE_BUCKET``
    when the job reports at least one info type stat, otherwise to
    ``NONSENSITIVE_BUCKET``, and is then deleted from ``STAGING_BUCKET``.

    Args:
        data: The Cloud Pub/Sub event; ``data['attributes']['DlpJobName']``
            must hold the DLP job name.
        context: Not used by this function.

    Returns:
        None. Progress information is written through ``log``.
    """
    # Name of the DLP job this notification refers to.
    job_name = data['attributes']['DlpJobName']
    log('Received pub/sub notification from DLP job: {}'.format(job_name),
        severity=LOG_SEVERITY_INFO)

    # Look the job up by name.
    job = dlp.get_dlp_job(request={'name': job_name})
    log('Job Name:{name}\nStatus:{status}'.format(name=job.name,
                                                  status=job.state),
        severity=LOG_SEVERITY_INFO)

    # The scanned object's URL is kept in the original job configuration.
    # See the definition of "JSON Output" in "Limiting Cloud Storage Scans":
    # https://cloud.google.com/dlp/docs/inspecting-storage
    requested_config = job.inspect_details.requested_options.job_config
    gcs_url = requested_config.storage_config.cloud_storage_options.file_set.url
    # Keep everything after the third "/" of the URL as the object name.
    file_name = gcs_url.split("/", 3)[3]

    info_type_stats = job.inspect_details.result.info_type_stats
    source_bucket = storage_client.get_bucket(STAGING_BUCKET)
    source_blob = source_bucket.blob(file_name)

    if len(info_type_stats) == 0:
        # No sensitive data found
        log('Moving item to non-sensitive bucket', severity=LOG_SEVERITY_DEBUG)
        target_bucket_name = NONSENSITIVE_BUCKET
    else:
        # At least one kind of sensitive data was found
        for stat in info_type_stats:
            log('Found {stat_cnt} instances of {stat_type_name}.'.format(
                stat_cnt=stat.count, stat_type_name=stat.info_type.name),
                severity=LOG_SEVERITY_WARNING)
        log('Moving item to sensitive bucket', severity=LOG_SEVERITY_DEBUG)
        target_bucket_name = SENSITIVE_BUCKET

    # Copy to the chosen bucket, then remove the staged original.
    destination_bucket = storage_client.get_bucket(target_bucket_name)
    source_bucket.copy_blob(source_blob, destination_bucket, file_name)
    source_blob.delete()
    log('classifying file [{}] Finished'.format(file_name),
        severity=LOG_SEVERITY_DEBUG)

示例#14

0

显示文件

文件： risk.py 项目： pheonixblade9/python-docs-samples

    def callback(message):
        """Print numerical-stats results for the matching job notification.

        Prints the value range and each quantile whose value differs from
        the previously printed one, then calls
        ``subscription.set_result(None)``. Non-matching messages are
        dropped.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes["DlpJobName"] != operation.name:
            # Some other job's notification: ignore it.
            message.drop()
            return

        # Our job has finished; acknowledge the notification.
        message.ack()

        # Fetch the finished job and report its numerical statistics.
        finished_job = dlp.get_dlp_job(operation.name)
        stats = finished_job.risk_details.numerical_stats_result
        lowest = stats.min_value.integer_value
        highest = stats.max_value.integer_value
        print("Value Range: [{}, {}]".format(lowest, highest))

        last_printed = None
        for percent, quantile in enumerate(stats.quantile_values):
            current = quantile.integer_value
            if current == last_printed:
                # Skip runs of identical quantile values.
                continue
            print("Value at {}% quantile: {}".format(percent, current))
            last_printed = current
        subscription.set_result(None)

示例#15

0

显示文件

文件： risk.py 项目： GoogleCloudPlatform/python-docs-samples

    def callback(message):
        """Print numerical-stats results for the matching job notification.

        Prints the value range and each quantile whose value differs from
        the previously printed one, then calls
        ``subscription.set_result(None)``. Non-matching messages are
        dropped.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        if message.attributes['DlpJobName'] != operation.name:
            # Some other job's notification: ignore it.
            message.drop()
            return

        # Our job has finished; acknowledge the notification.
        message.ack()

        # Fetch the finished job and report its numerical statistics.
        finished_job = dlp.get_dlp_job(operation.name)
        stats = finished_job.risk_details.numerical_stats_result
        lowest = stats.min_value.integer_value
        highest = stats.max_value.integer_value
        print('Value Range: [{}, {}]'.format(lowest, highest))

        last_printed = None
        for percent, quantile in enumerate(stats.quantile_values):
            current = quantile.integer_value
            if current == last_printed:
                # Skip runs of identical quantile values.
                continue
            print('Value at {}% quantile: {}'.format(percent, current))
            last_printed = current
        subscription.set_result(None)

示例#16

0

显示文件

    def callback(message):
        """Print l-diversity results, then signal ``job_done``.

        A message whose ``DlpJobName`` attribute differs from
        ``operation.name`` is dropped. A matching message is acknowledged,
        the job is fetched with ``dlp.get_dlp_job`` and every
        sensitive-value frequency histogram bucket is printed. Any
        exception is printed and then re-raised.

        Args:
            message: Received notification exposing ``attributes``,
                ``ack()`` and ``drop()``.
        """
        try:
            if message.attributes['DlpJobName'] != operation.name:
                # This is not the message we're looking for.
                message.drop()
                return

            # This is the message we're looking for, so acknowledge it.
            message.ack()

            # Now that the job is done, fetch the results and print them.
            job = dlp.get_dlp_job(operation.name)
            histogram_buckets = (
                job.risk_details
                   .l_diversity_result
                   .sensitive_value_frequency_histogram_buckets)
            # Print bucket stats
            for i, bucket in enumerate(histogram_buckets):
                print('Bucket {}:'.format(i))
                print('   Bucket size range: [{}, {}]'.format(
                    bucket.sensitive_value_frequency_lower_bound,
                    bucket.sensitive_value_frequency_upper_bound))
                for value_bucket in bucket.bucket_values:
                    # Materialize the map: on Python 3 a bare map object
                    # would be printed as "<map object at ...>".
                    print('   Quasi-ID values: {}'.format(
                        list(map(get_values,
                                 value_bucket.quasi_ids_values))))
                    print('   Class size: {}'.format(
                        value_bucket.equivalence_class_size))
                    for value in value_bucket.top_sensitive_values:
                        print('   Sensitive value {} occurs {} time(s)'
                              .format(value.value, value.count))
            # Signal to the main thread that we can exit.
            job_done.set()
        except Exception as e:
            # Because this is executing in a thread, an exception won't be
            # noted unless we print it manually.
            print(e)
            raise

示例#17

0

显示文件

文件： main.py 项目： hammad42/data_loss_prevention_on_bigquery_Table

def dlp(request):
    """Inspect one file's key/value rows with Cloud DLP and graph the findings.

    Steps performed, in order:

    1. Copy the rows of ``form_key_pair`` for the requested file into a
       temporary BigQuery table named ``<uuid>_dlp``.
    2. Start a DLP inspect job over that table which saves its findings to
       ``<uuid>_dlp_job``, then wait a fixed 200 seconds.
    3. Drop the temporary table, append the findings to
       ``form_key_pair_dlp`` and drop the findings table.
    4. Join ``form_key_pair`` with ``form_key_pair_dlp`` for the file and
       merge each resulting row into Neo4j.

    Args:
        request: HTTP request object exposing ``get_json()``; the JSON body
            must contain a ``file_name`` key.

    Returns:
        The literal string ``"df"`` on success, or a ``(message, 400)``
        tuple when ``file_name`` is missing from the request body.
    """
    import logging
    import os
    import time
    import uuid

    import google.cloud.dlp
    from google.cloud import bigquery
    from neo4j import GraphDatabase
    from neo4j.exceptions import ServiceUnavailable

    # Raw string: the previous non-raw literal relied on "\g" and "\e" being
    # left untouched, which Python reports as invalid escape sequences.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
        r"C:\gcp_credentials\elaborate-howl-285701-105c2e8355a8.json"
    )
    client_bigquery = bigquery.Client()

    run_id = str(uuid.uuid4())
    print(run_id)

    # JSON message received from the HTTP request.
    request_json = request.get_json()
    file_name = (
        request_json.get("file_name")
        if isinstance(request_json, dict) else None
    )
    if not file_name:
        # Previously this path crashed later with a NameError.
        return ("Missing 'file_name' in JSON request body.", 400)
    print(file_name)

    project = 'elaborate-howl-285701'
    bigquery_project = 'elaborate-howl-285701'
    dataset_id = 'context'
    table_id = run_id + '_dlp'
    findings_table_id = '{}_job'.format(table_id)
    parent = f"projects/{project}/locations/global"

    # file_name comes from the request, so it is always passed as a query
    # parameter rather than being formatted into the SQL text.
    file_name_parameter = [
        bigquery.ScalarQueryParameter("file_name", "STRING", file_name),
    ]

    def run_query(sql, query_parameters=None):
        """Run *sql* in the US location and block until it has finished."""
        job_config = bigquery.QueryJobConfig()
        if query_parameters:
            job_config.query_parameters = query_parameters
        query_job = client_bigquery.query(
            sql, location="US", job_config=job_config)
        query_job.result()
        return query_job

    # Stage the rows of the requested file in a temporary table.
    run_query(
        """
        create table `elaborate-howl-285701.context.{table_id}` as
        SELECT * FROM `elaborate-howl-285701.context.form_key_pair`
        where file_name = @file_name;
        """.format(table_id=table_id),
        file_name_parameter,
    )

    # dlp work start
    # One entry per column. The previous single dict repeated the 'name'
    # key, so only the last column ('type') was actually excluded.
    excluded_field_names = [
        'field_name', 'time_stamp', 'validated_field_name',
        'validated_field_value', 'updated_date', 'confidence', 'updated_by',
        'key_x1', 'key_x2', 'key_y1', 'key_y2',
        'value_x1', 'value_x2', 'value_y1', 'value_y2',
        'pageNumber', 'id', 'type',
    ]
    # EMAIL_ADDRESS was listed twice before; each info type appears once.
    info_type_names = [
        'FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS', 'AGE',
        'CREDIT_CARD_NUMBER', 'DATE', 'DATE_OF_BIRTH', 'DOMAIN_NAME',
        'US_EMPLOYER_IDENTIFICATION_NUMBER',
        'US_INDIVIDUAL_TAXPAYER_IDENTIFICATION_NUMBER',
        'US_PREPARER_TAXPAYER_IDENTIFICATION_NUMBER',
        'US_SOCIAL_SECURITY_NUMBER', 'US_VEHICLE_IDENTIFICATION_NUMBER',
        'US_TOLLFREE_PHONE_NUMBER', 'US_STATE', 'US_PASSPORT',
        'US_HEALTHCARE_NPI', 'GENDER', 'LOCATION', 'PASSPORT', 'PASSWORD',
        'PHONE_NUMBER', 'STREET_ADDRESS', 'URL', 'US_BANK_ROUTING_MICR',
        'US_DEA_NUMBER', 'US_DRIVERS_LICENSE_NUMBER',
    ]
    inspect_job_data = {
        'storage_config': {
            'big_query_options': {
                'table_reference': {
                    'project_id': bigquery_project,
                    'dataset_id': dataset_id,
                    'table_id': table_id,
                },
                'identifying_fields': [{'name': 'file_name'}],
                'excluded_fields': [
                    {'name': name} for name in excluded_field_names
                ],
                'rows_limit': 10000,
                'sample_method': 'TOP',
            },
        },
        'inspect_config': {
            'info_types': [{'name': name} for name in info_type_names],
            "include_quote": True,
            "min_likelihood": 2,
        },
        'actions': [
            {
                'save_findings': {
                    'output_config': {
                        'table': {
                            'project_id': bigquery_project,
                            'dataset_id': dataset_id,
                            'table_id': findings_table_id,
                        }
                    }
                },
            },
        ],
    }
    dlp_client = google.cloud.dlp_v2.DlpServiceClient()
    operation = dlp_client.create_dlp_job(
        parent=parent, inspect_job=inspect_job_data)

    # NOTE(review): fixed wait instead of checking the job state; the
    # statements below assume the job has finished by now -- confirm.
    time.sleep(200)
    # dlp work end

    # Drop the temporary staging table.
    run_query(
        """
        drop table  `elaborate-howl-285701.context.{table_id}`;
        """.format(table_id=table_id)
    )

    # Row count of the destination table before the insert.
    destination_table = client_bigquery.get_table(
        'elaborate-howl-285701.context.form_key_pair_dlp')
    print("before insertion {} rows.".format(destination_table.num_rows))

    # Append this run's findings to form_key_pair_dlp.
    insert_sql = """
    INSERT INTO `elaborate-howl-285701.context.form_key_pair_dlp`
    SELECT * FROM `elaborate-howl-285701.context.{findings_table}`
    """.format(findings_table=findings_table_id)
    print(insert_sql)
    run_query(insert_sql)

    # Row count of the destination table after the insert.
    destination_table = client_bigquery.get_table(
        'elaborate-howl-285701.context.form_key_pair_dlp')
    print("after insertion {} rows.".format(destination_table.num_rows))

    # Print one summary line per info type found by the job.
    job = dlp_client.get_dlp_job(request={"name": operation.name})
    for finding in job.inspect_details.result.info_type_stats:
        print("Info type: {}; Count: {}".format(
            finding.info_type.name, finding.count))

    # Drop the per-run findings table.
    run_query(
        """
        drop table  `elaborate-howl-285701.context.{findings_table}`;
        """.format(findings_table=findings_table_id)
    )

    # work for neo4j starts
    match_sql = """
    select distinct  a.field_value, a.field_name, b.info_type.name as info_, b.likelihood from `elaborate-howl-285701.context.form_key_pair` a,
    `elaborate-howl-285701.context.form_key_pair_dlp` b
    where a.file_name = @file_name
    and lower(a.field_value)=lower(b.quote);
    """
    df = client_bigquery.query(
        match_sql,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
        job_config=bigquery.QueryJobConfig(
            query_parameters=file_name_parameter),
    ).to_dataframe()
    rows = list(zip(df.field_value, df.field_name, df.info_))

    class App:
        """Thin wrapper around a Neo4j driver for merging DLP findings."""

        def __init__(self, uri, user, password):
            self.driver = GraphDatabase.driver(uri, auth=(user, password))

        def close(self):
            # Close the driver connection when finished with it.
            self.driver.close()

        def create_friendship(self, file_name, field_value, field_name, info_):
            with self.driver.session() as session:
                # Write transactions allow the driver to handle retries and
                # transient errors.
                result = session.write_transaction(
                    self._create_and_return_friendship,
                    file_name, field_value, field_name, info_)
                print(result)

        @staticmethod
        def _create_and_return_friendship(tx, file_name, field_value,
                                          field_name, info_):
            # To learn more about the Cypher syntax, see
            # https://neo4j.com/docs/cypher-manual/current/
            query = """
            merge (n:File {Name: $file_name})
            merge (m:FIELD {Name: $field_name})
            merge (e:VALUE {value: $field_value})
            merge (w:DLP_Classification {NAME: $info_})

            merge (n)-[p:CONTAINS_FIELD]->(m)
            merge (n)-[q:CONTAINS_VALUE]->(e)
            merge (e)-[r:TYPE_IS]->(m)
            merge (m)-[s:DATA_Classification]->(w)

            RETURN n, m, e, w, p, q, r, s
            """
            result = tx.run(query, file_name=file_name,
                            field_value=field_value, field_name=field_name,
                            info_=info_)
            try:
                # Read the properties the query actually sets: File nodes
                # carry "Name" and VALUE nodes carry "value".
                return [{"n": row["n"]["Name"], "e": row["e"]["value"]}
                        for row in result]
            # Capture any errors along with the query for traceability.
            except ServiceUnavailable as exception:
                logging.error("{query} raised an error: \n {exception}".format(
                    query=query, exception=exception))
                raise

    if rows:
        bolt_url = "neo4j+s://cfb079ca.databases.neo4j.io"
        user = "******"
        password = "******"
        # One driver for all rows, always closed, instead of opening and
        # closing a new driver for every row.
        app = App(bolt_url, user, password)
        try:
            for field_value, field_name, info_name in rows:
                print('{},{},{}'.format(field_value, field_name, info_name))
                app.create_friendship(
                    file_name, field_value, field_name, info_name)
        finally:
            app.close()

    return "df"