示例#1
0
def get_all_objects_for_project(db, project_id):
    """
    Gather the file references stored in the database for a project.

    For every data provider of the project, the ``file`` value of one
    ``uploads`` row is collected (rows with no file are skipped). When the
    project's ``result_type`` is ``"similarity_scores"``, the files returned
    by ``get_project_similarity_files`` are appended as well.

    :param db: Database connection handed to the query helpers.
    :param project_id: Identifier of the project to inspect.
    :return: A list of file references (presumably object store names --
        TODO confirm against the uploader).
    """
    result_type = get_project_column(db, project_id, 'result_type')

    provider_rows = query_db(
        db, """
        SELECT id
        FROM dataproviders
        WHERE project = %s
        """, [project_id])

    upload_sql = """
            SELECT file FROM uploads
            WHERE dp = %s
            """

    files = []
    for provider in provider_rows:
        # one=True: only a single upload row per data provider is considered.
        upload_row = query_db(db, upload_sql, [provider['id']], one=True)

        # Nothing to record when there is no upload row or it has no file.
        if upload_row is None or upload_row['file'] is None:
            continue

        logger.info("upload record found: {}".format(upload_row))
        files.append(upload_row['file'])

    if result_type == "similarity_scores":
        files += get_project_similarity_files(db, project_id)

    return files
示例#2
0
def insert_encoding_metadata(db, clks_filename, dp_id, receipt_token, encoding_count, block_count):
    """
    Insert a new row into the ``uploads`` table with state ``'pending'``.

    :param db: Database connection; a cursor is opened on it for the insert.
    :param clks_filename: Value stored in the ``file`` column.
    :param dp_id: Value stored in the ``dp`` column.
    :param receipt_token: Value stored in the ``token`` column.
    :param encoding_count: Value stored in the ``count`` column.
    :param block_count: Value stored in the ``block_count`` column.

    NOTE(review): no commit is issued here -- presumably the caller manages
    the transaction; confirm.
    """
    logger.info("Adding metadata on encoded entities to database")

    insert_sql = """
        INSERT INTO uploads
        (dp, token, file, count, block_count, state)
        VALUES
        (%s, %s, %s, %s, %s, %s)
        """
    # Order must match the column list in the statement above.
    row = [dp_id, receipt_token, clks_filename, encoding_count, block_count, 'pending']

    with db.cursor() as cursor:
        cursor.execute(insert_sql, row)
示例#3
0
def get_run_state(db, run_id):
    """
    Look up the ``state`` column of the ``runs`` row matching ``run_id``.

    :param db: Database connection handed to ``query_db``.
    :param run_id: Identifier of the run to look up.
    :return: The value of the run's ``state`` column.

    NOTE(review): if no row matches, ``query_db(..., one=True)`` presumably
    returns None and the subscript below raises ``TypeError`` -- confirm
    callers only pass existing run ids.
    """
    logger.info("Selecting run")

    row = query_db(
        db,
        """
        SELECT state
        FROM runs
        WHERE
          run_id = %s
        """,
        [run_id],
        one=True)

    run_state = row['state']
    logger.info("Run with run_id={} is in state: {}".format(run_id, run_state))
    return run_state
示例#4
0
def update_encoding_metadata_set_encoding_size(db, dp_id, encoding_size):
    """
    Set ``encoding_size`` on the ``bloomingdata`` rows of a data provider.

    :param db: Database connection; a cursor is opened on it for the update.
    :param dp_id: Data provider id matched against the ``dp`` column.
    :param encoding_size: New value for the ``encoding_size`` column.
    """
    update_sql = """
        UPDATE bloomingdata
        SET
          encoding_size = %s
        WHERE
          dp = %s
        """
    # Placeholder order: SET value first, then the WHERE filter.
    params = [encoding_size, dp_id]

    logger.info("Updating database with info about encodings")
    with db.cursor() as cursor:
        cursor.execute(update_sql, params)
示例#5
0
def update_encoding_metadata_set_encoding_size(db, dp_id, encoding_size):
    """
    Set ``encoding_size`` on the ``uploads`` rows of a data provider.

    :param db: Database connection; a cursor is opened on it for the update.
    :param dp_id: Data provider id matched against the ``dp`` column.
    :param encoding_size: New value for the ``encoding_size`` column.
    """
    update_sql = """
        UPDATE uploads
        SET
          encoding_size = %s
        WHERE
          dp = %s
        """
    # Placeholder order: SET value first, then the WHERE filter.
    params = [encoding_size, dp_id]

    logger.info(f"Updating uploads table for dp {dp_id} with encoding size ({encoding_size})")
    with db.cursor() as cursor:
        cursor.execute(update_sql, params)
示例#6
0
def update_encoding_metadata(db, clks_filename, dp_id, state):
    """
    Update ``state`` and ``file`` on the ``bloomingdata`` rows of a data provider.

    :param db: Database connection; a cursor is opened on it for the update.
    :param clks_filename: New value for the ``file`` column.
    :param dp_id: Data provider id matched against the ``dp`` column.
    :param state: New value for the ``state`` column.
    """
    update_sql = """
        UPDATE bloomingdata
        SET
          state = %s,
          file = %s
        WHERE
          dp = %s
        """
    # Placeholder order differs from the signature: state, file, then dp.
    params = [state, clks_filename, dp_id]

    logger.info("Updating database with info about encodings")
    with db.cursor() as cursor:
        cursor.execute(update_sql, params)
示例#7
0
def insert_blocking_metadata(db, dp_id, blocks):
    """
    Bulk-insert one row per block into the blocks table.

    Every inserted row carries the given data provider id and the state
    ``'pending'``.

    :param db: Database connection; a cursor is opened on it for the insert.
    :param dp_id: Data provider id stored in the ``dp`` column of each row.
    :param blocks: A dict mapping block id to the number of encodings per block.
    """
    logger.info("Adding blocking metadata to database")
    bulk_insert_sql = """
        INSERT INTO blocks
        (dp, block_name, count, state)
        VALUES %s
        """

    logger.info("Preparing SQL for bulk insert of blocks")
    # One (dp, block_name, count, state) tuple per entry of the mapping.
    rows = [(dp_id, name, size, 'pending') for name, size in blocks.items()]

    with db.cursor() as cursor:
        # execute_values expands the single %s into the list of row tuples.
        psycopg2.extras.execute_values(cursor, bulk_insert_sql, rows)
示例#8
0
def get_all_objects_for_project(db, project_id):
    """
    Gather the file references stored in the database for a project.

    For every data provider of the project, the ``file`` value of one
    ``bloomingdata`` row is collected. When the project's ``result_type`` is
    ``"similarity_scores"``, the ``file`` of every ``similarity_scores`` row
    belonging to one of the project's runs is appended as well.

    Rows whose ``file`` column is NULL are skipped so the returned list never
    contains ``None`` entries from the ``bloomingdata`` lookup.

    :param db: Database connection handed to the query helpers.
    :param project_id: Identifier of the project to inspect.
    :return: A list of file references (presumably object store names --
        TODO confirm against the uploader).
    """
    result_type = get_project_column(db, project_id, 'result_type')
    object_store_files = []
    dps = query_db(
        db, """
        SELECT id
        FROM dataproviders
        WHERE project = %s
        """, [project_id])

    for dp in dps:
        # one=True: only a single bloomingdata row per data provider is used.
        clk_file_ref = query_db(db,
                                """
            SELECT file FROM bloomingdata
            WHERE dp = %s
            """, [dp['id']],
                                one=True)

        # Skip providers with no row, and rows whose file column is NULL;
        # appending None would hand callers an invalid file reference.
        if clk_file_ref is None or clk_file_ref['file'] is None:
            continue

        logger.info("blooming data file found: {}".format(clk_file_ref))
        object_store_files.append(clk_file_ref['file'])

    if result_type == "similarity_scores":
        query_response = query_db(
            db, """
            SELECT 
              similarity_scores.file
            FROM 
              similarity_scores, runs
            WHERE 
              runs.run_id = similarity_scores.run AND
              runs.project = %s
            """, [project_id])
        object_store_files.extend(res['file'] for res in query_response)

    return object_store_files