def get_all_objects_for_project(db, project_id):
    """Collect the object store file references that belong to a project.

    For every data provider of the project the ``file`` value of its
    ``uploads`` row is gathered, skipping providers without a row or whose
    ``file`` is None. When the project's ``result_type`` is
    ``"similarity_scores"`` the files returned by
    ``get_project_similarity_files`` are appended afterwards.

    :param db: Database connection passed through to the query helpers.
    :param project_id: Identifier of the project to inspect.
    :return: List of file references, upload files first.
    """
    providers_sql = """ SELECT id FROM dataproviders WHERE project = %s """
    upload_sql = """ SELECT file FROM uploads WHERE dp = %s """

    result_type = get_project_column(db, project_id, 'result_type')
    provider_rows = query_db(db, providers_sql, [project_id])

    files = []
    for provider in provider_rows:
        upload = query_db(db, upload_sql, [provider['id']], one=True)
        # Nothing to collect when there is no upload row or no stored file.
        if upload is None or upload['file'] is None:
            continue
        logger.info("upload record found: {}".format(upload))
        files.append(upload['file'])

    if result_type == "similarity_scores":
        files.extend(get_project_similarity_files(db, project_id))

    return files
def insert_encoding_metadata(db, clks_filename, dp_id, receipt_token, encoding_count, block_count):
    """Insert a row describing an upload into the ``uploads`` table.

    The new row is stored with the state ``'pending'``.

    :param db: Database connection; a cursor is opened on it for the insert.
    :param clks_filename: Value stored in the ``file`` column.
    :param dp_id: Data provider id stored in the ``dp`` column.
    :param receipt_token: Value stored in the ``token`` column.
    :param encoding_count: Value stored in the ``count`` column.
    :param block_count: Value stored in the ``block_count`` column.
    """
    logger.info("Adding metadata on encoded entities to database")

    insert_sql = (
        " INSERT INTO uploads (dp, token, file, count, block_count, state)"
        " VALUES (%s, %s, %s, %s, %s, %s) "
    )
    # Order must follow the column list in the statement above.
    row = [dp_id, receipt_token, clks_filename, encoding_count, block_count, 'pending']

    with db.cursor() as cursor:
        cursor.execute(insert_sql, row)
def get_run_state(db, run_id):
    """Look up and return the ``state`` column of a run.

    :param db: Database connection passed to ``query_db``.
    :param run_id: Identifier of the run to look up.
    :return: The value of the run's ``state`` column.
    """
    logger.info("Selecting run")

    row = query_db(
        db,
        """ SELECT state FROM runs WHERE run_id = %s """,
        [run_id],
        one=True,
    )
    # NOTE(review): if query_db yields None for an unknown run_id, this
    # subscript raises TypeError - confirm callers only pass existing runs.
    run_state = row['state']

    logger.info("Run with run_id={} is in state: {}".format(run_id, run_state))
    return run_state
def update_encoding_metadata_set_encoding_size(db, dp_id, encoding_size):
    """Set ``encoding_size`` on the ``bloomingdata`` rows of a data provider.

    :param db: Database connection; a cursor is opened on it for the update.
    :param dp_id: Data provider id used to select the rows (``dp`` column).
    :param encoding_size: New value for the ``encoding_size`` column.
    """
    update_sql = """ UPDATE bloomingdata SET encoding_size = %s WHERE dp = %s """
    logger.info("Updating database with info about encodings")

    # Placeholders are filled in statement order: SET value, then WHERE key.
    params = [encoding_size, dp_id]
    with db.cursor() as cursor:
        cursor.execute(update_sql, params)
def update_encoding_metadata_set_encoding_size(db, dp_id, encoding_size):
    """Set ``encoding_size`` on the ``uploads`` rows of a data provider.

    :param db: Database connection; a cursor is opened on it for the update.
    :param dp_id: Data provider id used to select the rows (``dp`` column).
    :param encoding_size: New value for the ``encoding_size`` column.
    """
    update_sql = """ UPDATE uploads SET encoding_size = %s WHERE dp = %s """
    logger.info(f"Updating uploads table for dp {dp_id} with encoding size ({encoding_size})")

    # Placeholders are filled in statement order: SET value, then WHERE key.
    params = [encoding_size, dp_id]
    with db.cursor() as cursor:
        cursor.execute(update_sql, params)
def update_encoding_metadata(db, clks_filename, dp_id, state):
    """Update ``state`` and ``file`` on the ``bloomingdata`` rows of a data provider.

    :param db: Database connection; a cursor is opened on it for the update.
    :param clks_filename: New value for the ``file`` column.
    :param dp_id: Data provider id used to select the rows (``dp`` column).
    :param state: New value for the ``state`` column.
    """
    update_sql = """ UPDATE bloomingdata SET state = %s, file = %s WHERE dp = %s """
    logger.info("Updating database with info about encodings")

    # Statement order is state, file, dp - not the order of this
    # function's parameters.
    params = [state, clks_filename, dp_id]
    with db.cursor() as cursor:
        cursor.execute(update_sql, params)
def insert_blocking_metadata(db, dp_id, blocks):
    """Bulk insert one row per block into the ``blocks`` table.

    Every inserted row gets the state ``'pending'``.

    :param db: Database connection; a cursor is opened on it for the insert.
    :param dp_id: Data provider id stored in the ``dp`` column of every row.
    :param blocks: A dict mapping block id to the number of encodings per
        block.
    """
    logger.info("Adding blocking metadata to database")
    insert_sql = """ INSERT INTO blocks (dp, block_name, count, state) VALUES %s """

    logger.info("Preparing SQL for bulk insert of blocks")
    rows = [
        (dp_id, name, size, 'pending')
        for name, size in blocks.items()
    ]

    with db.cursor() as cursor:
        # execute_values expands the single %s into the list of row tuples.
        psycopg2.extras.execute_values(cursor, insert_sql, rows)
def get_all_objects_for_project(db, project_id):
    """Collect the object store file references that belong to a project.

    For every data provider of the project the ``file`` value of its
    ``bloomingdata`` row is gathered. When the project's ``result_type`` is
    ``"similarity_scores"`` the files of all similarity score rows attached
    to the project's runs are appended afterwards.

    :param db: Database connection passed through to the query helpers.
    :param project_id: Identifier of the project to inspect.
    :return: List of file references, encoding files first.
    """
    result_type = get_project_column(db, project_id, 'result_type')
    object_store_files = []

    dps = query_db(
        db,
        """ SELECT id FROM dataproviders WHERE project = %s """,
        [project_id])

    for dp in dps:
        clk_file_ref = query_db(
            db,
            """ SELECT file FROM bloomingdata WHERE dp = %s """,
            [dp['id']],
            one=True)

        # A row may exist while its file column is still unset; skip it so
        # that None never ends up in the list of object store files.
        if clk_file_ref is not None and clk_file_ref['file'] is not None:
            logger.info("blooming data file found: {}".format(clk_file_ref))
            object_store_files.append(clk_file_ref['file'])

    if result_type == "similarity_scores":
        similarity_sql = (
            " SELECT similarity_scores.file FROM similarity_scores, runs"
            " WHERE runs.run_id = similarity_scores.run AND runs.project = %s "
        )
        query_response = query_db(db, similarity_sql, [project_id])
        object_store_files.extend(res['file'] for res in query_response)

    return object_store_files