def process_image_entries(image_entries):
    db_connector = dbutils.connect_to_main_database()

    # Go through all entries.
    bar = progressbar.ProgressBar(max_value=len(image_entries))
    for index, (artifact_id, path) in enumerate(image_entries):
        bar.update(index)

        # Check if there is already an entry.
        select_sql_statement = ""
        select_sql_statement += "SELECT COUNT(*) FROM artifact_quality"
        select_sql_statement += " WHERE artifact_id='{}'".format(artifact_id)
        select_sql_statement += " AND type='{}'".format(db_type)
        select_sql_statement += " AND key='{}'".format(db_key)
        results = db_connector.execute(select_sql_statement, fetch_one=True)[0]

        # There is already an entry. Skip.
        if results != 0:
            continue

        bluriness = get_blur_variance(path)

        # Create an SQL statement for insertion.
        sql_statement = ""
        sql_statement += "INSERT INTO artifact_quality (type, key, value, artifact_id, misc)"
        sql_statement += " VALUES('{}', '{}', '{}', '{}', '{}');".format(
            db_type, db_key, bluriness, artifact_id, "")

        # Call database.
        result = db_connector.execute(sql_statement)
    bar.finish()
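# A minimal sketch, not part of the original dbutils wrapper: the statements above are
# assembled with str.format, which breaks on values containing quotes and is open to
# SQL injection. Assuming the underlying cursor is a psycopg2 cursor, the same insert
# can use parameter binding so the driver handles quoting; the helper name and
# signature below are hypothetical.
def insert_artifact_quality(cursor, db_type, db_key, value, artifact_id, misc=""):
    cursor.execute(
        "INSERT INTO artifact_quality (type, key, value, artifact_id, misc)"
        " VALUES (%s, %s, %s, %s, %s);",
        (db_type, db_key, value, artifact_id, misc))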
def get_timestamps_from_pcd(qr_code):
    connector2 = dbutils.connect_to_main_database()
    sql_statement = "SELECT path FROM artifact "
    sql_statement += " WHERE qr_code = '{}'".format(qr_code)
    sql_statement += " AND type = 'pcd'"
    path = connector2.execute(sql_statement, fetch_all=True)

    timestamps = np.array([])

    # Iterate over all paths pointing to pcds.
    for p in path:
        try:
            stamp = get_timestamp_from_pcd(p)
            timestamps = np.append(timestamps, stamp)
        except IndexError:
            error = np.array([])
            logging.error("Error with timestamp in pcd")
            return [error, path]

    if len(timestamps) == 0:
        error = np.array([])
        return [error, path]

    return timestamps, path
def __init__(self, db_connector):
    """
    Args:
        db_connector (str): path to the JSON file with the database connection settings.
    """
    self.db_connector = db_connector
    self.ml_connector = dbutils.connect_to_main_database(self.db_connector)
def main():
    db_connector = dbutils.connect_to_main_database()

    # Get the paths of all fused (pcrgb) artifacts.
    select_sql_statement = "SELECT path FROM artifact WHERE type='pcrgb';"
    pcd_paths = db_connector.execute(select_sql_statement, fetch_all=True)
    print('Available fused data: ' + str(len(pcd_paths)))

    # todo: remove the (1) or (2) backup?
    # unique_qr_codes = [x[0] for x in unique_qr_codes]

    # Initialize log file.
    logging.basicConfig(filename='/tmp/command_update_depth.log',
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s')

    # Run this in multiprocess mode.
    utils.multiprocess(pcd_paths,
                       process_method=update_depth,
                       process_individial_entries=False,
                       number_of_workers=8,
                       pass_process_index=True,
                       progressbar=False,
                       disable_gpu=True)
    print("*** Done ***.")
def update_artifactsquality_with_pointcloud_data():
    # Get all pointclouds.
    sql_script = "SELECT id, path FROM artifact WHERE type='pcd'"
    db_connector = dbutils.connect_to_main_database()
    pointcloud_entries = db_connector.execute(sql_script, fetch_all=True)
    print("Found {} pointclouds.".format(len(pointcloud_entries)))

    db_type = "pcd"

    def process_pointcloud_entries(pointcloud_entries):
        db_connector = dbutils.connect_to_main_database()

        # Go through all entries.
        bar = progressbar.ProgressBar(max_value=len(pointcloud_entries))
        for index, (artifact_id, path) in enumerate(pointcloud_entries):
            bar.update(index)
            pointcloud_values = get_pointcloud_values(path)
            for db_key, db_value in pointcloud_values.items():

                # Check if there is already an entry.
                select_sql_statement = ""
                select_sql_statement += "SELECT COUNT(*) FROM artifact_quality"
                select_sql_statement += " WHERE artifact_id='{}'".format(artifact_id)
                select_sql_statement += " AND type='{}'".format(db_type)
                select_sql_statement += " AND key='{}'".format(db_key)
                results = db_connector.execute(select_sql_statement, fetch_one=True)[0]

                # There is already an entry. Skip.
                if results != 0:
                    continue

                # Create an SQL statement for insertion.
                sql_statement = ""
                sql_statement += "INSERT INTO artifact_quality (type, key, value, artifact_id, misc)"
                sql_statement += " VALUES('{}', '{}', '{}', '{}', '{}');".format(
                    db_type, db_key, db_value, artifact_id, "")

                # Call database.
                try:
                    result = db_connector.execute(sql_statement)
                except Exception:
                    print(sql_statement, pointcloud_values)
                    exit(0)
        bar.finish()

    # Run this in multiprocess mode.
    utils.multiprocess(pointcloud_entries,
                       process_method=process_pointcloud_entries,
                       process_individial_entries=False,
                       progressbar=False)
    print("Done.")
def process_artifact_paths(artifact_paths):
    main_connector = dbutils.connect_to_main_database()
    table = "artifact"

    batch_size = 100
    insert_count = 0
    no_measurements_count = 0
    skip_count = 0
    bar = progressbar.ProgressBar(max_value=len(artifact_paths))
    sql_statement = ""
    last_index = len(artifact_paths) - 1
    for index, artifact_path in enumerate(artifact_paths):
        bar.update(index)

        # Check if there is already an entry.
        basename = os.path.basename(artifact_path)
        sql_statement_select = dbutils.create_select_statement(
            "artifact", ["id"], [basename])  # TODO is this the proper id?
        results = main_connector.execute(sql_statement_select, fetch_all=True)

        # No results found. Insert.
        if len(results) == 0:
            insert_data = {}
            insert_data["id"] = basename  # TODO proper?

            # Process the artifact.
            default_values = get_default_values(artifact_path, table, main_connector)
            if default_values is not None:
                insert_data.update(default_values)
                sql_statement += dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values())
                insert_count += 1
            else:
                no_measurements_count += 1

        # Found a result. Skip.
        else:
            skip_count += 1

        # Update database in batches.
        if (index != 0 and (index % batch_size) == 0) or index == last_index:
            if sql_statement != "":
                result = main_connector.execute(sql_statement)
                sql_statement = ""

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))
def update_artifactsquality_with_posenet():
    # Get all images.
    sql_script = "SELECT id, path FROM artifact WHERE type='jpg'"
    db_connector = dbutils.connect_to_main_database()
    image_entries = db_connector.execute(sql_script, fetch_all=True)
    print("Found {} images.".format(len(image_entries)))

    db_type = "rgb"
    db_key = "No of People"

    def process_image_entries(image_entries):
        db_connector = dbutils.connect_to_main_database()

        # Go through all entries.
        bar = progressbar.ProgressBar(max_value=len(image_entries))
        for index, (artifact_id, path) in enumerate(image_entries):
            bar.update(index)

            # Check if there is already an entry.
            select_sql_statement = ""
            select_sql_statement += "SELECT COUNT(*) FROM artifact_quality"
            select_sql_statement += " WHERE artifact_id='{}'".format(artifact_id)
            select_sql_statement += " AND type='{}'".format(db_type)
            select_sql_statement += " AND key='{}'".format(db_key)
            results = db_connector.execute(select_sql_statement, fetch_one=True)[0]

            # There is already an entry. Skip.
            if results != 0:
                continue

            pose = get_pose(path)

            # Create an SQL statement for insertion.
            sql_statement = ""
            sql_statement += "INSERT INTO artifact_quality (type, key, value, artifact_id, misc)"
            sql_statement += " VALUES('{}', '{}', '{}', '{}', '{}');".format(
                db_type, db_key, pose, artifact_id, "")

            # Call database.
            result = db_connector.execute(sql_statement)
        bar.finish()

    # Run this in multiprocess mode.
    utils.multiprocess(image_entries,
                       process_method=process_image_entries,
                       process_individial_entries=False,
                       progressbar=False,
                       number_of_workers=1)
    print("Done.")
def main():
    # Check the arguments before using them.
    if len(sys.argv) != 4:
        print("usage: command_update_models.py models_file db_connection_file table_name")
        sys.exit(1)

    models_file = str(sys.argv[1])
    db_connection_file = str(sys.argv[2])
    table_name = str(sys.argv[3])

    main_connector = dbutils.connect_to_main_database(db_connection_file)

    with open(models_file) as json_file:
        json_data = json.load(json_file)

    for data in json_data["models"]:
        check_existing_models = "SELECT id from {};".format(table_name)
        results = main_connector.execute(check_existing_models, fetch_all=True)
        if data["name"] in str(results):
            print("{} already exists in model table".format(data["name"]))
            continue

        value_mapping = {}
        value_mapping["id"] = data["name"]
        value_mapping["name"] = data["name"]
        version = data["name"].split('_')[0]
        value_mapping["version"] = version
        del data["name"]
        value_mapping["json_metadata"] = json.dumps(data)

        keys = []
        values = []
        for key in value_mapping.keys():
            keys.append(key)
            values.append(value_mapping[key])

        sql_statement = dbutils.create_insert_statement(
            table_name, keys, values, False, True)
        try:
            results = main_connector.execute(sql_statement)
            print("{} successfully added to model table".format(value_mapping["name"]))
        except Exception as error:
            print(error)

    main_connector.cursor.close()
    main_connector.connection.close()
def get_timestamps_from_rgb(qr_code):
    connector1 = dbutils.connect_to_main_database()

    # Get all rgb artifacts of a certain unique qr code.
    sql_statement = "SELECT path, type, tango_timestamp FROM artifact "
    sql_statement += " WHERE qr_code = '{}'".format(qr_code)
    sql_statement += " AND type = 'rgb'"
    all_rgb = connector1.execute(sql_statement, fetch_all=True)

    timestamps = [x[2] for x in all_rgb]
    path = [x[0] for x in all_rgb]

    if len(timestamps) == 0:
        error = np.array([])
        return [error, path]

    timestamps = np.asarray(timestamps)
    return [timestamps, path]
def process_artifacts(artifacts):
    # Create database connection.
    db_connector = dbutils.connect_to_main_database()

    # Load the model first.
    model = load_model(model_path)
    model_name = model_path.split("/")[-2]

    # Evaluate and create SQL-statements.
    bar = progressbar.ProgressBar(max_value=len(artifacts))
    for artifact_index, artifact in enumerate(artifacts):
        bar.update(artifact_index)

        # Execute SQL statement.
        try:
            # Load the artifact and evaluate.
            artifact_id, pcd_path, target_height, qrcode = artifact
            pcd_array = utils.load_pcd_as_ndarray(pcd_path)
            pcd_array = utils.subsample_pointcloud(pcd_array, 10000)
            mse, mae = model.evaluate(np.expand_dims(pcd_array, axis=0),
                                      np.array([target_height]),
                                      verbose=0)
            if qrcode in qrcodes_train:
                misc = "training"
            else:
                misc = "nottraining"

            # Create an SQL statement.
            sql_statement = ""
            sql_statement += "INSERT INTO artifact_quality (type, key, value, artifact_id, misc)"
            sql_statement += " VALUES('{}', '{}', '{}', '{}', '{}');".format(
                model_name, "mae", mae, artifact_id, misc)

            # Call database.
            result = db_connector.execute(sql_statement)
        except psycopg2.IntegrityError:
            print("Already in DB. Skipped.", pcd_path)
        except ValueError:
            print("Skipped.", pcd_path)
    bar.finish()
# Import core packages from cgm.
from cgmcore.utils import load_pcd_as_ndarray
from cgmcore import utils

# Import packages for visualization.
from pyntcloud import PyntCloud
from timeit import default_timer as timer
import tensorflow as tf
from PIL import Image
from io import BytesIO
import datetime

# Connect to database.
main_connector = dbutils.connect_to_main_database()


class DeepLabModel(object):
    """Class to load deeplab model and run inference."""

    INPUT_TENSOR_NAME = 'ImageTensor:0'
    OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'
    INPUT_SIZE = 513
    FROZEN_GRAPH_NAME = 'frozen_inference_graph'

    def __init__(self, tarball_path):
        """Creates and loads pretrained deeplab model."""
        self.graph = tf.Graph()
        graph_def = None
    split_qrc = qrc.split('/')[1]
    return split_qrc


# Parse all the configuration variables.
db_file = cfg["database"]['db_connection_file']
training_file = cfg['csv_paths']['training_paths']
testing_file = cfg['csv_paths']['testing_paths']
number_of_scans = cfg['scans']['scan_amount']
calibration_file = cfg['calibration']['calibration_file']
dataset_name = cfg['data']['dataset']
target_folder = cfg['paths']['target_path']

# Connect to the database.
ml_connector = dbutils.connect_to_main_database(db_file)
columns = ml_connector.get_columns('artifacts_with_target')

# Query to select the data from the database.
# NOTE: storing all the data in a dataframe and then filtering is much faster.
select_artifacts_with_target = "select * from artifacts_with_target;"
database = pd.DataFrame(
    ml_connector.execute(select_artifacts_with_target, fetch_all=True),
    columns=columns)

complete_data = database[database['tag'] == 'good']
complete_data['qrcode'] = complete_data.apply(qrcode, axis=1)

unique_qrcodes = pd.DataFrame(list(set(complete_data['qrcode'].tolist())),
                              columns=['qrcode'])
# Select only the relevant qrcodes from the whole database,
# which in this case are the ones starting with '15'.
usable_qrcodes = unique_qrcodes[unique_qrcodes['qrcode'].str.match('15')]

# Read the training and testing qrcodes already present in the previous dataset.
training_qrcodes = pd.read_csv(training_file)
def execute_command_preprocess(preprocess_pcds=True,
                               preprocess_ply=False,
                               preprocess_jpgs=False,
                               preprocess_npy=False,
                               preprocess_png=False,
                               path_suffix=""):
    print("Preprocessing data-set...")
    print("Using '{}'".format(config.preprocessed_root_path))
    if os.path.exists(config.preprocessed_root_path) == False:
        print("Folder does not exist. Creating...")
        os.mkdir(config.preprocessed_root_path)

    # Create the base-folder.
    if path_suffix != "":
        path_suffix = "-" + path_suffix
    datetime_path = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    base_path = os.path.join(config.preprocessed_root_path, datetime_path + path_suffix)
    os.mkdir(base_path)
    if preprocess_pcds == True:
        os.mkdir(os.path.join(base_path, "pcd"))
    if preprocess_ply == True:
        os.mkdir(os.path.join(base_path, "ply"))
    if preprocess_jpgs == True:
        os.mkdir(os.path.join(base_path, "jpg"))
    if preprocess_npy == True:
        os.mkdir(os.path.join(base_path, "npy"))
    print("Writing preprocessed data to {}...".format(base_path))

    # Process the filtered PCDs.
    if preprocess_pcds == True:

        # Get entries.
        sql_statement = """
            SELECT artifact_path, qr_code, height, weight
            FROM artifacts_with_targets
            WHERE type='pcd'
            AND status='standing'
            ;
            """
        main_connector = dbutils.connect_to_main_database()
        entries = main_connector.execute(sql_statement, fetch_all=True)
        print("Found {} PCDs. Processing...".format(len(entries)))

        # Method for processing a single entry.
        def process_pcd_entry(entry):
            path, qr_code, height, weight = entry
            if os.path.exists(path) == False:
                print("\n", "File {} does not exist!".format(path), "\n")
                return
            try:
                pointcloud = utils.load_pcd_as_ndarray(path)
                targets = np.array([height, weight])
                pickle_filename = os.path.basename(path).replace(".pcd", ".p")
                qrcode_path = os.path.join(base_path, "pcd", qr_code)
                if os.path.exists(qrcode_path) == False:
                    os.mkdir(qrcode_path)
                pickle_output_path = os.path.join(qrcode_path, pickle_filename)
                pickle.dump((pointcloud, targets), open(pickle_output_path, "wb"))
            except Exception:
                pass

        # Start multiprocessing.
        utils.multiprocess(entries, process_pcd_entry)

    # Process the filtered PLYs.
    if preprocess_ply == True:

        # Get entries.
        sql_statement = """
            SELECT artifact_path, qr_code, height, weight
            FROM artifacts_with_targets
            WHERE type='pcrgb'
            AND status='standing'
            ;
            """
        main_connector = dbutils.connect_to_main_database()
        entries = main_connector.execute(sql_statement, fetch_all=True)
        print("Found {} PLYs. Processing...".format(len(entries)))

        # Method for processing a single entry.
        def process_ply_entry(entry):
            path, qr_code, height, weight = entry
            print(path)
            if os.path.exists(path) == False:
                print("\n", "File {} does not exist!".format(path), "\n")
                return
            try:
                pointcloud = utils.load_pcd_as_ndarray(path)
                targets = np.array([height, weight])
                pickle_filename = os.path.basename(path).replace(".ply", ".p")
                qrcode_path = os.path.join(base_path, "ply", qr_code)
                # print('qr code path: ' + qrcode_path)
                if not os.path.exists(qrcode_path):
                    os.mkdir(qrcode_path)
                    print('creating path')
                pickle_output_path = os.path.join(qrcode_path, pickle_filename)
                pickle.dump((pointcloud, targets), open(pickle_output_path, "wb"))
            except BaseException as e:
                print(e)

        # Start multiprocessing.
        utils.multiprocess(entries, process_ply_entry)

    # Process the filtered JPGs.
    if preprocess_jpgs == True:
        assert False
        entries = filterjpgs()["results"]
        print("Found {} JPGs. Processing...".format(len(entries)))
        bar = progressbar.ProgressBar(max_value=len(entries))

        # Method for processing a single entry.
        def process_jpg_entry(entry):
            path = entry["path"]
            if os.path.exists(path) == False:
                print("\n", "File {} does not exist!".format(path), "\n")
                return
            image = cv2.imread(path)
            targets = np.array([entry["height_cms"], entry["weight_kgs"]])
            qrcode = entry["qrcode"]
            pickle_filename = os.path.basename(entry["path"]).replace(".jpg", ".p")
            qrcode_path = os.path.join(base_path, "jpg", qrcode)
            if os.path.exists(qrcode_path) == False:
                os.mkdir(qrcode_path)
            pickle_output_path = os.path.join(qrcode_path, pickle_filename)
            pickle.dump((image, targets), open(pickle_output_path, "wb"))

        # Start multiprocessing.
        utils.multiprocess(entries, process_jpg_entry)

    # Process depth images stored as numpy arrays.
    if preprocess_npy == True:

        # Get entries.
        sql_statement = """
            SELECT artifact_path, qr_code, height, weight
            FROM artifacts_with_targets
            WHERE type='depth_npy'
            AND status='standing'
            ;
            """
        main_connector = dbutils.connect_to_main_database()
        entries = main_connector.execute(sql_statement, fetch_all=True)
        print("Found {} depth_npy. Processing...".format(len(entries)))

        # Method for processing a single entry.
        def process_npy_entry(entry):
            path, qr_code, height, weight = entry
            if os.path.exists(path) == False:
                print("\n", "File {} does not exist!".format(path), "\n")
                return
            try:
                print(path)
                np_array = np.load(path)
                print("opened file")
                targets = np.array([height, weight])
                pickle_filename = os.path.basename(path).replace(".npy", ".p")
                qrcode_path = os.path.join(base_path, "npy", qr_code)
                if os.path.exists(qrcode_path) == False:
                    os.mkdir(qrcode_path)
                pickle_output_path = os.path.join(qrcode_path, pickle_filename)
                pickle.dump((np_array, targets), open(pickle_output_path, "wb"))
            except Exception:
                print("exception fails")

        # Start multiprocessing.
        utils.multiprocess(entries, process_npy_entry)

    # print(preprocess_png)
    # preprocess_png = True

    # Process the filtered depth PNGs.
    if preprocess_png == True:

        # Get entries.
        sql_statement = """
            SELECT artifact_path, qr_code, height, weight
            FROM artifacts_with_targets
            WHERE type='depth_png'
            AND status='standing'
            ;
            """
        main_connector = dbutils.connect_to_main_database()
        entries = main_connector.execute(sql_statement, fetch_all=True)
        print("Found {} depth_png. Processing...".format(len(entries)))

        # Method for processing a single entry.
        def process_png_entry(entry):
            path, qr_code, height, weight = entry
            if os.path.exists(path) == False:
                print("\n", "File {} does not exist!".format(path), "\n")
                return
            try:
                print(path)
                np_array = cv2.imread(path, cv2.CV_8U)
                print(type(np_array))
                print("opened file")
                print(np_array.shape)
                targets = np.array([height, weight])
                pickle_filename = os.path.basename(path).replace(".png", ".p")
                qrcode_path = os.path.join(base_path, "png", qr_code)
                print(qrcode_path)
                try:
                    os.makedirs(qrcode_path, exist_ok=True)
                except Exception:
                    print("can not create folder")
                pickle_output_path = os.path.join(qrcode_path, pickle_filename)
                pickle.dump((np_array, targets), open(pickle_output_path, "wb"))
            except Exception:
                print("Failed to import image")

        # Start multiprocessing.
        utils.multiprocess(entries, process_png_entry)
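# A minimal sketch of reading one preprocessed sample back. Each branch above pickles a
# (data, targets) tuple into base_path/<kind>/<qr_code>/<name>.p, where targets is
# np.array([height, weight]); the path below is a placeholder, not a real file.
import pickle

with open("/path/to/preprocessed/pcd/<qr_code>/<name>.p", "rb") as handle:
    data, targets = pickle.load(handle)
    height, weight = targets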
def main():
    # Check the arguments.
    if len(sys.argv) != 2:
        raise Exception("ERROR! Must provide model filename.")
    model_path = sys.argv[1]
    if not os.path.exists(model_path):
        raise Exception("ERROR! \"{}\" does not exist.".format(model_path))
    if not os.path.isfile(model_path):
        raise Exception("ERROR! \"{}\" is not a file.".format(model_path))

    # Get the training QR-codes.
    search_path = os.path.join(os.path.dirname(model_path), "*.p")
    paths = glob.glob(search_path)
    details_path = [path for path in paths if "details" in path][0]
    details = pickle.load(open(details_path, "rb"))
    qrcodes_train = details["qrcodes_train"]

    # Create database connection.
    db_connector = dbutils.connect_to_main_database()

    # Query the database for artifacts.
    print("Getting all artifacts...")
    sql_statement = ""
    # Select all artifacts.
    sql_statement += "SELECT pointcloud_data.id, pointcloud_data.path, measurements.height_cms, pointcloud_data.qrcode FROM pointcloud_data"
    # Join them with measurements.
    sql_statement += " INNER JOIN measurements ON pointcloud_data.measurement_id=measurements.id"
    # Only take into account manual measurements.
    sql_statement += " WHERE measurements.type='manual'"
    artifacts = db_connector.execute(sql_statement, fetch_all=True)
    print("Found {} artifacts.".format(len(artifacts)))

    # Method for processing a set of artifacts.
    # Note: This method will run in its own process.
    def process_artifacts(artifacts):
        # Create database connection.
        db_connector = dbutils.connect_to_main_database()

        # Load the model first.
        model = load_model(model_path)
        model_name = model_path.split("/")[-2]

        # Evaluate and create SQL-statements.
        bar = progressbar.ProgressBar(max_value=len(artifacts))
        for artifact_index, artifact in enumerate(artifacts):
            bar.update(artifact_index)

            # Execute SQL statement.
            try:
                # Load the artifact and evaluate.
                artifact_id, pcd_path, target_height, qrcode = artifact
                pcd_array = utils.load_pcd_as_ndarray(pcd_path)
                pcd_array = utils.subsample_pointcloud(pcd_array, 10000)
                mse, mae = model.evaluate(np.expand_dims(pcd_array, axis=0),
                                          np.array([target_height]),
                                          verbose=0)
                if qrcode in qrcodes_train:
                    misc = "training"
                else:
                    misc = "nottraining"

                # Create an SQL statement.
                sql_statement = ""
                sql_statement += "INSERT INTO artifact_quality (type, key, value, artifact_id, misc)"
                sql_statement += " VALUES('{}', '{}', '{}', '{}', '{}');".format(
                    model_name, "mae", mae, artifact_id, misc)

                # Call database.
                result = db_connector.execute(sql_statement)
            except psycopg2.IntegrityError:
                print("Already in DB. Skipped.", pcd_path)
            except ValueError:
                print("Skipped.", pcd_path)
        bar.finish()

    # Run this in multiprocess mode.
    utils.multiprocess(artifacts,
                       process_method=process_artifacts,
                       process_individial_entries=False,
                       progressbar=False)
    print("Done.")
def update_qrs(unique_qr_codes, process_index):
    # def update_qrs(unique_qr_codes):

    # Initialize the progress bar with the maximum number of unique qr codes.
    # bar = progressbar.ProgressBar(max_value=len(unique_qr_codes))

    qr_counter = 0
    # for qr in tqdm(unique_qr_codes, position=process_index):
    for qr in unique_qr_codes:
        qr_counter = qr_counter + 1
        logging.error(qr_counter)
        logging.error(qr)

        # Exclude the following qrcodes.
        if qr == "{qrcode}":
            continue
        if qr == "data":
            continue

        # Get all rgb images for a unique qr code.
        sql_statement = "SELECT path, type, tango_timestamp FROM artifact "
        sql_statement += " WHERE qr_code = '{}'".format(qr)
        sql_statement += " AND type = 'rgb'"
        connector1 = dbutils.connect_to_main_database()
        all_rgb = connector1.execute(sql_statement, fetch_all=True)

        for rgb in all_rgb:
            # Get the path and generate the output path from it.
            img_path = rgb[0]
            img_path = img_path.replace("whhdata", "localssd")
            seg_path = img_path.replace(".jpg", "_SEG.png")

            # Load the image from the path.
            try:
                logging.info("Trying to open : " + img_path)
                jpeg_str = open(img_path, "rb").read()
                original_im = Image.open(BytesIO(jpeg_str))
            except IOError:
                print('Cannot retrieve image. Please check file: ' + img_path)
                continue

            # Apply segmentation via the pre-trained model.
            logging.info('running deeplab on image %s...' % img_path)
            resized_im, seg_map = MODEL.run(original_im)

            # Convert the segmentation map into a binary mask.
            width, height = resized_im.size
            dummyImg = np.zeros([height, width, 4], dtype=np.uint8)
            for x in range(width):
                for y in range(height):
                    color = seg_map[y, x]
                    if color == 0:
                        dummyImg[y, x] = [0, 0, 0, 255]
                    else:
                        dummyImg[y, x] = [255, 255, 255, 255]

            img = Image.fromarray(dummyImg)
            # img = img.convert("RGB")
            img = img.convert('RGB').resize(original_im.size, Image.ANTIALIAS)
            img.save('/tmp/output.png')
            print(seg_path)
            print(img_path)
            logging.info("saved file to " + seg_path)
            img.save(seg_path)
def execute_command_updatemeasurements():
    print("Updating measurements...")
    main_connector = dbutils.connect_to_main_database()

    # TODO import persons

    # Where to get the data.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = sorted(glob.glob(glob_search_path))
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns.
    columns = list(df)
    print(columns)

    ignored_columns = ["sex", "address", "qrcode", "latitude", "longitude", "personId"]
    columns_mapping = {column: column for column in columns if column not in ignored_columns}
    columns_mapping["personId"] = "person_id"
    columns_mapping["headCircumference"] = "head_circumference"
    columns_mapping["deletedBy"] = "deleted_by"
    columns_mapping["createdBy"] = "created_by"

    table = "measure"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    for index, row in df.iterrows():
        bar.update(index)

        keys = []
        values = []
        for df_key, db_key in columns_mapping.items():
            keys.append(str(db_key))
            values.append(str(row[df_key]))

        # TODO what is this?
        keys.append("date")
        values.append(int(time.time()))

        # TODO what is this?
        keys.append("artifact")
        values.append("UNKNOWN")

        sql_statement += dbutils.create_insert_statement(table, keys, values)

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1):
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
def execute_command_persons():
    print("Updating persons...")
    main_connector = dbutils.connect_to_main_database()

    # Where to get the data.
    csv_path = config.measure_csv_path
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns.
    columns = list(df)
    print(columns)

    table = "person"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    used_ids = []
    for index, row in df.iterrows():
        bar.update(index)

        # Make sure there are no duplicates. Local.
        select_sql_statement = "SELECT COUNT(*) FROM person WHERE id='{}'".format(row["personId"])
        result = main_connector.execute(select_sql_statement, fetch_one=True)[0]
        if row["personId"] in used_ids or result != 0:
            # print(row["personId"], "already in DB")
            pass
        else:
            # TODO check all of these.
            insert_data = {}
            insert_data["id"] = row["personId"]
            insert_data["name"] = "UNKNOWN"
            insert_data["surname"] = "UNKNOWN"
            insert_data["birthday"] = 0
            insert_data["sex"] = "UNKNOWN"
            insert_data["guardian"] = "UNKNOWN"
            insert_data["is_age_estimated"] = False
            insert_data["qr_code"] = row["qrcode"]
            insert_data["created"] = 0
            insert_data["timestamp"] = row["timestamp"]
            insert_data["created_by"] = row["createdBy"]
            insert_data["deleted"] = row["deleted"]
            insert_data["deleted_by"] = row["deletedBy"]
            sql_statement += dbutils.create_insert_statement(
                table, insert_data.keys(), insert_data.values())
            # print(sql_statement)
            used_ids.append(row["personId"])

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1) and sql_statement != "":
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
def process_artifacts(artifacts, process_index):
    # Create database connection.
    db_connector = dbutils.connect_to_main_database()

    # Load the model first.
    model_weights_path = [
        x for x in glob.glob(os.path.join(model_path, "*"))
        if x.endswith("-model-weights.h5")
    ][0]
    model_details_path = [
        x for x in glob.glob(os.path.join(model_path, "*"))
        if x.endswith("-details.p")
    ][0]
    model_name = model_path.split("/")[-1]
    model_details = pickle.load(open(model_details_path, "rb"))
    pointcloud_target_size = model_details["dataset_parameters"]["pointcloud_target_size"]
    pointcloud_subsampling_method = model_details["dataset_parameters"]["pointcloud_subsampling_method"]
    target_key = model_details["dataset_parameters"]["output_targets"][0]
    model = load_model(model_weights_path, pointcloud_target_size)

    # Evaluate and create SQL-statements.
    for artifact_index, artifact in enumerate(tqdm(artifacts, position=process_index)):

        # Unpack fields.
        artifact_id, pcd_path = artifact

        # Check if there is already an entry.
        select_sql_statement = ""
        select_sql_statement += "SELECT COUNT(*) FROM artifact_result"
        select_sql_statement += " WHERE artifact_id='{}'".format(artifact_id)
        select_sql_statement += " AND model_name='{}'".format(model_name)
        select_sql_statement += " AND target_key='{}'".format(target_key)
        results = db_connector.execute(select_sql_statement, fetch_one=True)[0]

        # There is already an entry. Skip.
        if results != 0:
            continue

        # Execute SQL statement.
        try:
            # Load the artifact and evaluate.
            pcd_path = pcd_path.replace("/whhdata/qrcode", "/localssd/qrcode")
            pcd_array = utils.load_pcd_as_ndarray(pcd_path)
            pcd_array = utils.subsample_pointcloud(
                pcd_array,
                pointcloud_target_size,
                subsampling_method=pointcloud_subsampling_method)
            value = model.predict(np.expand_dims(pcd_array, axis=0), verbose=0)[0][0]

            # Create an SQL statement.
            sql_statement = ""
            sql_statement += "INSERT INTO artifact_result (model_name, target_key, value, artifact_id)"
            sql_statement += " VALUES('{}', '{}', '{}', '{}');".format(
                model_name, target_key, value, artifact_id)

            # Call database.
            result = db_connector.execute(sql_statement)
        except psycopg2.IntegrityError as e:
            # print("Already in DB. Skipped.", pcd_path)
            pass
        except ValueError as e:
            # print("Skipped.", pcd_path)
            pass
def process_person_paths(person_paths, process_index):
    # person_paths = person_paths[0:4]  # TODO remove this!

    insert_count = 0
    no_measurements_count = 0
    skip_count = 0

    # Go through each person (qr-code).
    for person_path in tqdm(person_paths, position=process_index):
        person_path = person_path.replace('localssd/', 'localssd2/')
        print(person_path)

        # Find all artifacts for that person.
        artifact_paths = []
        for file_extension in file_extensions:
            print(file_extension)
            glob_search_path = os.path.join(person_path, "**/*.{}".format(file_extension))
            # print(glob_search_path)
            artifact_paths.extend(glob.glob(glob_search_path))
        # print(artifact_paths)
        print("Found {} artifacts in {}".format(len(artifact_paths), person_path))

        # Process those artifacts.
        main_connector = dbutils.connect_to_main_database()
        table = "artifact"
        batch_size = 100
        sql_statement = ""
        last_index = len(artifact_paths) - 1
        for artifact_index, artifact_path in enumerate(artifact_paths):

            # Check if there is already an entry in the database.
            basename = os.path.basename(artifact_path)
            sql_statement_select = dbutils.create_select_statement("artifact", ["id"], [basename])
            results = main_connector.execute(sql_statement_select, fetch_all=True)

            # No results found. Insert.
            if len(results) == 0:
                insert_data = {}
                insert_data["id"] = basename  # TODO proper?

                # Get the default values for the artifact.
                default_values = get_default_values(artifact_path, table, main_connector)

                # Check if there is a measure_id.
                if "measure_id" in default_values.keys():
                    insert_count += 1
                else:
                    no_measurements_count += 1

                # Create SQL statement.
                insert_data.update(default_values)
                sql_statement_for_artifact = dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values())
                sql_statement += sql_statement_for_artifact

            # Found a result. Skip.
            else:
                skip_count += 1

            # Update database in batches.
            if (artifact_index != 0 and (artifact_index % batch_size) == 0) or artifact_index == last_index:
                if sql_statement != "":
                    result = main_connector.execute(sql_statement)
                    sql_statement = ""

    # Return statistics.
    return (insert_count, no_measurements_count, skip_count)
def main():
    # Check the arguments.
    if len(sys.argv) != 2:
        raise Exception("ERROR! Must provide model filename.")
    model_path = sys.argv[1]
    if not os.path.exists(model_path):
        raise Exception("ERROR! \"{}\" does not exist.".format(model_path))

    # Get the training QR-codes.
    # search_path = os.path.join(os.path.dirname(model_path), "*.p")
    # paths = glob.glob(search_path)
    # details_path = [path for path in paths if "details" in path][0]
    # details = pickle.load(open(details_path, "rb"))
    # qrcodes_train = details["qrcodes_train"]
    # qrcodes_validate = details["qrcodes_validate"]
    # print("QR codes train:", len(qrcodes_train), "QR codes validate:", len(qrcodes_validate))

    # Query the database for artifacts.
    print("Getting all artifacts...")
    db_connector = dbutils.connect_to_main_database()
    sql_statement = ""
    sql_statement += "SELECT id, path FROM artifact"
    sql_statement += " WHERE type='pcd'"
    sql_statement += ";"
    artifacts = db_connector.execute(sql_statement, fetch_all=True)
    print("Found {} artifacts.".format(len(artifacts)))

    # Method for processing a set of artifacts.
    # Note: This method will run in its own process.
    def process_artifacts(artifacts, process_index):
        # Create database connection.
        db_connector = dbutils.connect_to_main_database()

        # Load the model first.
        model_weights_path = [
            x for x in glob.glob(os.path.join(model_path, "*"))
            if x.endswith("-model-weights.h5")
        ][0]
        model_details_path = [
            x for x in glob.glob(os.path.join(model_path, "*"))
            if x.endswith("-details.p")
        ][0]
        model_name = model_path.split("/")[-1]
        model_details = pickle.load(open(model_details_path, "rb"))
        pointcloud_target_size = model_details["dataset_parameters"]["pointcloud_target_size"]
        pointcloud_subsampling_method = model_details["dataset_parameters"]["pointcloud_subsampling_method"]
        target_key = model_details["dataset_parameters"]["output_targets"][0]
        model = load_model(model_weights_path, pointcloud_target_size)

        # Evaluate and create SQL-statements.
        for artifact_index, artifact in enumerate(tqdm(artifacts, position=process_index)):

            # Unpack fields.
            artifact_id, pcd_path = artifact

            # Check if there is already an entry.
            select_sql_statement = ""
            select_sql_statement += "SELECT COUNT(*) FROM artifact_result"
            select_sql_statement += " WHERE artifact_id='{}'".format(artifact_id)
            select_sql_statement += " AND model_name='{}'".format(model_name)
            select_sql_statement += " AND target_key='{}'".format(target_key)
            results = db_connector.execute(select_sql_statement, fetch_one=True)[0]

            # There is already an entry. Skip.
            if results != 0:
                continue

            # Execute SQL statement.
            try:
                # Load the artifact and evaluate.
                pcd_path = pcd_path.replace("/whhdata/qrcode", "/localssd/qrcode")
                pcd_array = utils.load_pcd_as_ndarray(pcd_path)
                pcd_array = utils.subsample_pointcloud(
                    pcd_array,
                    pointcloud_target_size,
                    subsampling_method=pointcloud_subsampling_method)
                value = model.predict(np.expand_dims(pcd_array, axis=0), verbose=0)[0][0]

                # Create an SQL statement.
                sql_statement = ""
                sql_statement += "INSERT INTO artifact_result (model_name, target_key, value, artifact_id)"
                sql_statement += " VALUES('{}', '{}', '{}', '{}');".format(
                    model_name, target_key, value, artifact_id)

                # Call database.
                result = db_connector.execute(sql_statement)
            except psycopg2.IntegrityError as e:
                # print("Already in DB. Skipped.", pcd_path)
                pass
            except ValueError as e:
                # print("Skipped.", pcd_path)
                pass

    # Run this in multiprocess mode.
    utils.multiprocess(artifacts,
                       process_method=process_artifacts,
                       process_individial_entries=False,
                       progressbar=False,
                       pass_process_index=True,
                       disable_gpu=True)
    print("Done.")
def execute_command_persons():
    print("Updating persons...")
    main_connector = dbutils.connect_to_main_database()

    # Where to get the data.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = sorted(glob.glob(glob_search_path))
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns.
    columns = list(df)
    print(columns)
    # ['personId', 'qrcode', 'sex', 'type', 'age', 'height', 'weight', 'muac',
    #  'headCircumference', 'oedema', 'latitude', 'longitude', 'address',
    #  'timestamp', 'deleted', 'deletedBy', 'visible', 'createdBy']

    # Target table schema:
    """
    id VARCHAR(255) PRIMARY KEY,
    name TEXT NOT NULL,
    surname TEXT NOT NULL,
    birthday BIGINT NOT NULL,
    sex TEXT NOT NULL,
    guardian TEXT NOT NULL,
    is_age_estimated BOOLEAN NOT NULL,
    qr_code TEXT NOT NULL,
    created BIGINT NOT NULL,
    timestamp BIGINT NOT NULL,
    created_by TEXT NOT NULL,
    deleted BOOLEAN NOT NULL,
    deleted_by TEXT NOT NULL
    """

    table = "person"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    used_ids = []
    for index, row in df.iterrows():
        bar.update(index)

        # Make sure there are no duplicates. Local.
        select_sql_statement = "SELECT COUNT(*) FROM person WHERE id='{}'".format(row["personId"])
        result = main_connector.execute(select_sql_statement, fetch_one=True)[0]
        if row["personId"] in used_ids or result != 0:
            # print(row["personId"], "already in DB")
            pass
        else:
            # TODO check all of these.
            insert_data = {}
            insert_data["id"] = row["personId"]
            insert_data["name"] = "UNKNOWN"
            insert_data["surname"] = "UNKNOWN"
            insert_data["birthday"] = 0
            insert_data["sex"] = "UNKNOWN"
            insert_data["guardian"] = "UNKNOWN"
            insert_data["is_age_estimated"] = False
            insert_data["qr_code"] = row["qrcode"]
            insert_data["created"] = 0
            insert_data["timestamp"] = row["timestamp"]
            insert_data["created_by"] = row["createdBy"]
            insert_data["deleted"] = row["deleted"]
            insert_data["deleted_by"] = row["deletedBy"]
            sql_statement += dbutils.create_insert_statement(
                table, insert_data.keys(), insert_data.values())
            # print(sql_statement)
            used_ids.append(row["personId"])

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1) and sql_statement != "":
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
def execute_command_updatemeasurements():
    print("Updating measurements...")
    main_connector = dbutils.connect_to_main_database()

    # Where to get the data.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = sorted(glob.glob(glob_search_path))
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns and map CSV column names to database column names.
    columns = list(df)
    columns_mapping = {column: column for column in columns}
    columns_mapping["id"] = "measurement_id"
    columns_mapping["personId"] = "person_id"
    columns_mapping["age"] = "age_days"
    columns_mapping["height"] = "height_cms"
    columns_mapping["weight"] = "weight_kgs"
    columns_mapping["muac"] = "muac_cms"
    columns_mapping["headCircumference"] = "head_circumference_cms"
    columns_mapping["deletedBy"] = "deleted_by"
    columns_mapping["createdBy"] = "created_by"

    table = "measurements"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    for index, row in df.iterrows():
        bar.update(index)

        keys = []
        values = []
        for df_key, db_key in columns_mapping.items():
            keys.append(str(db_key))
            values.append(str(row[df_key]))

        sql_statement += dbutils.create_insert_statement(table, keys, values)

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1):
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
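# A minimal sketch, assuming a raw psycopg2 connection is available (the dbutils wrapper
# is not shown): instead of concatenating one INSERT string per row and flushing every
# batch_size rows, psycopg2's execute_values sends a single multi-row INSERT per batch.
# The helper below is hypothetical; the column names would follow the mapping above.
from psycopg2.extras import execute_values

def insert_measurement_batch(connection, db_columns, rows):
    # rows is a list of tuples ordered like db_columns.
    sql = "INSERT INTO measurements ({}) VALUES %s".format(", ".join(db_columns))
    with connection.cursor() as cursor:
        execute_values(cursor, sql, rows)
    connection.commit()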
def execute_command_init():
    print("Initializing DB...")
    main_connector = dbutils.connect_to_main_database()
    main_connector.execute_script_file("schema.sql")
    print("Done.")