def process_artifact_paths(artifact_paths):
        main_connector = dbutils.connect_to_main_database()
        table = "artifact"

        batch_size = 100

        insert_count = 0
        no_measurements_count = 0
        skip_count = 0
        bar = progressbar.ProgressBar(max_value=len(artifact_paths))
        sql_statement = ""
        last_index = len(artifact_paths) - 1
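        # Accumulate INSERT statements and flush them to the database in
        # batches of batch_size, plus a final flush at the last index.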
        for index, artifact_path in enumerate(artifact_paths):
            bar.update(index)

            # Check if there is already an entry.
            basename = os.path.basename(artifact_path)
            sql_statement_select = dbutils.create_select_statement(
                "artifact", ["id"], [basename])  # TODO is this the proper id?
            results = main_connector.execute(sql_statement_select,
                                             fetch_all=True)

            # No results found. Insert.
            if len(results) == 0:
                insert_data = {}
                insert_data["id"] = basename  # TODO proper?

                # Process the artifact.
                default_values = get_default_values(artifact_path, table,
                                                    main_connector)
                if default_values is not None:
                    insert_data.update(default_values)
                    sql_statement += dbutils.create_insert_statement(
                        table, insert_data.keys(), insert_data.values())
                    insert_count += 1
                else:
                    no_measurements_count += 1

            # Found an existing entry. Skip it.
            else:
                skip_count += 1

            # Flush the accumulated statements to the database in batches.
            if (index != 0 and index % batch_size == 0) or index == last_index:
                if sql_statement != "":
                    result = main_connector.execute(sql_statement)
                    sql_statement = ""

        bar.finish()
        print("Inserted {} new entries.".format(insert_count))
        print("No measurements for {} entries.".format(no_measurements_count))
        print("Skipped {} entries.".format(skip_count))
Example #2
def update_media_table(file_paths, table, get_values, batch_size=1000):
    insert_count = 0
    no_measurements_count = 0
    skip_count = 0
    bar = progressbar.ProgressBar(max_value=len(file_paths))
    sql_statement = ""
    last_index = len(file_paths) - 1
    for index, file_path in enumerate(file_paths):
        bar.update(index)

        # Check if there is already an entry. The table stores the basename
        # in the "path" column, so look the entry up by basename as well.
        path = os.path.basename(file_path)
        sql_statement_select = dbutils.create_select_statement(
            table, ["path"], [path])
        results = main_connector.execute(sql_statement_select, fetch_all=True)

        # No results found. Insert.
        if len(results) == 0:
            insert_data = {"path": path}
            default_values = get_default_values(file_path, table)
            if default_values is not None:
                insert_data.update(default_values)
                insert_data.update(get_values(file_path))
                sql_statement += dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values())
                insert_count += 1
            else:
                no_measurements_count += 1

        # Found an existing entry. Skip it.
        else:
            # TODO check if measurement id is missing or not
            skip_count += 1

        # Flush the accumulated statements to the database in batches.
        if (index != 0 and index % batch_size == 0) or index == last_index:
            if sql_statement != "":
                result = main_connector.execute(sql_statement)
                sql_statement = ""

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))
Example #3
def main():

    # Validate the arguments before reading them, otherwise a short argv
    # would raise an IndexError before the usage message is printed.
    if len(sys.argv) != 4:
        print(
            "usage: command_update_models.py models_file db_connection_file table_name"
        )
        sys.exit(1)

    models_file = str(sys.argv[1])
    db_connection_file = str(sys.argv[2])
    table_name = str(sys.argv[3])

    main_connector = dbutils.connect_to_main_database(db_connection_file)

    with open(models_file) as json_file:
        json_data = json.load(json_file)

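        # Expected structure of the models file, inferred from the loop
        # below: {"models": [{"name": "<version>_<suffix>", ...}, ...]}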
        for data in json_data["models"]:

            check_existing_models = "SELECT id FROM {};".format(table_name)
            results = main_connector.execute(check_existing_models,
                                             fetch_all=True)
            # Compare against the actual ids; a substring check on the
            # stringified results could produce false positives.
            existing_ids = {result[0] for result in results}
            if data["name"] in existing_ids:
                print("{} already exists in model table".format(data["name"]))
                continue
            value_mapping = {}
            value_mapping["id"] = data["name"]
            value_mapping["name"] = data["name"]
            version = data["name"].split('_')[0]
            value_mapping["version"] = version
            del data["name"]
            value_mapping["json_metadata"] = json.dumps(data)
            keys = list(value_mapping.keys())
            values = list(value_mapping.values())
            sql_statement = dbutils.create_insert_statement(
                table_name, keys, values, False, True)
            try:
                results = main_connector.execute(sql_statement)
                print("{} successfully added to model table".format(
                    value_mapping["name"]))
            except Exception as error:
                print(error)

    main_connector.cursor.close()
    main_connector.connection.close()
Example #4
def execute_command_updatemeasurements():
    print("Updating measurements...")

    main_connector = dbutils.connect_to_main_database()

    # Where to get the data: the most recently modified CSV.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = glob.glob(glob_search_path)
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns.
    columns = list(df)
    columns_mapping = {column: column for column in columns}
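    # Rename CSV columns to their database counterparts; unmapped columns
    # keep their original names.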
    columns_mapping["id"] = "measurement_id"
    columns_mapping["personId"] = "person_id"
    columns_mapping["age"] = "age_days"
    columns_mapping["height"] = "height_cms"
    columns_mapping["weight"] = "weight_kgs"
    columns_mapping["muac"] = "muac_cms"
    columns_mapping["headCircumference"] = "head_circumference_cms"
    columns_mapping["deletedBy"] = "deleted_by"
    columns_mapping["createdBy"] = "created_by"
    columns_mapping["personId"] = "person_id"

    table = "measurements"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    for index, row in df.iterrows():
        bar.update(index)

        keys = []
        values = []
        for df_key, db_key in columns_mapping.items():
            keys.append(str(db_key))
            values.append(str(row[df_key]))

        sql_statement += dbutils.create_insert_statement(table, keys, values)

        # Flush in batches; always flush on the final row.
        if (index != 0 and index % batch_size == 0) or index == rows_number_df - 1:
            main_connector.execute(sql_statement)
            sql_statement = ""

    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
def execute_command_updatemeasurements():
    print("Updating measurements...")
    
    main_connector = dbutils.connect_to_main_database()
    
    # TODO import persons
    
    # Where to get the data: the most recently modified CSV.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = glob.glob(glob_search_path)
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)
    
    # List all columns.
    columns = list(df)
    print(columns)
    ignored_columns = ["sex", "address", "qrcode", "latitude", "longitude", "personId"]
    columns_mapping = { column: column for column in columns if column not in ignored_columns}
    columns_mapping["personId"] = "person_id"
    columns_mapping["headCircumference"] = "head_circumference"
    columns_mapping["deletedBy"] = "deleted_by"
    columns_mapping["createdBy"] = "created_by"
    
    table = "measure"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    for index, row in df.iterrows():
        bar.update(index)

        keys = []
        values = []
        for df_key, db_key in columns_mapping.items():
            keys.append(str(db_key))
            values.append(str(row[df_key]))
            
        # Columns missing from the CSV get placeholder values. Cast to str
        # for consistency with the other values.
        # TODO what is this?
        keys.append("date")
        values.append(str(int(time.time())))

        # TODO what is this?
        keys.append("artifact")
        values.append("UNKNOWN")
        
        sql_statement += dbutils.create_insert_statement(table, keys, values)
        
        # Flush in batches; always flush on the final row.
        if (index != 0 and index % batch_size == 0) or index == rows_number_df - 1:
            main_connector.execute(sql_statement)
            sql_statement = ""

    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
def execute_command_persons():
    print("Updating persons...")

    main_connector = dbutils.connect_to_main_database()

    # TODO import persons

    # Where to get the data: the most recently modified CSV.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = glob.glob(glob_search_path)
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns.
    columns = list(df)
    print(columns)

    #    ['personId', 'qrcode', 'sex', 'type', 'age', 'height', 'weight', 'muac', 'headCircumference', 'oedema', 'latitude', 'longitude', 'address', 'timestamp', 'deleted', 'deletedBy', 'visible', 'createdBy']
    """
    id VARCHAR(255) PRIMARY KEY,
    name TEXT NOT NULL,
    surname TEXT NOT NULL,
    birthday BIGINT NOT NULL,
    sex TEXT NOT NULL,
    guardian TEXT NOT NULL,
    is_age_estimated BOOLEAN NOT NULL,
    qr_code TEXT NOT NULL,
    created BIGINT NOT NULL,
    timestamp BIGINT NOT NULL,
    created_by TEXT NOT NULL,
    deleted BOOLEAN NOT NULL,
    deleted_by TEXT NOT NULL
    """

    table = "person"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    used_ids = set()
    for index, row in df.iterrows():
        bar.update(index)

        # Skip duplicates: ids already seen in this run or already present in
        # the database. Note: the id is interpolated directly into the SQL,
        # which assumes trusted input.
        select_sql_statement = "SELECT COUNT(*) FROM person WHERE id='{}'".format(
            row["personId"])
        result = main_connector.execute(select_sql_statement,
                                        fetch_one=True)[0]
        if row["personId"] not in used_ids and result == 0:
            # TODO check all of these.
            insert_data = {}
            insert_data["id"] = row["personId"]
            insert_data["name"] = "UNKNOWN"
            insert_data["surname"] = "UNKNOWN"
            insert_data["birthday"] = 0
            insert_data["sex"] = "UNKNOWN"
            insert_data["guardian"] = "UNKNOWN"
            insert_data["is_age_estimated"] = False
            insert_data["qr_code"] = row["qrcode"]
            insert_data["created"] = 0
            insert_data["timestamp"] = row["timestamp"]
            insert_data["created_by"] = row["createdBy"]
            insert_data["deleted"] = row["deleted"]
            insert_data["deleted_by"] = row["deletedBy"]

            sql_statement += dbutils.create_insert_statement(
                table, insert_data.keys(), insert_data.values())
            used_ids.add(row["personId"])

        # Flush in batches; always flush on the final row.
        if ((index != 0 and index % batch_size == 0)
                or index == rows_number_df - 1) and sql_statement != "":
            main_connector.execute(sql_statement)
            sql_statement = ""

    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
Example #7
def execute_command_persons():
    print("Updating persons...")
    
    main_connector = dbutils.connect_to_main_database()
    
    # Where to get the data.
    csv_path = config.measure_csv_path
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)
    
    # List all columns.
    columns = list(df)
    print(columns)
        
    table = "person"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    used_ids = set()
    for index, row in df.iterrows():
        bar.update(index)
        
        # Skip duplicates: ids already seen in this run or already present in
        # the database. Note: the id is interpolated directly into the SQL,
        # which assumes trusted input.
        select_sql_statement = "SELECT COUNT(*) FROM person WHERE id='{}'".format(row["personId"])
        result = main_connector.execute(select_sql_statement, fetch_one=True)[0]
        if row["personId"] not in used_ids and result == 0:
            # TODO check all of these.
            insert_data = {}
            insert_data["id"] = row["personId"]
            insert_data["name"] = "UNKNOWN" 
            insert_data["surname"] = "UNKNOWN" 
            insert_data["birthday"] = 0   
            insert_data["sex"] = "UNKNOWN" 
            insert_data["guardian"] = "UNKNOWN" 
            insert_data["is_age_estimated"] = False
            insert_data["qr_code"] = row["qrcode"]    
            insert_data["created"] = 0
            insert_data["timestamp"] = row["timestamp"]
            insert_data["created_by"] = row["createdBy"]
            insert_data["deleted"] = row["deleted"]
            insert_data["deleted_by"] = row["deletedBy"]

            sql_statement += dbutils.create_insert_statement(table, insert_data.keys(), insert_data.values())
            used_ids.add(row["personId"])
        
        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1) and sql_statement != "":
            main_connector.execute(sql_statement)
            sql_statement = ""

    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))
Example #8
#main_connector.clear_table(table)

sql_statement = "SELECT qrcode FROM measurements WHERE qrcode != 'NaN';"
results = main_connector.execute(sql_statement, fetch_all=True)
results = [result[0] for result in results]
print(sorted(list(set(results))))

# Everything below this early exit never runs; it is left-over demo code.
exit(0)

insert_data = {}
insert_data["path"] = "somepath"
insert_data["qrcode"] = "someqrcode"
insert_data["targets"] = "10, 20"
insert_data["last_updated"] = str(datetime.datetime.now())
insert_data["rejected_by_expert"] = False
insert_data["had_error"] = False
insert_data["error_message"] = ""
insert_data["width_px"] = 128
insert_data["height_px"] = 127
insert_data["blur_variance"] = 1.0
sql_statement = dbutils.create_insert_statement(table, insert_data.keys(),
                                                insert_data.values())
print(sql_statement)
results = main_connector.execute(sql_statement)
print(results)

sql_statement = dbutils.create_select_statement(table, ["path"], ["somepath"])
print(sql_statement)
results = main_connector.execute(sql_statement, fetch_all=True)
print(len(results))
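Example #9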
    def process_person_paths(person_paths, process_index):

        #person_paths = person_paths[0:4] # TODO remove this!

        # Statistics accumulate across all persons, so initialize the
        # counters before the loop rather than inside it (where they would
        # be reset for every person, and undefined for empty input).
        insert_count = 0
        no_measurements_count = 0
        skip_count = 0

        # Go through each person (qr-code).
        for person_path in tqdm(person_paths, position=process_index):

            person_path = person_path.replace('localssd/', 'localssd2/')

            print(person_path)

            # Find all artifacts for that person.
            artifact_paths = []
            for file_extension in file_extensions:
                print(file_extension)
                glob_search_path = os.path.join(
                    person_path, "**/*.{}".format(file_extension))
                # recursive=True is required for the "**" pattern to match
                # nested directories.
                artifact_paths.extend(glob.glob(glob_search_path, recursive=True))

            print("Found {} artifacts in {}".format(len(artifact_paths),
                                                    person_path))

            # Process those artifacts.
            main_connector = dbutils.connect_to_main_database()
            table = "artifact"
            batch_size = 100
            sql_statement = ""
            last_index = len(artifact_paths) - 1
            for artifact_index, artifact_path in enumerate(artifact_paths):

                # Check if there is already an entry in the database.
                basename = os.path.basename(artifact_path)
                sql_statement_select = dbutils.create_select_statement(
                    "artifact", ["id"], [basename])
                results = main_connector.execute(sql_statement_select,
                                                 fetch_all=True)

                # No results found. Insert.
                if len(results) == 0:
                    insert_data = {}
                    insert_data["id"] = basename  # TODO proper?

                    # Get the default values for the artifact.
                    default_values = get_default_values(
                        artifact_path, table, main_connector)

                    # Check if there is a measure_id.
                    if "measure_id" in default_values:
                        insert_count += 1
                    else:
                        no_measurements_count += 1

                    # Create SQL statement.
                    insert_data.update(default_values)
                    sql_statement_for_artifact = dbutils.create_insert_statement(
                        table, insert_data.keys(), insert_data.values())
                    sql_statement += sql_statement_for_artifact

                # Found an existing entry. Skip it.
                else:
                    skip_count += 1

                # Flush the accumulated statements to the database in batches.
                if ((artifact_index != 0 and artifact_index % batch_size == 0)
                        or artifact_index == last_index):
                    if sql_statement != "":
                        result = main_connector.execute(sql_statement)
                        sql_statement = ""

        # Return statistics.
        return (insert_count, no_measurements_count, skip_count)
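
# A hedged usage sketch (not part of the original code): the process_index
# argument, used as tqdm's position, suggests process_person_paths is meant
# to run in parallel worker processes. This assumes the function is defined
# at module level; run_in_parallel and number_of_processes are hypothetical
# names.
from multiprocessing import Pool

def run_in_parallel(person_paths, number_of_processes=4):
    # One interleaved chunk of paths per worker process.
    chunks = [person_paths[i::number_of_processes]
              for i in range(number_of_processes)]
    with Pool(number_of_processes) as pool:
        stats = pool.starmap(
            process_person_paths,
            [(chunk, index) for index, chunk in enumerate(chunks)])
    # Each worker returns (insert_count, no_measurements_count, skip_count).
    totals = [sum(values) for values in zip(*stats)]
    print("Inserted {}, no measurements for {}, skipped {}.".format(*totals))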