Exemplo n.º 1
0
def insert_simple_item(kind, resources):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    if kind not in controle_kind:
        raise Exception("kind unknown")

    splitted_kind = kind.split('_')
    level = controle_kind[kind]
    resource_type = splitted_kind[0]
    
    if resource_type not in resource_kinds:
        raise Exception("resource unknown") 

    conn = db_init.db_connect()
    cur = conn.cursor()
    cur.execute("INSERT INTO controles.metadata VALUES (DEFAULT, %s, %s, %s, %s, %s) RETURNING id;", (timestamp, kind, level, len(resources), "FR"))
    meta_id = cur.fetchone()[0]
    if len(resources) == 0:
        conn.commit()
        return
        
    cur.execute("INSERT INTO controles.item VALUES (DEFAULT, %s) RETURNING id;", (meta_id,))
    item_id = cur.fetchone()[0]
    
    for res in resources:
        cur.execute("INSERT INTO controles.resource VALUES (DEFAULT, %s, %s, %s, %s)", (res[0], res[1], resource_type, item_id))
    conn.commit()
Exemplo n.º 2
0
def execute_simple(kind, sql):
    conn = db_init.db_connect()
    cur = conn.cursor()
    cur.execute(sql)
    resources = cur.fetchall()

    insert_simple_item(kind, resources)
Exemplo n.º 3
0
def check_housenumber_same_ordinal():
    conn = db_init.db_connect()
    cur = conn.cursor()
    cur.execute("SELECT hn1.pk, hn1.version, hn2.pk, hn2.version FROM housenumber as hn1, housenumber as hn2 WHERE hn1.pk != hn2.pk AND hn1.parent_id =hn2.parent_id AND hn1.number = hn2.number AND upper(hn1.ordinal) = substring(upper(hn2.ordinal) from 1 for 1);")
    resources = cur.fetchall()

    insert_double_item("housenumber_same_ordinal", resources)
Exemplo n.º 4
0
def check_group_kind():
    conn = db_init.db_connect()
    cur = conn.cursor()

    script_path = os.path.dirname(__file__)
    way_file_path = os.path.join(script_path, 'way.txt')
    area_file_path = os.path.join(script_path, 'area.txt')
    way_arr = open(way_file_path, 'r').read().splitlines()
    ways = ''
    for way in way_arr:
        ways += way + '|'
    ways = ways[:-1]

    areas = ''
    area_arr = open(area_file_path, 'r').read().splitlines()
    for area in area_arr:
        areas += area + '|'
    areas = areas[:-1]

    way_schem = '('+ways+') %'
    area_schem = '('+areas+') %'
    cur.execute("""SELECT pk, version FROM "group" where (name SIMILAR TO %s AND name NOT SIMILAR TO %s AND kind != 'way') OR (name SIMILAR TO %s AND name NOT SIMILAR TO %s AND kind != 'area')""", (way_schem, area_schem, area_schem, way_schem))
    resources = cur.fetchall()
    
    insert_simple_item("group_kind", resources)
Exemplo n.º 5
0
def check_pile():
    conn = db_init.db_connect()
    cur = conn.cursor()
    cur.execute("drop table if exists position90;")
    cur.execute(
        """create table position90 as select p.*, st_transform(p.center,2154) as c2154 from position p, housenumber h, "group" g, municipality m
    where p.housenumber_id=h.pk
    and h.parent_id=g.pk
    and g.municipality_id=m.pk
    and m.insee like '90%';""")
    cur.execute(
        "CREATE INDEX position90_c2154 ON position90 USING gist (c2154);")
    conn.commit()
    #    cur.execute(
    #        """SELECT p1.pk, p1.version, p2.pk, p2.version
    #        FROM position p1, position p2, housenumber h1, housenumber h2
    #        WHERE p1.housenumber_id=h1.pk AND p2.housenumber_id=h2.pk
    #        AND p1.source_kind=p2.source_kind
    #        AND st_distance(st_transform(p1.center,2154), st_transform(p2.center,2154))<5
    #        AND NOT h1.pk=h2.pk;"""
    #    )
    cur.execute(
        "SELECT p1.pk, p1.version, p2.pk, p2.version,'' FROM position90 p1, position90 p2, housenumber h1, housenumber h2 WHERE p1.housenumber_id=h1.pk AND p2.housenumber_id=h2.pk AND p1.source_kind=p2.source_kind AND p1.c2154 && st_buffer(p2.c2154,5) AND st_distance(p1.c2154, p2.c2154)<5 AND NOT h1.pk=h2.pk;"
    )
    resources = cur.fetchall()

    insert_multiple_item("housenumber_pile", resources)
Exemplo n.º 6
0
def check_group_name_format():
    conn = db_init.db_connect()
    cur = conn.cursor()
    sql = """[!"$%&()*+,/:;<=>?[\]^_|~#]+"""
    cur.execute("""select pk, version from "group" where name ~* %s;""", (sql,))
    resources = cur.fetchall()

    insert_simple_item("group_name_format", resources)
Exemplo n.º 7
0
def check_housenumber_ordinal_format():
    conn = db_init.db_connect()
    cur = conn.cursor()
    sql = """[!"$%&()*+,./:;<=>?[\]^-_|~#]+"""
    cur.execute("select pk, version from housenumber where ordinal ~* %s;", (sql,))
    resources = cur.fetchall()

    insert_simple_item("housenumber_ordinal_format", resources)
Exemplo n.º 8
0
def sum_stat_table(cur=db_connect()[0]):
    cols = [
        "population", "poverty_rate", "pct_renter_occupied",
        "median_gross_rent", "median_household_income",
        "median_property_value", "rent_burden", "pct_white", "pct_af_am",
        "pct_hispanic", "pct_am_ind", "pct_asian", "pct_nh_pi", "pct_multiple",
        "pct_other", "renter_occupied_households", "eviction_filings",
        "evictions", "eviction_rate", "eviction_filing_rate"
    ]

    cur.execute(
        '''SELECT avg(population), avg(poverty_rate), avg(pct_renter_occupied), avg(median_gross_rent), avg(median_household_income),
			avg(median_property_value), avg(rent_burden), avg(pct_white), avg(pct_af_am), avg(pct_hispanic), avg(pct_am_ind),
			avg(pct_asian), avg(pct_nh_pi), avg(pct_multiple), avg(pct_other), avg(renter_occupied_households), avg(eviction_filings),
			avg(evictions), avg(eviction_rate), avg(eviction_filing_rate) FROM evictions.blockgroup;'''
    )
    avg_row = list(cur.fetchone())

    cur.execute(
        '''SELECT min(population), min(poverty_rate), min(pct_renter_occupied), min(median_gross_rent), min(median_household_income),
			min(median_property_value), min(rent_burden), min(pct_white), min(pct_af_am), min(pct_hispanic), min(pct_am_ind),
			min(pct_asian), min(pct_nh_pi), min(pct_multiple), min(pct_other), min(renter_occupied_households), min(eviction_filings),
			min(evictions), min(eviction_rate), min(eviction_filing_rate) FROM evictions.blockgroup;'''
    )
    min_row = list(cur.fetchone())

    cur.execute(
        '''SELECT max(population), max(poverty_rate), max(pct_renter_occupied), max(median_gross_rent), max(median_household_income),
			max(median_property_value), max(rent_burden), max(pct_white), max(pct_af_am), max(pct_hispanic), max(pct_am_ind),
			max(pct_asian), max(pct_nh_pi), max(pct_multiple), max(pct_other), max(renter_occupied_households), max(eviction_filings),
			max(evictions), max(eviction_rate), max(eviction_filing_rate) FROM evictions.blockgroup;'''
    )
    max_row = list(cur.fetchone())

    cur.execute(
        '''SELECT stddev(population), stddev(poverty_rate), stddev(pct_renter_occupied), stddev(median_gross_rent), stddev(median_household_income),
			stddev(median_property_value), stddev(rent_burden), stddev(pct_white), stddev(pct_af_am), stddev(pct_hispanic), stddev(pct_am_ind),
			stddev(pct_asian), stddev(pct_nh_pi), stddev(pct_multiple), stddev(pct_other), stddev(renter_occupied_households), stddev(eviction_filings),
			stddev(evictions), stddev(eviction_rate), stddev(eviction_filing_rate) FROM evictions.blockgroup;'''
    )
    stddev_row = list(cur.fetchone())

    df = pd.DataFrame(
        OrderedDict({
            "variable": cols,
            "avg": avg_row,
            "min": min_row,
            "max": max_row,
            "std": stddev_row
        }))
    df.to_csv("sum_stat_table.csv")
    return df
Exemplo n.º 9
0
def check_group_same_name():
    conn = db_init.db_connect()
    cur = conn.cursor()
    cur.execute("""select g1.pk, g1.version, g2.pk, g2.version, '' 
        FROM "group" as g1, "group" as g2 
        where g1.municipality_id = g2.municipality_id 
        and g1.pk > g2.pk 
        and dmetaphone_alt(g1.name)=dmetaphone_alt(g2.name) 
        and levenshtein(g1.name, g2.name)::float/LEAST(length(g1.name), length(g2.name)) + (1-similarity(g1.name, g2.name)::float ) < 0.44"""
                )
    resources = cur.fetchall()

    insert_double_item("group_same_name", resources)
Exemplo n.º 10
0
def outlier_table(cur=db_connect()[0]):
    cols = [
        "population", "poverty_rate", "pct_renter_occupied",
        "median_gross_rent", "median_household_income",
        "median_property_value", "rent_burden", "pct_white", "pct_af_am",
        "pct_hispanic", "pct_am_ind", "pct_asian", "pct_nh_pi", "pct_multiple",
        "pct_other", "renter_occupied_households", "eviction_filings",
        "evictions", "eviction_rate", "eviction_filing_rate"
    ]

    table = []
    for col in cols:
        cur.execute("ROLLBACK;")
        #drop temp table if it exists already
        try:
            cur.execute("DROP TABLE tmp;")
        except:
            pass

        #create tmp table for vairable
        tmp = '''CREATE TEMP TABLE tmp as SELECT avg({}) as mean, stddev({})*3 as out_there 
                FROM evictions.blockgroup;'''.format(col, col)

        #identify count and average of high and low outliers
        query_high = "SELECT count({}), avg({}) FROM evictions.blockgroup WHERE {} > (select mean + out_there from tmp);".format(
            col, col, col)
        query_low = "SELECT count({}), avg({}) FROM evictions.blockgroup WHERE {} < (select mean - out_there from tmp);".format(
            col, col, col)

        #execute queries and build output table
        cur.execute(tmp)
        cur.execute(query_high)
        row = [col] + list(cur.fetchone())
        cur.execute(query_low)
        row += list(cur.fetchone())
        table.append(row)

    df = pd.DataFrame(table)
    df.columns = ["variable", "count high", "avg high", "count low", "avg low"]
    df.to_csv("outlier_table.csv")

    return df
Exemplo n.º 11
0
def check_housenumber_outside_municipality():
    conn = db_init.db_connect()
    cur = conn.cursor()

    cur.execute(
        """SELECT hn.pk, hn.version, 'num : ' || hn.number || ' // parent : ' || hn.parent_id || ' // insee : ' || mu.insee
        FROM housenumber hn INNER JOIN postcode pc ON hn.postcode_id=pc.pk
                            INNER JOIN municipality mu ON pc.municipality_id=mu.pk
                            INNER JOIN surface_commune sc ON mu.insee=sc.code_insee_cdc
                            INNER JOIN position po ON hn.pk=po.housenumber_id
        WHERE sc.gcms_detruit=FALSE AND hn.postcode_id IS NOT NULL AND ST_Within(ST_Transform(po.center, 2154), sc.geometrie)=FALSE
        UNION
        SELECT hn.pk, hn.version, 'num : ' || hn.number || ' // parent : ' || hn.parent_id || ' // insee : ' || mu.insee
        FROM housenumber hn INNER JOIN "group" gp ON hn.postcode_id=gp.pk
                            INNER JOIN municipality mu ON gp.municipality_id=mu.pk
                            INNER JOIN surface_commune sc ON mu.insee=sc.code_insee_cdc 
                            INNER JOIN position po ON hn.pk=po.housenumber_id
        WHERE sc.gcms_detruit=FALSE AND hn.postcode_id IS NULL AND ST_Within(ST_Transform(po.center, 2154), sc.geometrie)=FALSE;
    """)
    resources = cur.fetchall()

    insert_simple_item("housenumber_outside_municipality", resources)
Exemplo n.º 12
0
def insert_multiple_item(kind, resources):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    if kind not in controle_kind:
        raise Exception("kind unknown")

    splitted_kind = kind.split('_')
    level = controle_kind[kind]
    resource_type = splitted_kind[0]
    if resource_type not in resource_kinds:
        raise Exception("resource unknown")

    conn = db_init.db_connect()
    cur = conn.cursor()
    cur.execute(
        "INSERT INTO controles.metadata VALUES (DEFAULT, %s, %s, %s, %s, %s) RETURNING id;",
        (timestamp, kind, level, len(resources), 'FR'))
    meta_id = cur.fetchone()[0]

    g = nx.Graph()
    for res in resources:
        g.add_edge(res[0], res[2])
    cc = nx.connected_components(g)
    for cci in cc:
        print("composante connexe {}".format(cci))
Exemplo n.º 13
0
def check_housenumber_missing_ordinal():
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    kind = 'housenumber_missing_ordinal'
    if kind not in controle_kind:
        raise Exception("kind unknown")

    splitted_kind = kind.split('_')
    level = controle_kind[kind]
    resource_type = splitted_kind[0]
    if resource_type not in resource_kinds:
        raise Exception("resource unknown")

    conn = db_init.db_connect()
    cur = conn.cursor()
    cur.execute("""CREATE TEMPORARY TABLE IF NOT EXISTS temp_pos (
            ordinal VARCHAR(16),
            pos INT
        );
        INSERT INTO temp_pos (ordinal, pos) VALUES('', 1);
        INSERT INTO temp_pos (ordinal, pos) VALUES('A', 2);
        INSERT INTO temp_pos (ordinal, pos) VALUES('B', 3);
        INSERT INTO temp_pos (ordinal, pos) VALUES('C', 4);
        INSERT INTO temp_pos (ordinal, pos) VALUES('D', 5);
        INSERT INTO temp_pos (ordinal, pos) VALUES('E', 6);
        INSERT INTO temp_pos (ordinal, pos) VALUES('F', 7);
        INSERT INTO temp_pos (ordinal, pos) VALUES('G', 8);
        INSERT INTO temp_pos (ordinal, pos) VALUES('H', 9);
        INSERT INTO temp_pos (ordinal, pos) VALUES('I', 10);
        INSERT INTO temp_pos (ordinal, pos) VALUES('J', 11);
        INSERT INTO temp_pos (ordinal, pos) VALUES('K', 12);
        INSERT INTO temp_pos (ordinal, pos) VALUES('L', 13);
        INSERT INTO temp_pos (ordinal, pos) VALUES('BIS', 2);
        INSERT INTO temp_pos (ordinal, pos) VALUES('TER', 3);
        INSERT INTO temp_pos (ordinal, pos) VALUES('QUATER', 4);
        INSERT INTO temp_pos (ordinal, pos) VALUES('QUINQUIES', 5);
        INSERT INTO temp_pos (ordinal, pos) VALUES('SEXTO', 6);
        SELECT number, parent_id
        FROM (
            SELECT pk, version, number, parent_id, pos, rank() OVER (PARTITION BY number, parent_id ORDER BY pos ASC) AS posit
            FROM (
                SELECT hn.pk, hn.version, hn.number, hn.parent_id, hn.ordinal, tp.pos
                FROM housenumber hn JOIN temp_pos tp ON coalesce(hn.ordinal, '') = tp.ordinal
            ) AS t1
        ) AS t2
        WHERE posit!= pos
        GROUP BY number, parent_id;
        """)
    resources = cur.fetchall()
    cur.execute(
        "INSERT INTO controles.metadata VALUES (DEFAULT, %s, %s, %s, %s, %s) RETURNING id;",
        (timestamp, kind, level, len(resources), 'FR'))
    meta_id = cur.fetchone()[0]

    for res in resources:
        cur.execute(
            "INSERT INTO controles.item VALUES (DEFAULT, %s, %s) RETURNING id;",
            (meta_id, ''))
        item_id = cur.fetchone()[0]
        cur.execute(
            "SELECT hn.pk, hn.version, coalesce(hn.ordinal, '') FROM housenumber hn JOIN temp_pos tp ON coalesce(hn.ordinal, '')=tp.ordinal WHERE number=%s AND parent_id=%s ORDER BY tp.pos;",
            (res[0], res[1]))
        housenumbers = cur.fetchall()
        comment = ''
        for hn in housenumbers:
            comment = comment + hn[2] + '-'
            cur.execute(
                "INSERT INTO controles.resource VALUES (DEFAULT, %s, %s, %s, %s);",
                (hn[0], hn[1], resource_type, item_id))
        cur.execute("UPDATE controles.item SET comment=%s WHERE id=%s;",
                    (comment, item_id))
    conn.commit()