Пример #1
0
def geoi_assemble(
        db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
        lookup_schema='geoimputation',
        lookup_table='ged',
        month_start=400,
        month_end=400):

    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        trans = con.begin()
        query = alchemy_text("DROP TABLE IF EXISTS " + lookup_schema +
                             ".geoi_out_all;")
        trans.commit()
        con.execute(query)
        query = alchemy_text('''
        CREATE TABLE ''' + lookup_schema + '''.geoi_out_all AS
SELECT * FROM ''' + lookup_schema + '''.geoi_out_4 WHERE "1"<>1 UNION
SELECT * FROM ''' + lookup_schema + '''.geoi_out_6 WHERE "1"<>1 UNION
SELECT id,
  priogrid_gid as "1",
  priogrid_gid as "2",
  priogrid_gid as "3",
  priogrid_gid as "4",
  priogrid_gid as "5",
  priogrid_gid as "6",
  priogrid_gid as "7",
  priogrid_gid as "8",
  priogrid_gid as "9",
    priogrid_gid as "10",
    priogrid_gid as "11",
    priogrid_gid as "12",
    priogrid_gid as "13",
    priogrid_gid as "14",
    priogrid_gid as "15"
FROM ''' + lookup_schema + '''.''' + lookup_table + '''
WHERE month_id_end BETWEEN :m1 AND :m2;
        ''')
        con.execute(query, m1=month_start, m2=month_end)

        query = alchemy_text(
            '''
ALTER TABLE ''' + lookup_schema +
            '''.geoi_out_all ADD COLUMN month_start BIGINT;
ALTER TABLE ''' + lookup_schema + '''.geoi_out_all ADD COLUMN month_end BIGINT;
ALTER TABLE ''' + lookup_schema +
            '''.geoi_out_all ADD COLUMN type_of_violence BIGINT;
        ''')
        con.execute(query)

        query = alchemy_text('''
        UPDATE ''' + lookup_schema + '''.geoi_out_all SET
  month_start=c.month_id_start,
  month_end=c.month_id_end,
  type_of_violence=c.type_of_violence
FROM ''' + lookup_schema + '''.gedfull c
WHERE ''' + lookup_schema + '''.geoi_out_all.id = c.id;
''')
        con.execute(query)
Пример #2
0
def report_pgm (
        month_start,
        month_end,
        include_previous_months=0,
        db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
):
    month_start -= include_previous_months
    query=alchemy_text("""
    with a as (
    SELECT
      year_id,
      sum(to_dummy(ged_count_sb)) AS sb_count_pg,
      sum(to_dummy(ged_count_ns)) AS ns_count_pg,
      sum(to_dummy(ged_count_os)) AS os_count_pg,
      sum(ged_count_sb)           AS sb_count_events,
      sum(ged_count_ns)           AS ns_count_events,
      sum(ged_count_os)           AS os_count_events,
      sum(ged_best_sb)            AS sb_sum_deaths,
      sum(ged_best_ns)            AS ns_sum_deaths,
      sum(ged_best_os)            AS os_sum_deaths,
      month_id
    FROM preflight.flight_pgm
    WHERE month_id BETWEEN :s AND :e
    GROUP BY month_id, year_id
)
  SELECT m.month, a.*
FROM a, staging.month as m
WHERE a.month_id=m.id;
    """)
    query = query.bindparams(s=month_start, e=month_end)
    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        report = pd.read_sql(sql=query,con=con)
        return (report)
Пример #3
0
def _check_ged(month_start: int, api_version: str) -> None:
    """ Check that GED has data for month_start """

    # Imputation needs 12 months, check that we have that in the API?
    month_check = month_start - 11
    log.debug(f"Checking GED has data for {month_check}")

    engine = create_engine(CONNECTSTRING)

    with engine.connect() as con:
        query = alchemy_text(
            "SELECT month, year_id FROM staging.month WHERE id=:sd")
        result = con.execute(query, sd=month_check).fetchone()
        iso_start_check = "{1:04d}-{0:02d}-01".format(*result)
        iso_end_check = "{1:04d}-{0:02d}-25".format(*result)
        next_page_url = ("http://ucdpapi.pcr.uu.se/api/gedevents/" +
                         api_version + "?pagesize=1&StartDate=" +
                         iso_start_check + "&EndDate=" + iso_end_check)
        next_page_url, ged_slice, total_pages = _get_ged_slice(
            next_page_url=next_page_url)

        if total_pages > 0:
            log.debug(f"GED data exists for {month_check}")
        else:
            raise RuntimeError(
                f"No data in api version {api_version} for month {month_check}"
            )
Пример #4
0
def geteventdensity(
        priogrids_df,
        month,
        conflict_id,
        db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
        schema='geoimputation',
        table='ged'):
    """
    :param priogrids_df:
    :param month:
    :param db_engine_name:
    :param db_table_name:
    :return:
    """
    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        query = alchemy_text(
            "SELECT priogrid_gid, count(*)*100 as density FROM " + schema +
            "." + table +
            " WHERE conflict_new_id = :conflict AND month_id_end=:date GROUP BY priogrid_gid"
        )
        query = query.bindparams(conflict=conflict_id, date=month)
        density_df = pd.read_sql(query, con)
        #print(density_df)
    df_merge = priogrids_df.merge(density_df,
                                  left_on='gid',
                                  right_on='priogrid_gid',
                                  how='left')
    df_merge = df_merge.drop('priogrid_gid', axis=1)
    df_merge = df_merge.fillna(1)
    #print (df_merge)
    #print ("**************************")
    return df_merge
Пример #5
0
def _geteventdensity(
    priogrids_df: pd.DataFrame,
    month: int,
    conflict_id: int,
    schema: str,
    table: str,
):
    engine = create_engine(CONNECTSTRING)
    with engine.connect() as con:
        query = alchemy_text(f"""
            SELECT priogrid_gid,
            count(*)*100 as density
            FROM {schema}.{table}
            WHERE conflict_new_id = :conflict
            AND month_id_end=:date GROUP BY priogrid_gid
            """)
        query = query.bindparams(conflict=conflict_id, date=month)
        density_df = pd.read_sql(query, con)

    df_merge = priogrids_df.merge(density_df,
                                  left_on="gid",
                                  right_on="priogrid_gid",
                                  how="left")
    df_merge = df_merge.drop("priogrid_gid", axis=1)
    df_merge = df_merge.fillna(1)
    return df_merge
Пример #6
0
def checkGED(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
             version_number='5.9.99',
             month_start=400,
             imputation=True):

    print(month_start)

    if imputation:
        month_start = month_start - 11

    engine = create_engine(db_engine_name)

    with engine.connect() as con:
        query = alchemy_text(
            "SELECT month, year_id FROM staging.month WHERE id=:sd")
        go = False
        try:
            result = con.execute(query, sd=month_start).fetchone()
            iso_start_check = '{1:04d}-{0:02d}-01'.format(*result)
            iso_end_check = '{1:04d}-{0:02d}-25'.format(*result)
            next_page_url = 'http://ucdpapi.pcr.uu.se/api/gedevents/' + version_number + '?pagesize=1&StartDate=' + iso_start_check + '&EndDate=' + iso_end_check
            print(next_page_url)
            next_page_url, ged_slice, total_pages = _get_ged_slice(
                next_page_url=next_page_url, token='48dda3460c347f3b')
            # print(iso_start_check, iso_end_check, total_pages)
            if total_pages > 0: go = True
        except:
            go = False
        return go
Пример #7
0
def geoi_run(count=15,
             db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
             lookup_schema='geoimputation',
             lookup_table='ged',
             adm1=True):

    engine = create_engine(db_engine_name)
    out_df = pd.DataFrame()
    if adm1:
        poly_table = 'gadm1'
        table_id = '4'
    else:
        poly_table = 'country'
        table_id = '6'
    with engine.connect() as con:
        query = alchemy_text("SELECT count(*) FROM " + lookup_schema +
                             '.toimp' + table_id)
        row_count = con.execute(query).fetchone()
        i = 0
        tools.printProgressBar(0,
                               row_count[0],
                               prefix='Progress:',
                               suffix='Complete',
                               bar_length=50)

        query = alchemy_text(
            "SELECT id,geom_wkt,month_id_end as month_id,conflict_new_id as conflict_id "
            "FROM " + lookup_schema + '.toimp' + table_id)
        for row in con.execute(query):
            #print(row)
            i += 1
            tools.printProgressBar(i,
                                   row_count[0],
                                   prefix='Progress:',
                                   suffix='Complete',
                                   bar_length=50)
            out_df = out_df.append(
                geom_inputation(db_engine_name, lookup_schema, lookup_table,
                                poly_table, count, *row))
        out_df = out_df.rename(columns={0: 'id'})
        out_df.to_sql(con=con,
                      name='geoi_out_' + table_id,
                      schema=lookup_schema,
                      if_exists='replace',
                      index=False)
Пример #8
0
def monthid_to_ymd(month_id, db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'):
    '''given month_id, return Y-M-01 in ISO format'''
    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        query = alchemy_text("SELECT month, year_id FROM staging.month WHERE id=:id")
        try:
            result = con.execute(query,id=int(month_id)).fetchone()
            result = '{1:04d}-{0:02d}-01'.format(*result)
        except: result = None
        return result
Пример #9
0
def geoi_prepare(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
                 lookup_schema='geoimputation',
                 lookup_table='ged',
                 month_start=400,
                 month_end=400):
    """Copies the current attached GED for the lookup to work"""
    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.' +
                    lookup_table)
        con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.gedfull')
        con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.toimp4')
        con.execute('DROP TABLE IF EXISTS ' + lookup_schema + '.toimp6')
        trans = con.begin()
        con.execute('CREATE TABLE  ' + lookup_schema + '.' + lookup_table +
                    ' AS SELECT id,priogrid_gid,conflict_new_id,month_id_end,'
                    'month_id_start,type_of_violence, geom, best '
                    'FROM preflight.ged_attached')
        con.execute(
            'CREATE TABLE  ' + lookup_schema +
            '.gedfull AS SELECT id,priogrid_gid,conflict_new_id,month_id_end,'
            'month_id_start,type_of_violence, geom, best '
            'FROM preflight.ged_attached_full')
        con.execute('CREATE INDEX lookup_idx ON ' + lookup_schema + '.' +
                    lookup_table + ' (conflict_new_id,month_id_end)')
        con.execute('CREATE INDEX lookup2_idx ON ' + lookup_schema + '.' +
                    lookup_table + ' (priogrid_gid)')
        con.execute(alchemy_text(
            'CREATE TABLE  ' + lookup_schema +
            '.toimp4 AS SELECT * FROM preflight.ged_attached_full where where_prec=4'
            ' AND month_id_end between :m1 and :m2'),
                    m1=month_start,
                    m2=month_end)
        con.execute(alchemy_text(
            'CREATE TABLE  ' + lookup_schema +
            '.toimp6 AS SELECT * FROM preflight.ged_attached_full where where_prec=6'
            ' AND month_id_end between :m1 and :m2 AND geom IS NOT NULL'),
                    m1=month_start,
                    m2=month_end)
        trans.commit()
Пример #10
0
def _geoi_run(adm1: bool) -> None:

    log.debug("Started _geoi_run()")
    count = 15
    lookup_table = "ged"
    lookup_schema = "left_imputation"

    engine = create_engine(CONNECTSTRING)
    out_df = pd.DataFrame()

    if adm1:
        poly_table = "gadm1"
        table_id = "4"
    else:
        poly_table = "country"
        table_id = "6"

    with engine.connect() as con:
        query = alchemy_text("SELECT count(*) FROM " + lookup_schema +
                             ".toimp" + table_id)
        row_count = con.execute(query).fetchone()
        query = alchemy_text(
            "SELECT id,geom_wkt,month_id_end as month_id,conflict_new_id as conflict_id "
            "FROM " + lookup_schema + ".toimp" + table_id)
        for row in con.execute(query):
            out_df = out_df.append(
                _geom_imputation(lookup_schema, lookup_table, poly_table,
                                 count, *row))
        out_df = out_df.rename(columns={0: "id"})
        out_df.to_sql(
            con=con,
            name="geoi_out_" + table_id,
            schema=lookup_schema,
            if_exists="replace",
            index=False,
        )

    log.debug("Finished _geoi_run()")
Пример #11
0
def backupOldTable (db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
               schema_name='dataprep',
               table_name='ged'):
    '''
    creates a backup copy of a table with the name TABLEnnnnnnn where n is unix timestamp
    :param db_engine_name engine URI
    :param schema_name schema
    :param table_name
    '''

    ot = schema_name+'.'+table_name
    nt= schema_name+'.'+table_name+'_'+str(int(time()))
    query = alchemy_text('CREATE TABLE '+nt+' AS SELECT * FROM '+ot)

    engine = create_engine(db_engine_name)

    with engine.connect() as con:
        con.execute(query)
Пример #12
0
def validate (month_start,
              month_end,
              db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
              schema='preflight',
              table='flight_pgm'):
    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        query = alchemy_text("""SELECT min(month_id) as min, max(month_id) as max FROM """+schema+"."+table+""" 
        WHERE ged_count_sb IS NOT NULL  AND ged_count_sb > 0 AND month_id BETWEEN :m1 and :m2 """)
        try:
            result = con.execute(query, m1=month_start, m2=month_end).fetchone()
        except:
            result = None, None
    if result[0] is None:
        return False
    if int(result[0]) == int(month_start) and int(result[1]) == int(month_end):
        return True
    return False
Пример #13
0
def getMonthID(year,
               month,
               db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'):
    """
    converts a year and month into a Views MonthID
    :param year: Year as YYYY
    :param month:
    :param db_engine_name: Location of the VIEWS urI
    :return:
    """
    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        query = alchemy_text("""
        SELECT id FROM staging.month WHERE year_id=:y AND month=:m""")
        try:
            month_id = con.execute(query, y=year, m=month).fetchone()[0]
        except TypeError:
            raise ValueError(
                "Supplied year and month do not fit the 1980-2030 range for values:",
                year, month)
        return month_id
Пример #14
0
def preflightRUN(db_engine_name, level='pgm'):
    """
    :param db_engine_name: the Views DB URI
    :param level: the Views level to which you want to denormalize. CM and PGM are implemented
    :return: 0 on success, error code on failure.

    Runs a normal preflight run for Views, i.e. denormalizes the Views Staging tables to a flat format
    suitable for model running.
    Current implemented levels are cm and pgm.
    """

    script_path = os.path.join(
        os.path.dirname(os.path.realpath('__file__')),
        '../SQLFuelingPlans/flight_' + level.lower() + '.sql')
    try:
        preflight_file = open(script_path, 'r')
    except:
        return 1

    preflight_query = alchemy_text(preflight_file.read())

    if db_engine_name is None:
        print(preflight_query)
        return 2
    else:
        engine = create_engine(db_engine_name)
        with engine.connect() as con:
            trans = con.begin()
            con.execute("DROP TABLE IF EXISTS preflight.flight_" +
                        level.lower())
            trans.commit()
            trans = con.begin()
            print(preflight_query)
            con.execute(preflight_query)
            trans.commit()
        return 0
Пример #15
0
def date_to_monthid(*args,**kwargs):
    '''if called with date parse ISO string to Y-M-D and return month_id.
       if called with year,month,day obtain return month_id
       and if called with only an actual month_id, guess what, pass through!'''
    db_engine_name = 'postgresql://VIEWSADMIN@VIEWSHOST:5432/views'
    if 'db_engine_name' in kwargs:
        db_engine_name = kwargs['db_engine_name']

    if len(args) not in (1,3): return None

    if len(args)==1:
        if '-' not in args[0]:
            try: passthrough = int(args[0])
            except: passthrough = None
            return passthrough
        ymd=args[0].split('-')
        if len(ymd) != 3: return None
        try:
            y=int(ymd[0])
            m=int(ymd[1])
        except ValueError:
            return None
    if len(args)==3:
        try:
            y=int(args[0])
            m=int(args[1])
        except ValueError:
            return None

    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        query = alchemy_text("SELECT id FROM staging.month WHERE year_id=:y and month=:m")
        try:
            result = con.execute(query,y=y, m=m).fetchone()
            return result[0]
        except: return None
Пример #16
0
def _geoi_prepare(month_start: int, month_end: int) -> None:
    """Copies the current attached GED for the lookup to work"""
    log.debug("Starting _geoi_prepare()")
    engine = create_engine(CONNECTSTRING)

    db.drop_table("left_imputation.ged")
    db.drop_table("left_imputation.gedfull")
    db.drop_table("left_imputation.toimp4")
    db.drop_table("left_imputation.toimp6")
    with engine.connect() as con:
        trans = con.begin()
        log.debug("Creating left_imputation.ged")
        con.execute("""
            CREATE TABLE left_imputation.ged AS
            SELECT id,
                   priogrid_gid,
                   conflict_new_id,
                   month_id_end,
                   month_id_start,
                   type_of_violence,
                   geom,
                   best
            FROM preflight.ged_attached
            """)
        log.debug("Creating left_imputation.gedfull")
        con.execute("""
            CREATE TABLE left_imputation.gedfull AS
            SELECT id,
                   priogrid_gid,
                   conflict_new_id,
                   month_id_end,
                   month_id_start,
                   type_of_violence,
                   geom,
                   best
            FROM preflight.ged_attached_full
            """)
        con.execute(
            "CREATE INDEX lookup_idx ON left_imputation.ged (conflict_new_id, month_id_end)"
        )
        con.execute(
            "CREATE INDEX lookup2_idx ON left_imputation.ged (priogrid_gid)")
        log.debug("Creating left_imputation.toimp4")
        con.execute(
            alchemy_text("""
            CREATE TABLE  left_imputation.toimp4
            AS SELECT * FROM preflight.ged_attached_full
            where where_prec=4 AND month_id_end between :m1 and :m2
            """),
            m1=month_start,
            m2=month_end,
        )
        log.debug("Creating left_imputation.toimp6")
        con.execute(
            alchemy_text("""
            CREATE TABLE left_imputation.toimp6 AS
            SELECT * FROM preflight.ged_attached_full
            WHERE where_prec=6
            AND month_id_end between :m1 and :m2
            AND geom IS NOT NULL
            """),
            m1=month_start,
            m2=month_end,
        )
        trans.commit()

    log.debug("Finished _geoi_prepare()")
Пример #17
0
def stageGED2CM(month_start=445,
                month_end=446,
                db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'):

    if month_start > month_end:
        month_start, month_end = month_end, month_start

    engine = create_engine(db_engine_name)

    with engine.connect() as con:
        limits = con.execute(
            "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached"
        ).fetchone()
        if limits[0] > month_start: month_start = limits[0]
        if limits[1] <= month_end: month_end = limits[1]

    print("start/end:", month_start, month_end)

    print("Staging Country-Month with base GED variables and spatial lags...")

    query = alchemy_text("""
    UPDATE staging.country_month SET
    ged_best_sb = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,1),
    ged_best_ns = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,2),
    ged_best_os = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,3),
    ged_count_sb = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,1),
    ged_count_ns = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,2),
    ged_count_os = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,3),
    ged_best_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,1),
    ged_best_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,2),
    ged_best_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,3),
    ged_count_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,1),
    ged_count_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,2),
    ged_count_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,3) WHERE
month_id BETWEEN :m1 AND :m2 """)

    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    print("Staging Country-Month t-lags...")

    query = alchemy_text("""
SELECT public.make_country_month_temporal_lags(1,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(2,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(3,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(4,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(5,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(6,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(7,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(8,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(9,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(10,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(11,FALSE,109,:m2);
SELECT public.make_country_month_temporal_lags(12,FALSE,109,:m2);
    """)

    with engine.connect() as con:
        con.execute(query, m2=month_end)

    print("Staging Country-Month distances to nearest event...")

    query = alchemy_text("""
    UPDATE staging.country_month SET
ged_months_since_last_sb = public.cm_months_since_last_event('ged_count_sb', country_id, month_id),
ged_months_since_last_ns = public.cm_months_since_last_event('ged_count_ns', country_id, month_id),
ged_months_since_last_os = public.cm_months_since_last_event('ged_count_os', country_id, month_id),
ged_months_since_last_sb_lag1 = public.cm_months_since_last_event('ged_count_sb_lag1', country_id, month_id),
ged_months_since_last_ns_lag1 = public.cm_months_since_last_event('ged_count_ns_lag1', country_id, month_id),
ged_months_since_last_os_lag1 = public.cm_months_since_last_event('ged_count_os_lag1', country_id, month_id)
WHERE month_id BETWEEN :m1 AND :m2""")

    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    with engine.connect() as con:
        print("Updating base onset variables for CM...")
        trans = con.begin()
        query = alchemy_text(
            """with a as (SELECT * FROM onset_months_table('staging','country_month','ged_best_sb','country_id'))
          UPDATE staging.country_month SET onset_months_sb = a.onset_distance
          FROM a
          WHERE a.id=staging.country_month.id;

        with a as (SELECT * FROM onset_months_table('staging','country_month','ged_best_ns','country_id'))
          UPDATE staging.country_month SET onset_months_ns = a.onset_distance
          FROM a
          WHERE a.id=staging.country_month.id;

        with a as (SELECT * FROM onset_months_table('staging','country_month','ged_best_os','country_id'))
          UPDATE staging.country_month SET onset_months_os = a.onset_distance
          FROM a
          WHERE a.id=staging.country_month.id;""")
        con.execute(query)
        trans.commit()
Пример #18
0
def geoi_pgm_dummy_update(
        imputation_id=1,
        db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
        lookup_schema='geoimputation',
        month_start=400,
        month_end=400):

    if not isinstance(imputation_id, (int, long)):
        print("ERROR")
        return None

    imputation_id = str(imputation_id)

    engine = create_engine(db_engine_name)
    with engine.connect() as con:
        trans = con.begin()
        query = alchemy_text('''UPDATE ''' + lookup_schema + '''.pgm SET  
        ged_sb_dummy_''' + imputation_id + '''=0,  
        ged_ns_dummy_''' + imputation_id + '''=0,
        ged_os_dummy_''' + imputation_id + '''=0 
        WHERE month_id BETWEEN :m1 AND :m2''')
        con.execute(query, m1=month_start, m2=month_end)
        trans.commit()

        trans = con.begin()
        query = alchemy_text('''
    with a as
(
    SELECT
      "''' + imputation_id + '''" as priogrid_gid,
      type_of_violence,
      random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id
    FROM  ''' + lookup_schema + '''.geoi_out_all
)
UPDATE
  ''' + lookup_schema + '''.pgm as i SET ged_ns_dummy_''' + imputation_id +
                             '''=1
FROM a
WHERE a.type_of_violence=2 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id'''
                             )
        con.execute(query)

        query = alchemy_text('''
    with a as
(
    SELECT
      "''' + imputation_id + '''" as priogrid_gid,
      type_of_violence,
      random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id
    FROM  ''' + lookup_schema + '''.geoi_out_all
)
UPDATE
  ''' + lookup_schema + '''.pgm as i SET ged_os_dummy_''' + imputation_id +
                             '''=1
FROM a
WHERE a.type_of_violence=3 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id'''
                             )
        con.execute(query)

        query = alchemy_text('''
            with a as
        (
            SELECT
              "''' + imputation_id + '''" as priogrid_gid,
              type_of_violence,
              random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id
            FROM  ''' + lookup_schema + '''.geoi_out_all
        )
        UPDATE
          ''' + lookup_schema + '''.pgm as i SET ged_sb_dummy_''' +
                             imputation_id + '''=1
        FROM a
        WHERE a.type_of_violence=3 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id'''
                             )
        #print (query)
        con.execute(query)
        trans.commit()
Пример #19
0
def _stage_ged_2_cm(month_start: int, month_end: int) -> None:
    """ Update staging.country_month """

    log.debug("Starting _stage_ged_2_cm with "
              f"month_start: {month_start} month_end: {month_end}")
    engine = create_engine(CONNECTSTRING)

    log.debug("Finding limits ")
    with engine.connect() as con:
        limits = con.execute(
            "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached"
        ).fetchone()
        if limits[0] > month_start:
            month_start = limits[0]
        if limits[1] <= month_end:
            month_end = limits[1]
    log.debug(f"Limits of preflight.ged_attached are {limits}")

    log.debug("Staging CM with base GED variables and spatial lags.")
    query = alchemy_text("""
        UPDATE staging.country_month SET
        ged_best_sb = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,1),
        ged_best_ns = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,2),
        ged_best_os = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,0,3),
        ged_count_sb = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,1),
        ged_count_ns = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,2),
        ged_count_os = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,0,3),
        ged_best_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,1),
        ged_best_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,2),
        ged_best_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,FALSE,FALSE,1,3),
        ged_count_sb_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,1),
        ged_count_ns_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,2),
        ged_count_os_lag1 = public.aggregate_cm_deaths_on_date_end(id,TRUE,FALSE,1,3)
        WHERE month_id BETWEEN :m1 AND :m2
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Staging Country-Month t-lags.")
    query = alchemy_text("""
        SELECT public.make_country_month_temporal_lags(1,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(2,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(3,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(4,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(5,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(6,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(7,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(8,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(9,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(10,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(11,FALSE,109,:m2);
        SELECT public.make_country_month_temporal_lags(12,FALSE,109,:m2);
        """)
    with engine.connect() as con:
        con.execute(query, m2=month_end)

    log.debug("Staging Country-Month distances to nearest event...")
    query = alchemy_text("""
        UPDATE staging.country_month SET
        ged_months_since_last_sb = public.cm_months_since_last_event('ged_count_sb', country_id, month_id),
        ged_months_since_last_ns = public.cm_months_since_last_event('ged_count_ns', country_id, month_id),
        ged_months_since_last_os = public.cm_months_since_last_event('ged_count_os', country_id, month_id),
        ged_months_since_last_sb_lag1 = public.cm_months_since_last_event('ged_count_sb_lag1', country_id, month_id),
        ged_months_since_last_ns_lag1 = public.cm_months_since_last_event('ged_count_ns_lag1', country_id, month_id),
        ged_months_since_last_os_lag1 = public.cm_months_since_last_event('ged_count_os_lag1', country_id, month_id)
        WHERE month_id BETWEEN :m1 AND :m2
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Computing months_since_last")
    query = alchemy_text("""
        UPDATE staging.country_month SET
        ged_months_since_last_sb_tx5 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,5),
        ged_months_since_last_ns_tx5 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,5),
        ged_months_since_last_os_tx5 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,5),
        ged_months_since_last_sb_lag1_tx5 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,5),
        ged_months_since_last_ns_lag1_tx5 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,5),
        ged_months_since_last_os_lag1_tx5 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,5),
        ged_months_since_last_sb_tx25 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,25),
        ged_months_since_last_ns_tx25 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,25),
        ged_months_since_last_os_tx25 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,25),
        ged_months_since_last_sb_lag1_tx25 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,25),
        ged_months_since_last_ns_lag1_tx25 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,25),
        ged_months_since_last_os_lag1_tx25 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,25),
        ged_months_since_last_sb_tx100 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,100),
        ged_months_since_last_ns_tx100 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,100),
        ged_months_since_last_os_tx100 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,100),
        ged_months_since_last_sb_lag1_tx100 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,100),
        ged_months_since_last_ns_lag1_tx100 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,100),
        ged_months_since_last_os_lag1_tx100 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,100),
        ged_months_since_last_sb_tx500 = public.cm_months_since_last_event_threshold('ged_best_sb', country_id, month_id,500),
        ged_months_since_last_ns_tx500 = public.cm_months_since_last_event_threshold('ged_best_ns', country_id, month_id,500),
        ged_months_since_last_os_tx500 = public.cm_months_since_last_event_threshold('ged_best_os', country_id, month_id,500),
        ged_months_since_last_sb_lag1_tx500 = public.cm_months_since_last_event_threshold('ged_best_sb_lag1', country_id, month_id,500),
        ged_months_since_last_ns_lag1_tx500 = public.cm_months_since_last_event_threshold('ged_best_ns_lag1', country_id, month_id,500),
        ged_months_since_last_os_lag1_tx500 = public.cm_months_since_last_event_threshold('ged_best_os_lag1', country_id, month_id,500)
        WHERE month_id BETWEEN :m1 AND :m2
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Updating base onset variables for CM.")
    query = alchemy_text("""
        WITH a AS (SELECT * FROM onset_months_table('staging', 'country_month', 'ged_best_sb', 'country_id'))
        UPDATE staging.country_month
        SET onset_months_sb = a.onset_distance
        FROM a
        WHERE a.id = staging.country_month.id;

        WITH a AS (SELECT * FROM onset_months_table('staging', 'country_month', 'ged_best_ns', 'country_id'))
        UPDATE staging.country_month
        SET onset_months_ns = a.onset_distance
        FROM a
        WHERE a.id = staging.country_month.id;

        WITH a AS (SELECT * FROM onset_months_table('staging', 'country_month', 'ged_best_os', 'country_id'))
        UPDATE staging.country_month
        SET onset_months_os = a.onset_distance
        FROM a
        WHERE a.id = staging.country_month.id;
        """)
    with engine.connect() as con:
        trans = con.begin()
        con.execute(query)
        trans.commit()

    log.debug("Updating windowed onsets at cm level.")
    query = alchemy_text("""
        WITH a AS
                 (
                     SELECT id,
                            max(ged_best_sb)
                            OVER (PARTITION BY country_id ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS sb_x1,
                            max(ged_best_os)
                            OVER (PARTITION BY country_id ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS os_x1,
                            max(ged_best_ns)
                            OVER (PARTITION BY country_id ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS ns_x1
                     FROM staging.country_month
                     WHERE month_id BETWEEN :m1 - 48 AND :m2
                 )
        UPDATE staging.country_month
        SET max24_best_sb = a.sb_x1,
            max24_best_ns = a.ns_x1,
            max24_best_os = a.os_x1
        FROM a
        WHERE a.id = staging.country_month.id
          AND month_id BETWEEN :m1 AND :m2
        """)
    with engine.connect() as con:
        trans = con.begin()
        con.execute(query, m1=month_start, m2=month_end)
        trans.commit()
Пример #20
0
def _stage_ged_2_pgm(month_start: int, month_end: int) -> None:
    """ Update staging.priogrid_month """
    log.debug("Starting _stage_ged_2_pgm with "
              f"month_start: {month_start} month_end: {month_end}")
    engine = create_engine(CONNECTSTRING)

    log.debug("Finding limits ")
    with engine.connect() as con:
        limits = con.execute(
            "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached"
        ).fetchone()
        if limits[0] > month_start:
            month_start = limits[0]
        if limits[1] <= month_end:
            month_end = limits[1]
    log.debug(f"Limits of preflight.ged_attached are {limits}")

    log.debug(f"Updating staging.priogrid_month sums.")
    query = alchemy_text("""
        UPDATE staging.priogrid_month
        SET ged_best_sb        = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 0, 1),
            ged_best_ns        = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 0, 2),
            ged_best_os        = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 0, 3),
            ged_count_sb       = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 0, 1),
            ged_count_ns       = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 0, 2),
            ged_count_os       = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 0, 3),
            ged_best_sb_start  = aggregate_deaths_on_date_start(priogrid_gid, month_id, FALSE, FALSE, 0, 1),
            ged_best_ns_start  = aggregate_deaths_on_date_start(priogrid_gid, month_id, FALSE, FALSE, 0, 2),
            ged_best_os_start  = aggregate_deaths_on_date_start(priogrid_gid, month_id, FALSE, FALSE, 0, 3),
            ged_count_sb_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, TRUE, FALSE, 0, 1),
            ged_count_ns_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, TRUE, FALSE, 0, 2),
            ged_count_os_start = aggregate_deaths_on_date_start(priogrid_gid, month_id, TRUE, FALSE, 0, 3),
            ged_best_sb_lag1   = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 1, 1),
            ged_best_ns_lag1   = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 1, 2),
            ged_best_os_lag1   = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 1, 3),
            ged_count_sb_lag1  = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 1, 1),
            ged_count_ns_lag1  = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 1, 2),
            ged_count_os_lag1  = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 1, 3),
            ged_best_sb_lag2   = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 2, 1),
            ged_best_ns_lag2   = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 2, 2),
            ged_best_os_lag2   = aggregate_deaths_on_date_end(priogrid_gid, month_id, FALSE, FALSE, 2, 3),
            ged_count_sb_lag2  = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 2, 1),
            ged_count_ns_lag2  = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 2, 2),
            ged_count_os_lag2  = aggregate_deaths_on_date_end(priogrid_gid, month_id, TRUE, FALSE, 2, 3)
        WHERE month_id >= :m1
          AND month_id <= :m2
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Creating temporal lags.")
    query = alchemy_text("""
        SELECT public.make_priogrid_month_temporal_lags(1,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(2,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(3,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(4,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(5,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(6,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(7,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(8,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(9,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(10,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(11,FALSE,:m1,:m2);
        SELECT public.make_priogrid_month_temporal_lags(12,FALSE,:m1,:m2);
    """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Updating count variables.")
    query = alchemy_text("""
        UPDATE staging.priogrid_month SET
        ged_months_since_last_sb = public.months_since_last_event('ged_count_sb', priogrid_gid, month_id),
        ged_months_since_last_ns = public.months_since_last_event('ged_count_ns', priogrid_gid, month_id),
        ged_months_since_last_os = public.months_since_last_event('ged_count_os', priogrid_gid, month_id),
        ged_months_since_last_sb_lag1 = public.months_since_last_event('ged_count_sb_lag1', priogrid_gid, month_id),
        ged_months_since_last_ns_lag1 = public.months_since_last_event('ged_count_ns_lag1', priogrid_gid, month_id),
        ged_months_since_last_os_lag1 = public.months_since_last_event('ged_count_os_lag1', priogrid_gid, month_id),
        ged_months_since_last_sb_lag2 = public.months_since_last_event('ged_count_sb_lag2', priogrid_gid, month_id),
        ged_months_since_last_ns_lag2 = public.months_since_last_event('ged_count_ns_lag2', priogrid_gid, month_id),
        ged_months_since_last_os_lag2 = public.months_since_last_event('ged_count_os_lag2', priogrid_gid, month_id)
        WHERE month_id BETWEEN :m1 AND :m2
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Updating monthly counts for thresholds.")
    query = alchemy_text("""
        UPDATE staging.priogrid_month SET
        ged_months_since_last_sb_tx5 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,5),
        ged_months_since_last_ns_tx5 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,5),
        ged_months_since_last_os_tx5 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,5),
        ged_months_since_last_sb_lag1_tx5 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,5),
        ged_months_since_last_ns_lag1_tx5 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,5),
        ged_months_since_last_os_lag1_tx5 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,5),
        ged_months_since_last_sb_tx25 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,25),
        ged_months_since_last_ns_tx25 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,25),
        ged_months_since_last_os_tx25 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,25),
        ged_months_since_last_sb_lag1_tx25 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,25),
        ged_months_since_last_ns_lag1_tx25 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,25),
        ged_months_since_last_os_lag1_tx25 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,25),
        ged_months_since_last_sb_tx100 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,100),
        ged_months_since_last_ns_tx100 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,100),
        ged_months_since_last_os_tx100 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,100),
        ged_months_since_last_sb_lag1_tx100 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,100),
        ged_months_since_last_ns_lag1_tx100 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,100),
        ged_months_since_last_os_lag1_tx100 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,100),
        ged_months_since_last_sb_tx500 = public.months_since_last_event_threshold('ged_best_sb', priogrid_gid, month_id,500),
        ged_months_since_last_ns_tx500 = public.months_since_last_event_threshold('ged_best_ns', priogrid_gid, month_id,500),
        ged_months_since_last_os_tx500 = public.months_since_last_event_threshold('ged_best_os', priogrid_gid, month_id,500),
        ged_months_since_last_sb_lag1_tx500 = public.months_since_last_event_threshold('ged_best_sb_lag1', priogrid_gid, month_id,500),
        ged_months_since_last_ns_lag1_tx500 = public.months_since_last_event_threshold('ged_best_ns_lag1', priogrid_gid, month_id,500),
        ged_months_since_last_os_lag1_tx500 = public.months_since_last_event_threshold('ged_best_os_lag1', priogrid_gid, month_id,500)
        WHERE month_id BETWEEN :m1 AND :m2
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Compute spatial distances.")
    query = alchemy_text("""
        UPDATE staging.priogrid_month SET
        dist_ged_sb_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,1),
        dist_ged_ns_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,2),
        dist_ged_os_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,3)
        WHERE month_id BETWEEN :m1 AND :m2
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    log.debug("Updating base onset variables for PGM.")
    query = alchemy_text("""
        WITH a AS (SELECT * FROM onset_months_table('staging', 'priogrid_month', 'ged_best_sb'))
        UPDATE staging.priogrid_month
        SET onset_months_sb = a.onset_distance
        FROM a
        WHERE a.id = staging.priogrid_month.id;

        WITH a AS (SELECT * FROM onset_months_table('staging', 'priogrid_month', 'ged_best_ns'))
        UPDATE staging.priogrid_month
        SET onset_months_ns = a.onset_distance
        FROM a
        WHERE a.id = staging.priogrid_month.id;

        WITH a AS (SELECT * FROM onset_months_table('staging', 'priogrid_month', 'ged_best_os'))
        UPDATE staging.priogrid_month
        SET onset_months_os = a.onset_distance
        FROM a
        WHERE a.id = staging.priogrid_month.id;
        """)
    with engine.connect() as con:
        trans = con.begin()
        con.execute(query)
        trans.commit()

    log.debug("Updating onsets based on spatial lags at PGM.")
    query = alchemy_text("""
        UPDATE staging.priogrid_month
        SET onset_month_sb_lag1 =
                onset_lags(
                        priogrid := priogrid_gid,
                        month_id := month_id,
                        lags := 1,
                        schema_name := 'staging'::varchar,
                        table_name := 'priogrid_month'::varchar,
                        column_name := 'ged_best_sb'::varchar),

            onset_month_sb_lag2 =
                onset_lags(
                        priogrid := priogrid_gid,
                        month_id := month_id,
                        lags := 2,
                        schema_name := 'staging'::varchar,
                        table_name := 'priogrid_month'::varchar,
                        column_name := 'ged_best_sb'::varchar)
        WHERE onset_months_sb > 0
          AND month_id BETWEEN :m1 AND :m2;


        UPDATE staging.priogrid_month
        SET onset_month_ns_lag1 =
                onset_lags(
                        priogrid := priogrid_gid,
                        month_id := month_id,
                        lags := 1,
                        schema_name := 'staging'::varchar,
                        table_name := 'priogrid_month'::varchar,
                        column_name := 'ged_best_ns'::varchar),

            onset_month_ns_lag2 =
                onset_lags(
                        priogrid := priogrid_gid,
                        month_id := month_id,
                        lags := 2,
                        schema_name := 'staging'::varchar,
                        table_name := 'priogrid_month'::varchar,
                        column_name := 'ged_best_ns'::varchar)
        WHERE onset_months_ns > 0
          AND month_id BETWEEN :m1 AND :m2;

        UPDATE staging.priogrid_month
        SET onset_month_os_lag1 =
                onset_lags(
                        priogrid := priogrid_gid,
                        month_id := month_id,
                        lags := 1,
                        schema_name := 'staging'::varchar,
                        table_name := 'priogrid_month'::varchar,
                        column_name := 'ged_best_os'::varchar),

            onset_month_os_lag2 =
                onset_lags(
                        priogrid := priogrid_gid,
                        month_id := month_id,
                        lags := 2,
                        schema_name := 'staging'::varchar,
                        table_name := 'priogrid_month'::varchar,
                        column_name := 'ged_best_os'::varchar)
        WHERE onset_months_os > 0
          AND month_id BETWEEN :m1 AND :m2;
    """)
    with engine.connect() as con:
        trans = con.begin()
        con.execute(query, m1=month_start, m2=month_end)
        trans.commit()

    log.debug("Updating windowed onsets.")
    query = alchemy_text("""
    WITH a AS
             (
                 SELECT id,
                        max(ged_best_sb)
                        OVER (PARTITION BY priogrid_gid ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS sb_x1,
                        max(ged_best_os)
                        OVER (PARTITION BY priogrid_gid ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS os_x1,
                        max(ged_best_ns)
                        OVER (PARTITION BY priogrid_gid ORDER BY month_id ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING) AS ns_x1
                 FROM staging.priogrid_month
                 WHERE month_id BETWEEN :m1 - 48 AND :m2
             )
    UPDATE staging.priogrid_month
    SET max24_best_sb = a.sb_x1,
        max24_best_ns = a.ns_x1,
        max24_best_os = a.os_x1
    FROM a
    WHERE a.id = staging.priogrid_month.id
      AND month_id BETWEEN :m1 AND :m2
    """)
    with engine.connect() as con:
        trans = con.begin()
        con.execute(query, m1=month_start, m2=month_end)
        trans.commit()

    log.info("Finished _stage_ged_2_pgm()")
Пример #21
0
def _geoi_pgm_dummy_update(
    imputation_id: int,
    month_start: int,
    month_end: int,
):
    log.debug("Started _geoi_pgm_dummy_update().")
    if not isinstance(imputation_id, numbers.Integral):
        raise RuntimeError(f"{imputation_id} isn't a numbers.Integral")

    lookup_schema = "left_imputation"

    # imputation_id = str(imputation_id)
    engine = create_engine(CONNECTSTRING)
    with engine.connect() as con:
        trans = con.begin()
        query = alchemy_text(f"""
            UPDATE {lookup_schema}.pgm
            SET
            ged_sb_dummy_{imputation_id} = 0,
            ged_ns_dummy_{imputation_id} = 0,
            ged_os_dummy_{imputation_id} = 0
            WHERE month_id BETWEEN :m1 AND :m2
            """)
        con.execute(query, m1=month_start, m2=month_end)

        trans.commit()
        trans = con.begin()
        query = alchemy_text(f"""
            WITH a AS
            (
                SELECT
                "{imputation_id}" as priogrid_gid,
                type_of_violence,
                random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id
                FROM  {lookup_schema}.geoi_out_all
            )
            UPDATE {lookup_schema}.pgm as i SET ged_ns_dummy_{imputation_id}=1
            FROM a
            WHERE a.type_of_violence=2 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id
            """)
        con.execute(query)

        query = alchemy_text(f"""
            WITH a AS
            (
                SELECT
                "{imputation_id}" as priogrid_gid,
                type_of_violence,
                random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id
                FROM  {lookup_schema}.geoi_out_all
            )
            UPDATE {lookup_schema}.pgm as i SET ged_os_dummy_{imputation_id}=1
            FROM a
            WHERE a.type_of_violence=3 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id
            """)
        con.execute(query)

        query = alchemy_text(f"""
            WITH a AS
            (
                SELECT
                "{imputation_id}" as priogrid_gid,
                type_of_violence,
                random_series_int(month_start :: INT, month_end :: INT + 1) AS month_id
                FROM  {lookup_schema}.geoi_out_all
            )
            UPDATE {lookup_schema}.pgm as i SET ged_sb_dummy_{imputation_id}=1
            FROM a
            WHERE a.type_of_violence=1 AND a.priogrid_gid=i.priogrid_gid AND a.month_id=i.month_id
            """)
        con.execute(query)
        trans.commit()

    log.debug("Finished _geoi_pgm_dummy_update().")
Пример #22
0
def stageGED2PGM(
        month_start=445,
        month_end=446,
        db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'):

    if month_start > month_end:
        month_start, month_end = month_end, month_start

    engine = create_engine(db_engine_name)

    with engine.connect() as con:
        limits = con.execute(
            "SELECT min(month_id_end) AS int, max(month_id_end) AS int FROM preflight.ged_attached"
        ).fetchone()
        if limits[0] > month_start: month_start = limits[0]
        if limits[1] <= month_end: month_end = limits[1]

    print(month_start, month_end)

    print("Updating sums...")

    query = alchemy_text("""UPDATE staging.priogrid_month SET
  ged_best_sb = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 0, 1),
  ged_best_ns = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 0, 2),
  ged_best_os = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 0, 3),
  ged_count_sb = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 0, 1),
  ged_count_ns = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 0, 2),
  ged_count_os = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 0, 3),
   ged_best_sb_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, FALSE, FALSE, 0, 1),
   ged_best_ns_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, FALSE, FALSE, 0, 2),
   ged_best_os_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, FALSE, FALSE, 0, 3),
   ged_count_sb_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, TRUE, FALSE, 0, 1),
   ged_count_ns_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, TRUE, FALSE, 0, 2),
   ged_count_os_start = aggregate_deaths_on_date_start(priogrid_gid,month_id, TRUE, FALSE, 0, 3),
    ged_best_sb_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 1, 1),
 ged_best_ns_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 1, 2),
 ged_best_os_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 1, 3),
 ged_count_sb_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 1, 1),
 ged_count_ns_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 1, 2),
 ged_count_os_lag1 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 1, 3),
  ged_best_sb_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 2, 1),
 ged_best_ns_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 2, 2),
 ged_best_os_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, FALSE, FALSE, 2, 3),
 ged_count_sb_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 2, 1),
 ged_count_ns_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 2, 2),
 ged_count_os_lag2 = aggregate_deaths_on_date_end(priogrid_gid,month_id, TRUE, FALSE, 2, 3)
WHERE
  month_id >= :m1 AND month_id <= :m2 """)

    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    print("Creating temporal lags...")

    #This will compute temporal lags.
    #This must be run AFTER the above query has commited

    query = alchemy_text('''
SELECT public.make_priogrid_month_temporal_lags(1,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(2,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(3,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(4,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(5,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(6,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(7,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(8,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(9,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(10,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(11,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_temporal_lags(12,FALSE,:m1,:m2);
''')
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    print("Updating count variables...")

    query = alchemy_text('''
    UPDATE staging.priogrid_month SET
ged_months_since_last_sb = public.months_since_last_event('ged_count_sb', priogrid_gid, month_id),
ged_months_since_last_ns = public.months_since_last_event('ged_count_ns', priogrid_gid, month_id),
ged_months_since_last_os = public.months_since_last_event('ged_count_os', priogrid_gid, month_id),
ged_months_since_last_sb_lag1 = public.months_since_last_event('ged_count_sb_lag1', priogrid_gid, month_id),
ged_months_since_last_ns_lag1 = public.months_since_last_event('ged_count_ns_lag1', priogrid_gid, month_id),
ged_months_since_last_os_lag1 = public.months_since_last_event('ged_count_os_lag1', priogrid_gid, month_id),
ged_months_since_last_sb_lag2 = public.months_since_last_event('ged_count_sb_lag2', priogrid_gid, month_id),
ged_months_since_last_ns_lag2 = public.months_since_last_event('ged_count_ns_lag2', priogrid_gid, month_id),
ged_months_since_last_os_lag2 = public.months_since_last_event('ged_count_os_lag2', priogrid_gid, month_id)
WHERE month_id BETWEEN :m1 AND :m2''')
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    print("Computing Spatial Distances...")

    with engine.connect() as con:
        query = alchemy_text("""
UPDATE staging.priogrid_month SET
dist_ged_sb_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,1),
dist_ged_ns_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,2),
dist_ged_os_event=public.distance_to_nearest_ged('preflight','ged_attached',priogrid_gid,month_id,3)
WHERE month_id BETWEEN :m1 AND :m2
""")
        con.execute(query, m1=month_start, m2=month_end)

    print("Updating base onset variables for PGM...")

    with engine.connect() as con:
        trans = con.begin()
        query = alchemy_text("""
        with a as (SELECT * FROM onset_months_table('staging','priogrid_month','ged_best_sb'))
  UPDATE staging.priogrid_month SET onset_months_sb = a.onset_distance
  FROM a
  WHERE a.id=staging.priogrid_month.id;


with a as (SELECT * FROM onset_months_table('staging','priogrid_month','ged_best_ns'))
  UPDATE staging.priogrid_month SET onset_months_ns = a.onset_distance
  FROM a
  WHERE a.id=staging.priogrid_month.id;


with a as (SELECT * FROM onset_months_table('staging','priogrid_month','ged_best_os'))
  UPDATE staging.priogrid_month SET onset_months_os = a.onset_distance
  FROM a
  WHERE a.id=staging.priogrid_month.id;
        """)
        con.execute(query)
        trans.commit()

    print("Updating onsets based on spatial lags at PGM...")

    with engine.connect() as con:
        trans = con.begin()
        query = alchemy_text("""UPDATE staging.priogrid_month SET
  onset_month_sb_lag1 =
  onset_lags(
  priogrid := priogrid_gid,
  month_id := month_id,
  lags := 1,
  schema_name := 'staging'::varchar,
  table_name := 'priogrid_month'::varchar,
  column_name := 'ged_best_sb'::varchar),

  onset_month_sb_lag2 =
  onset_lags(
  priogrid := priogrid_gid,
  month_id := month_id,
  lags := 2,
  schema_name := 'staging'::varchar,
  table_name := 'priogrid_month'::varchar,
  column_name := 'ged_best_sb'::varchar)
WHERE onset_months_sb > 0 AND month_id BETWEEN :m1 AND :m2;


UPDATE staging.priogrid_month SET
  onset_month_ns_lag1 =
  onset_lags(
  priogrid := priogrid_gid,
  month_id := month_id,
  lags := 1,
  schema_name := 'staging'::varchar,
  table_name := 'priogrid_month'::varchar,
  column_name := 'ged_best_ns'::varchar),

  onset_month_ns_lag2 =
  onset_lags(
  priogrid := priogrid_gid,
  month_id := month_id,
  lags := 2,
  schema_name := 'staging'::varchar,
  table_name := 'priogrid_month'::varchar,
  column_name := 'ged_best_ns'::varchar)
WHERE onset_months_ns > 0 AND month_id BETWEEN :m1 AND :m2;

UPDATE staging.priogrid_month SET
  onset_month_os_lag1 =
  onset_lags(
  priogrid := priogrid_gid,
  month_id := month_id,
  lags := 1,
  schema_name := 'staging'::varchar,
  table_name := 'priogrid_month'::varchar,
  column_name := 'ged_best_os'::varchar),

  onset_month_os_lag2 =
  onset_lags(
  priogrid := priogrid_gid,
  month_id := month_id,
  lags := 2,
  schema_name := 'staging'::varchar,
  table_name := 'priogrid_month'::varchar,
  column_name := 'ged_best_os'::varchar)
WHERE onset_months_os > 0 AND month_id BETWEEN :m1 AND :m2;
""")
        con.execute(query, m1=month_start, m2=month_end)
        trans.commit()

    print("Update complete!")
Пример #23
0
def prepareACLED(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
                 schema_name='dataprep',
                 table_name='acled'):
    engine = create_engine(db_engine_name)

    print("Dropping old attached ACLED...")

    with engine.connect() as con:
        trans = con.begin()
        con.execute("DROP TABLE IF EXISTS preflight.acled_full")
        con.execute("DROP TABLE IF EXISTS preflight.acled")
        trans.commit()

    print("Reattaching ACLED...")

    with engine.connect() as con:
        trans = con.begin()
        con.execute("""
    CREATE TABLE preflight.acled_full AS 
    WITH month_acled AS
    (
        SELECT
          *,
          EXTRACT(MONTH FROM event_date :: DATE) AS month,
          public.priogrid(latitude::float4,longitude::float4) AS priogrid_gid
        FROM """ + schema_name + '.' + table_name + """ 
        WHERE latitude::float BETWEEN -180 AND 180 AND longitude::float BETWEEN -90 AND 90
    ),
        month_acled2 AS
      (
          SELECT
            month_acled.*,
            staging.month.id AS month_id
          FROM month_acled, staging.month
          WHERE
            (month_acled.year :: INT = staging.month.year_id AND
             month_acled.month = staging.month.month)
      )
    SELECT *
    FROM month_acled2;
            """)
        trans.commit()
        trans = con.begin()
        con.execute(
            "ALTER TABLE preflight.acled_full ADD COLUMN type_of_violence INT")
        con.execute(
            "ALTER TABLE preflight.acled_full ADD COLUMN type_of_protest VARCHAR(10)"
        )
        trans.commit()
        trans = con.begin()

        print('Attaching ViEWS categories to attached ACLED... State-based...',
              end='')

        #1. We are emulating UCDP/ViEWS StateBased category using ACLED data.
        #i.e. Military Forces vs. others/other Military Forces, only "battles" and "remote violence"
        #no civilians involved.
        #TODO: shelling and remote violence may need to be treated differently

        con.execute('''
        UPDATE preflight.acled_full SET type_of_violence = 1 WHERE 
        (event_type ilike '%%battle%%' OR event_type ilike '%%remote%%') 
        AND actor1||actor2 ilike '%%military forces%%'
        AND actor1||actor2 NOT ilike '%%civilians%%'
        ''')

        #2. We are emulating UCDP/ViEWS StateBased category using ACLED data.
        #i.e. no military forces, no civilians, only "battles" and "remote violence"
        #UCDP's artificial organizational criteria are not included and cannot for now be included

        print('Non-state...', end='')

        con.execute('''
        UPDATE preflight.acled_full SET type_of_violence = 2 WHERE 
        (event_type ilike '%%battle%%' OR event_type ilike '%%remote%%')
        AND actor1||actor2 not ilike '%%military forces%%'
        AND actor1||actor2 NOT ilike '%%civilians%%'
        ''')

        print('One-Sided...', end='')

        #3: Emulate UCDP/Views OneSided category.
        # Remote violence, battle and violence against civilians
        # TODO: This may be improved using a better division of "Remote Violence"

        con.execute('''
        UPDATE preflight.acled_full SET type_of_violence = 3 WHERE 
        (event_type ilike '%%battle%%' OR event_type ilike '%%remote%%' OR event_type ilike '%%civi%%')
        AND actor1||actor2 ilike '%%civilians%%'
        ''')

        print('Protest')

        #4: Protests
        #The entire protest category, as is

        con.execute('''
        UPDATE preflight.acled_full SET type_of_violence = 4 WHERE
        event_type ilike '%%protest%%'        
        ''')
        trans.commit()
        trans = con.begin()
        query = alchemy_text("""
        UPDATE preflight.acled_full SET type_of_protest = 'p' 
        WHERE 
        type_of_violence=4 AND (inter1::int=6 OR inter2::int=6);
        """)
        con.execute(query)
        query = alchemy_text("""
        UPDATE preflight.acled_full SET type_of_protest = coalesce(type_of_protest,'') || 'r' 
        WHERE 
        type_of_violence=4 AND (inter1::int=5 OR inter2::int=5);
        """)
        con.execute(query)
        query = alchemy_text("""
        UPDATE preflight.acled_full SET type_of_protest = COALESCE(type_of_protest,'') || 'x' 
        WHERE 
        event_type ilike '%violence against civi%' AND interaction::int IN (15,16,25,26,35,36,45,46);
        """)
        con.execute(query)
        query = alchemy_text("""
        UPDATE preflight.acled_full SET type_of_protest = COALESCE(type_of_protest,'') || 'y' 
        WHERE
        event_type ilike '%violence against civi%' AND interaction::int IN (15,16);
        """)
        con.execute(query)
        trans.commit()

        # We are only using events precise enough to have locations within PGM cells
        # Thus, we exclude geo_precision 3 which indicates "larger area"
        # (unclear what that means but during testing, it was nearly always ADM1 or higher.

        trans = con.begin()
        print("Espen's categroies...")
        con.execute('DROP TABLE IF EXISTS preflight.acled')
        trans.commit()
        trans = con.begin()
        con.execute(
            'CREATE TABLE preflight.acled AS SELECT * FROM preflight.acled_full WHERE geo_precision::int<3'
        )
        trans.commit()
        trans = con.begin()
        print("Indexing...")
        con.execute('''
        ALTER TABLE preflight.acled ADD PRIMARY KEY(index);
        ALTER TABLE preflight.acled_full ADD  PRIMARY KEY (index);
        CREATE INDEX acled_idx ON preflight.acled(priogrid_gid, month_id, type_of_violence);
        CREATE INDEX acled_full_idx ON preflight.acled_full(priogrid_gid, month_id, type_of_violence);
        CREATE INDEX acled2_idx ON preflight.acled(priogrid_gid, month_id, type_of_violence,type_of_protest);
        CREATE INDEX acled2_full_idx ON preflight.acled_full(priogrid_gid, month_id, type_of_violence,type_of_protest)
        ''')
        trans.commit()
Пример #24
0
def stageACLED(month_start=448,
               month_end=449,
               db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views'):

    # swap if month start > month end
    if month_start > month_end:
        month_start, month_end = month_end, month_start

    engine = create_engine(db_engine_name)

    with engine.connect() as con:

        # Check we don't want to update some column that doesn't exist in the downloaded ACLED
        # Last month in ACLED is always incomplete, thus use month-1 for end limit.

        limits = con.execute(
            "SELECT min(month_id) AS int, max(month_id)-1 AS int FROM preflight.acled"
        ).fetchone()
        if limits[0] > month_start: month_start = limits[0]
        if limits[1] <= month_end: month_end = limits[1]

    print("Updating PGM for months between ", month_start, " and ", month_end,
          "with sum/counts and geographic lags...")

    #This will compute event counts for priogrid-month observations as well as for 1st and 2nd order lags

    query = alchemy_text('''
    UPDATE staging.priogrid_month SET
  acled_count_sb = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,1),
  acled_count_ns = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,2),
  acled_count_os = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,3),
  acled_count_pr = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,4),
  acled_count_prp= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'p'),
  acled_count_prr= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'r'),
  acled_count_prx= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'x'),
  acled_count_pry= public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,0,'y'),
  acled_fat_sb = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,1),
  acled_fat_ns = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,2),
  acled_fat_os = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,3),
  acled_fat_pr = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,4),
  acled_fat_prp= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'p'),
  acled_fat_prr= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'r'),
  acled_fat_prx= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'x'),
  acled_fat_pry= public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,0,'y'),
  acled_count_sb_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,1),
  acled_count_ns_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,2),
  acled_count_os_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,3),
  acled_count_pr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,4),
  acled_count_prp_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'p'),
  acled_count_prr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'r'),
  acled_count_prx_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'x'),
  acled_count_pry_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,1,'y'),
  acled_fat_sb_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,1),
  acled_fat_ns_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,2),
  acled_fat_os_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,3),
  acled_fat_pr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,4),
  acled_fat_prp_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'p'),
  acled_fat_prr_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'r'),
  acled_fat_prx_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'x'),
  acled_fat_pry_lag1 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,1,'y'),
  acled_count_sb_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,1),
  acled_count_ns_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,2),
  acled_count_os_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,3),
  acled_count_pr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,4),
  acled_count_prp_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'p'),
  acled_count_prr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'r'),
  acled_count_prx_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'x'),
  acled_count_pry_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,TRUE,2,'y'),
  acled_fat_sb_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,1),
  acled_fat_ns_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,2),
  acled_fat_os_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,3),
  acled_fat_pr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,4),
  acled_fat_prp_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'p'),
  acled_fat_prr_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'r'),
  acled_fat_prx_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'x'),
  acled_fat_pry_lag2 = public.aggregate_acled_pgm (priogrid_gid,month_id,FALSE,2,'y')
  WHERE month_id BETWEEN :m1 AND :m2
        AND
  priogrid_gid IN (SELECT gid FROM staging.priogrid WHERE in_africa)
  ''')
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    print("Creating temporal lags...")

    #This will compute temporal lags.
    #This must be run AFTER the above query has commited

    query = alchemy_text('''
SELECT public.make_priogrid_month_acled_temporal_lags(1,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(2,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(3,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(4,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(5,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(6,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(7,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(8,FALSE,:m1,:m2);
SELECT public.make_priogrid_month_acled_temporal_lags(9,FALSE,:m1,:m2);
''')
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    print("Computing time since last event...")

    #This will compute time since last event.
    #This must be run AFTER the above query has commited

    query = alchemy_text('''
    UPDATE staging.priogrid_month SET
acled_months_since_last_sb = public.months_since_last_event('acled_count_sb', priogrid_gid, month_id),
acled_months_since_last_ns = public.months_since_last_event('acled_count_ns', priogrid_gid, month_id),
acled_months_since_last_os = public.months_since_last_event('acled_count_os', priogrid_gid, month_id),
acled_months_since_last_pr = public.months_since_last_event('acled_count_pr', priogrid_gid, month_id),
acled_months_since_last_prp = public.months_since_last_event('acled_count_prp', priogrid_gid, month_id),
acled_months_since_last_prr = public.months_since_last_event('acled_count_prr', priogrid_gid, month_id),
acled_months_since_last_prx = public.months_since_last_event('acled_count_prx', priogrid_gid, month_id),
acled_months_since_last_pry = public.months_since_last_event('acled_count_pry', priogrid_gid, month_id),
acled_months_since_last_sb_lag1 = public.months_since_last_event('acled_count_sb_lag1', priogrid_gid, month_id),
acled_months_since_last_ns_lag1 = public.months_since_last_event('acled_count_ns_lag1', priogrid_gid, month_id),
acled_months_since_last_os_lag1 = public.months_since_last_event('acled_count_os_lag1', priogrid_gid, month_id),
acled_months_since_last_pr_lag1 = public.months_since_last_event('acled_count_pr_lag1', priogrid_gid, month_id),
acled_months_since_last_prp_lag1 = public.months_since_last_event('acled_count_prp_lag1', priogrid_gid, month_id),
acled_months_since_last_prr_lag1 = public.months_since_last_event('acled_count_prr_lag1', priogrid_gid, month_id),
acled_months_since_last_prx_lag1 = public.months_since_last_event('acled_count_prx_lag1', priogrid_gid, month_id),
acled_months_since_last_pry_lag1 = public.months_since_last_event('acled_count_pry_lag1', priogrid_gid, month_id),
acled_months_since_last_sb_lag2 = public.months_since_last_event('acled_count_sb_lag2', priogrid_gid, month_id),
acled_months_since_last_ns_lag2 = public.months_since_last_event('acled_count_ns_lag2', priogrid_gid, month_id),
acled_months_since_last_os_lag2 = public.months_since_last_event('acled_count_os_lag2', priogrid_gid, month_id),
acled_months_since_last_pr_lag2 = public.months_since_last_event('acled_count_pr_lag2', priogrid_gid, month_id),
acled_months_since_last_prp_lag2 = public.months_since_last_event('acled_count_prp_lag2', priogrid_gid, month_id),
acled_months_since_last_prr_lag2 = public.months_since_last_event('acled_count_prr_lag2', priogrid_gid, month_id),
acled_months_since_last_prx_lag2 = public.months_since_last_event('acled_count_prx_lag2', priogrid_gid, month_id),
acled_months_since_last_pry_lag2 = public.months_since_last_event('acled_count_pry_lag2', priogrid_gid, month_id)
WHERE month_id BETWEEN :m1 AND :m2;
    ''')

    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    print("Preparing ACLED for CM...")

    with engine.connect() as con:

        trans = con.begin()
        query = "ALTER TABLE preflight.acled_full ADD COLUMN gwno INT"
        con.execute(query)
        trans.commit()

        trans = con.begin()
        query = "UPDATE preflight.acled_full SET gwno=isonum_to_gwcode(iso::int)"
        con.execute(query)
        trans.commit()

        trans = con.begin()
        try:
            con.execute(
                "ALTER TABLE preflight.acled_full ADD COLUMN country_month_id INT"
            )
        except:
            pass
        trans.commit()

        trans = con.begin()
        query = alchemy_text("""
        with a as
(SELECT cm.*, c.gwcode FROM staging.country_month cm left join
      staging.country c on (cm.country_id=c.id))
UPDATE preflight.acled_full SET country_month_id=a.id
FROM a
WHERE (a.gwcode::int = acled_full.gwno::int AND a.month_id = acled_full.month_id);
        """)
        con.execute(query)
        con.execute(
            "CREATE INDEX acled_full_cm_idx ON preflight.acled_full(country_month_id, type_of_violence)"
        )
        trans.commit()

        print("Updating CM aggregates for ACLED...")

        trans = con.begin()
        query = alchemy_text("""UPDATE staging.country_month SET
  acled_count_sb = public.aggregate_cm_acled(id,TRUE,0,1),
  acled_count_ns = public.aggregate_cm_acled(id,TRUE,0,2),
  acled_count_os = public.aggregate_cm_acled(id,TRUE,0,3),
  acled_count_pr = public.aggregate_cm_acled(id,TRUE,0,4),
  acled_count_sb_lag1 = public.aggregate_cm_acled(id,TRUE,1,1),
  acled_count_ns_lag1 = public.aggregate_cm_acled(id,TRUE,1,2),
  acled_count_os_lag1 = public.aggregate_cm_acled(id,TRUE,1,3),
  acled_count_pr_lag1 = public.aggregate_cm_acled(id,TRUE,1,4)
WHERE month_id BETWEEN :m1 AND :m2""")
        con.execute(query, m1=month_start, m2=month_end)
        trans.commit()
        print("Updating CM months since...")
        trans = con.begin()
        query = alchemy_text("""UPDATE staging.country_month SET
acled_months_since_last_sb = public.cm_months_since_last_event('acled_count_sb', country_id, month_id),
acled_months_since_last_ns = public.cm_months_since_last_event('acled_count_ns', country_id, month_id),
acled_months_since_last_os = public.cm_months_since_last_event('acled_count_os', country_id, month_id),
acled_months_since_last_pr = public.cm_months_since_last_event('acled_count_pr', country_id, month_id),
acled_months_since_last_sb_lag1 = public.cm_months_since_last_event('acled_count_sb_lag1', country_id, month_id),
acled_months_since_last_ns_lag1 = public.cm_months_since_last_event('acled_count_ns_lag1', country_id, month_id),
acled_months_since_last_os_lag1 = public.cm_months_since_last_event('acled_count_os_lag1', country_id, month_id),
acled_months_since_last_pr_lag1 = public.cm_months_since_last_event('acled_count_pr_lag1', country_id, month_id)
WHERE month_id BETWEEN :m1 AND :m2""")
        con.execute(query, m1=month_start - 12, m2=month_end + 12)
        trans.commit()
        print("Updating CM temporal lags...")
        trans = con.begin()
        query = alchemy_text("""
        SELECT make_country_month_acled_temporal_lags(1, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(2, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(3, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(4, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(5, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(6, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(7, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(8, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(9, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(10, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(11, FALSE, :m1, :m2);
SELECT make_country_month_acled_temporal_lags(12, FALSE, :m1, :m2);
""")
        con.execute(query, m1=month_start, m2=month_end)
        trans.commit()
    print("Update complete!")
Пример #25
0
def prepareGED(db_engine_name='postgresql://VIEWSADMIN@VIEWSHOST:5432/views',
               schema_name='dataprep',
               table_name='ged'):
    engine = create_engine(db_engine_name)

    print("Dropping old attached GED...")

    with engine.connect() as con:
        con.execute("DROP TABLE IF EXISTS preflight.ged_attached_full")
        con.execute("DROP TABLE IF EXISTS preflight.ged_attached")

    print("Attaching new GED...")

    query = alchemy_text("""

CREATE TABLE preflight.ged_attached AS
  (
    WITH month_ged AS
    (
        SELECT
          *,
          EXTRACT(MONTH FROM date_start :: DATE) AS month_start,
          EXTRACT(MONTH FROM date_end :: DATE)   AS month_end
        FROM """ + schema_name + '.' + table_name + """
    ),
        month_ged_start AS
      (
          SELECT
            month_ged.*,
            staging.month.id AS month_id_start
          FROM month_ged, staging.month
          WHERE
            (month_ged.year :: INT = staging.month.year_id AND
             month_ged.month_start = staging.month.month)
      ),
        month_ged_full AS
      (
          SELECT
            month_ged_start.*,
            staging.month.id AS month_id_end
          FROM month_ged_start, staging.month
          WHERE
            (month_ged_start.year :: INT = staging.month.year_id AND
             month_ged_start.month_end = staging.month.month)
      )
    SELECT *
    FROM month_ged_full
  );

""")

    print("Geometrifying...")

    with engine.connect() as con:
        con.execute(query)
        con.execute('ALTER TABLE preflight.ged_attached ADD PRIMARY KEY (id)')
        con.execute(
            'ALTER TABLE preflight.ged_attached ADD COLUMN country_month_id_end bigint'
        )
        con.execute(
            'ALTER TABLE preflight.ged_attached ADD COLUMN country_month_id_start bigint'
        )
        con.execute(
            'ALTER TABLE preflight.ged_attached DROP COLUMN IF EXISTS geom')
        con.execute(
            'ALTER TABLE preflight.ged_attached ADD COLUMN geom geometry (point,4326)'
        )
        con.execute(
            "UPDATE preflight.ged_attached SET geom=st_setsrid(st_geometryfromtext(geom_wkt),4326) WHERE geom_wkt<>''"
        )

    print("Attaching country information...")

    with engine.connect() as con:
        trans = con.begin()
        con.execute(
            'CREATE TABLE preflight.ged_attached_full AS SELECT * FROM preflight.ged_attached'
        )
        trans.commit()

    print("Indexing...")

    with engine.connect() as con:
        con.execute(
            'DELETE FROM preflight.ged_attached WHERE where_prec IN (4,6,7)')
        con.execute(
            'ALTER TABLE preflight.ged_attached_full ADD PRIMARY KEY (id)')
        con.execute(
            'CREATE INDEX ged_attached_gidx ON preflight.ged_attached USING GIST(geom)'
        )
        con.execute(
            'CREATE INDEX ged_attached_idx ON preflight.ged_attached (priogrid_gid,month_id_end, type_of_violence)'
        )
        con.execute(
            'CREATE INDEX ged_attached_s_idx ON preflight.ged_attached (priogrid_gid,month_id_start, type_of_violence)'
        )
        con.execute(
            'CREATE INDEX ged_attached_full_gidx ON preflight.ged_attached_full USING GIST(geom)'
        )
        con.execute(
            'CREATE INDEX ged_attached_fullx_s_idx ON preflight.ged_attached_full (priogrid_gid,month_id_end, type_of_violence)'
        )
        con.execute(
            'CREATE INDEX ged_attached_fullx_gidx ON preflight.ged_attached_full (priogrid_gid,month_id_start, type_of_violence)'
        )

    with engine.connect() as con:
        trans = con.begin()
        con.execute("""with a as
    (SELECT cm.*, c.gwcode FROM staging.country_month cm left join
          staging.country c on (cm.country_id=c.id))
    UPDATE preflight.ged_attached_full SET country_month_id_end=a.id
    FROM a
    WHERE (a.gwcode = ged_attached_full.country_id AND a.month_id = ged_attached_full.month_id_end);
    """)
        trans.commit()

    with engine.connect() as con:
        trans = con.begin()
        con.execute("""with a as
    (SELECT cm.*, c.gwcode FROM staging.country_month cm left join
          staging.country c on (cm.country_id=c.id))
    UPDATE preflight.ged_attached_full SET country_month_id_start=a.id
    FROM a
    WHERE (a.gwcode = ged_attached_full.country_id AND a.month_id = ged_attached_full.month_id_start);
    """)
        trans.commit()
Пример #26
0
def _geoi_assemble(month_start: int, month_end: int):

    log.debug("Started _geoi_assemble")

    lookup_schema = "left_imputation"
    lookup_table = "ged"

    engine = create_engine(CONNECTSTRING)
    db.drop_table(fqtable=f"{lookup_schema}.geoi_out_all")

    query = alchemy_text(f"""
        CREATE TABLE {lookup_schema}.geoi_out_all AS
        SELECT *
        FROM {lookup_schema}.geoi_out_4
        WHERE "1"<>1
        UNION
        SELECT *
        FROM {lookup_schema}.geoi_out_6
        WHERE "1"<>1
        UNION
        SELECT id,
               priogrid_gid AS "1",
               priogrid_gid AS "2",
               priogrid_gid AS "3",
               priogrid_gid AS "4",
               priogrid_gid AS "5",
               priogrid_gid AS "6",
               priogrid_gid AS "7",
               priogrid_gid AS "8",
               priogrid_gid AS "9",
               priogrid_gid AS "10",
               priogrid_gid AS "11",
               priogrid_gid AS "12",
               priogrid_gid AS "13",
               priogrid_gid AS "14",
               priogrid_gid AS "15"
        FROM {lookup_schema}.{lookup_table}
        WHERE month_id_end BETWEEN :m1 AND :m2;
        """)
    with engine.connect() as con:
        con.execute(query, m1=month_start, m2=month_end)

    query = alchemy_text(f"""
        ALTER TABLE {lookup_schema}.geoi_out_all ADD COLUMN month_start BIGINT;
        ALTER TABLE {lookup_schema}.geoi_out_all ADD COLUMN month_end BIGINT;
        ALTER TABLE {lookup_schema}.geoi_out_all ADD COLUMN type_of_violence BIGINT;
        """)
    with engine.connect() as con:
        con.execute(query)

    query = alchemy_text(f"""
        UPDATE {lookup_schema}.geoi_out_all SET
        month_start=c.month_id_start,
        month_end=c.month_id_end,
        type_of_violence=c.type_of_violence
        FROM {lookup_schema}.gedfull c
        WHERE {lookup_schema}.geoi_out_all.id = c.id;
        """)
    with engine.connect() as con:
        con.execute(query)
    log.debug("Finished _geoi_assemble")