def get_last_timestamp(results_table_name):
    """Return the newest `timestamp` stored in the given results table.

    Returns 0 when the table is empty, which signals the caller to run a
    complete import instead of an incremental one.
    """
    timestamp_query = sql.SQL('''
        SELECT
          timestamp
        FROM
          {}
        ORDER BY 
          timestamp DESC
        LIMIT 1
    ''').format(sql.Identifier(results_table_name))

    # open, query, close — connection is only needed for this one lookup
    p_con = psqlDB()
    rows = p_con.retr_query(timestamp_query, None)
    p_con.close()

    # empty result set -> no previous import recorded
    return rows[0][0] if rows else 0
def create_all_results_per_user(tasks_per_user, raw_results):
    """Create '<raw_results>_all' joining per-user tasks with raw results.

    Tasks a user viewed but never answered get result = 0 through the
    LEFT JOIN + CASE.  btree indexes on task_id and user_id are rebuilt
    on the output table.

    Returns the name of the created table.
    """
    p_con = psqlDB()

    input_table_name_a = tasks_per_user
    input_table_name_b = raw_results
    output_table_name = raw_results + '_all'
    index_task_id = raw_results + '_all_task_id_index'
    index_user_id = raw_results + '_all_user_id_index'

    sql_insert = '''
    DROP TABLE IF EXISTS {};
    CREATE TABLE {} AS
    SELECT
      b.task_id
      ,b.user_id
      ,b.project_id
      --,b.group_id
      ,CASE
        WHEN r.result > 0 THEN r.result
        ELSE 0
       END as result
      ,(b.group_timestamp / 1000)::int as group_timestamp
      ,b.task_geom
    FROM 
      {} as b
      LEFT JOIN {} as r
      ON (b.user_id = r."userId" AND b.task_id = r."taskId" AND b.project_id = r."projectId");

    DROP INDEX IF EXISTS {};
    CREATE INDEX {}
      ON {}
      USING btree
      (task_id);

    DROP INDEX IF EXISTS {};
    CREATE INDEX {}
      ON {}
      USING btree
      (user_id);
    '''

    sql_insert = sql.SQL(sql_insert).format(
        sql.Identifier(output_table_name),
        sql.Identifier(output_table_name),
        sql.Identifier(input_table_name_a),
        sql.Identifier(input_table_name_b),
        sql.Identifier(index_task_id),
        sql.Identifier(index_task_id),
        sql.Identifier(output_table_name),
        sql.Identifier(index_user_id),
        sql.Identifier(index_user_id),
        sql.Identifier(output_table_name),
    )

    p_con.query(sql_insert, None)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC to release the connection
    p_con.close()

    return output_table_name
def save_users_psql(users_filename, users_table_name):
    """(Re)create the users table and bulk-load it from a ';'-separated csv.

    The csv file is deleted after a successful import.
    """
    columns = ('userid', 'distance', 'contributions', 'username')

    # create table for user data
    p_con = psqlDB()
    sql_insert = '''
        DROP TABLE IF EXISTS {} CASCADE; 
        CREATE TABLE {} (
          userid character varying,
          distance integer DEFAULT 0,
          contributions integer DEFAULT 0,
          username character varying,
          CONSTRAINT pk_user_id PRIMARY KEY (userid)
        )
    '''
    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(users_table_name),
                                            sql.Identifier(users_table_name))
    p_con.query(sql_insert, None)

    # context manager guarantees the file handle is released even if COPY fails
    with open(users_filename, 'r') as users_file:
        p_con.copy_from(users_file, users_table_name, sep=';', columns=columns)
    os.remove(users_filename)
    p_con.close()

    return
Exemplo n.º 4
0
def get_existing_projects(project_list, project_table_name):
    """Return the stored project rows for the given ids as a json object.

    Each row is serialised server-side with row_to_json; the result is a
    dict keyed by project id.
    """
    # each row is converted to json format using psql function row_to_json
    query = sql.SQL('''
        SELECT
          row_to_json({})
        FROM
          {}
        WHERE
          id = ANY(%s)
    ''').format(sql.Identifier(project_table_name),
                sql.Identifier(project_table_name))

    p_con = psqlDB()
    retr_data = p_con.retr_query(query, (project_list, ))
    p_con.close()

    existing_projects = {row[0]["id"]: row[0] for row in retr_data}

    # round-trip through json to normalise the structure —
    # note this turns integer project-id keys into strings
    return json.loads(json.dumps(existing_projects))
def check_tasks(project_id, task_table_name):
    """Return True if at least one task row exists for project_id.

    Looks in the per-project table '<task_table_name>_<project_id>'.
    Any query failure (e.g. the table does not exist) yields False.
    """
    task_table_name = task_table_name + '_{}'.format(project_id)

    sql_insert = sql.SQL('''
        SELECT
          taskid
        FROM
          {}
        WHERE
          projectid = %s
        LIMIT 1
    ''').format(sql.Identifier(task_table_name))

    p_con = psqlDB()
    try:
        task = p_con.retr_query(sql_insert, [project_id])
        return len(task) == 1
    except Exception:
        # narrow from bare `except:` — treat a failed lookup as "no tasks"
        return False
    finally:
        # close in all paths; the original leaked the connection on error
        p_con.close()
Exemplo n.º 6
0
def update_project_info(project_table_name, new_project):
    """Update the mutable columns of a project row, matched by id.

    Only columns that can change over a project's lifetime are touched.
    """
    sql_insert = sql.SQL('''
        UPDATE {}
        SET
          contributors = %s
          ,progress = %s
          ,state = %s
          ,isFeatured = %s
          ,corrupt = %s
          ,lastcheck = %s
        WHERE
          id = %s
    ''').format(sql.Identifier(project_table_name))

    # parameter order must match the SET/WHERE placeholders above
    data = [
        new_project[key]
        for key in ('contributors', 'progress', 'state', 'isFeatured',
                    'corrupt', 'last_check', 'id')
    ]

    p_con = psqlDB()
    p_con.query(sql_insert, data)
    p_con.close()

    return
def create_projects_table():
    """Create the `projects` table (and the postgis extension it needs).

    Fails if the table already exists; the extension is created with
    IF NOT EXISTS so a pre-installed postgis no longer aborts the setup.
    """
    p_con = psqlDB()

    sql_insert = '''
    CREATE EXTENSION IF NOT EXISTS postgis;

    CREATE TABLE projects (
      id INT NOT NULL
      ,contributors INT NOT NULL
      ,groupAverage DOUBLE PRECISION NOT NULL
      ,image CHARACTER VARYING NOT NULL
      ,importKey CHARACTER VARYING NOT NULL
      ,isFeatured BOOLEAN NOT NULL
      ,lookFor CHARACTER VARYING NOT NULL
      ,name CHARACTER VARYING NOT NULL
      ,progress INT NOT NULL
      ,projectDetails CHARACTER VARYING NOT NULL
      ,state INT NOT NULL
      ,verificationCount INT NOT NULL
      ,corrupt BOOLEAN NOT NULL
      ,lastCheck TIMESTAMP WITHOUT TIME ZONE
      ,extent geometry
      ,centroid geometry
      ,CONSTRAINT pk_project_id PRIMARY KEY (id)
      );
    '''

    p_con.query(sql_insert, None)
    print('created table: projects')
    p_con.close()
Exemplo n.º 8
0
def select_data_for_project(table_name, projectid):
    """Copy the rows of one project into '<table_name>_<projectid>'.

    Returns the name of the created table.
    """
    p_con = psqlDB()

    input_table_name = table_name
    output_table_name = '{}_{}'.format(input_table_name, projectid)

    sql_insert = '''
        DROP TABLE IF EXISTS {};
        CREATE TABLE {} AS
        SELECT
          *
        FROM
          {}
        WHERE
          projectid = %s'''

    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(output_table_name),
                                            sql.Identifier(output_table_name),
                                            sql.Identifier(input_table_name))
    # projectid column is compared as text — keep the str() conversion
    data = [str(projectid)]

    p_con.query(sql_insert, data)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC
    p_con.close()

    return output_table_name
def create_table_all_redundant_tasks():
    """(Re)create the empty `all_redundant_tasks` collector table."""
    p_con = psqlDB()
    sql_insert = '''
    DROP TABLE IF EXISTS all_redundant_tasks;
    CREATE TABLE all_redundant_tasks
    (
      task_id character varying,
      project_id integer,
      real_completed_count bigint,
      task_geom geometry,
      agreement_1 numeric,
      agreement_2 numeric,
      agreement_3 numeric,
      agreement_4 numeric,
      agreement_5 numeric,
      agreement_6 numeric,
      msi_1 numeric,
      msi_2 numeric,
      msi_3 numeric,
      msi_4 numeric,
      msi_5 numeric,
      msi_6 numeric,
      no_si_1 numeric,
      no_si_2 numeric,
      no_si_3 numeric,
      no_si_4 numeric,
      no_si_5 numeric,
      no_si_6 numeric
    )
    '''

    p_con.query(sql_insert, None)
    # close explicitly instead of relying on `del`/GC
    p_con.close()
def calc_agreement_from_results(table_name):
    """Derive crowd class, agreement, msi and no_si per task.

    Input table must provide: task_id, completed_count, yes_count,
    maybe_count, badimage_count, no_count.  Writes the enriched rows to
    '<table_name>_agreement' and returns that table's name.

    agreement is Fleiss-kappa-style observed agreement per task; it is
    defined as 1.0 for single-contributor tasks (the formula would
    divide by zero there).
    """
    p_con = psqlDB()

    sql_insert = '''
    DROP TABLE IF EXISTS {};
    CREATE TABLE {} AS
    SELECT
      b.*
      ,CASE
        WHEN (b.yes_count + b.maybe_count)/b.completed_count::numeric > 0.3 THEN 1
        WHEN b.no_count >= b.badimage_count THEN 0
        WHEN b.badimage_count > b.no_count THEN 2
        ELSE 9
      END as class
      ,CASE
        WHEN b.completed_count = 1 THEN 1.0
        ELSE (
        round(((1.0 / (b.completed_count::numeric * (b.completed_count::numeric - 1.0)))
      *
      (
      ((b.yes_count::numeric ^ 2.0) - b.yes_count::numeric)
      +
      ((b.maybe_count::numeric ^ 2.0) - b.maybe_count::numeric)
      +
      ((b.badimage_count::numeric ^ 2.0) - b.badimage_count::numeric)
      +
      ((b.no_count::numeric ^ 2.0) - b.no_count::numeric)
      )),3)
      ) END as agreement
      ,round(((b.yes_count::numeric + b.maybe_count::numeric)/b.completed_count::numeric),3)
       as msi
      ,round((b.no_count::numeric/b.completed_count::numeric),3)
       as no_si
    FROM
      {} as b'''

    input_table_name = table_name
    output_table_name = table_name + '_agreement'

    # we need to pass table names using the psycopg2 SQL module
    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(output_table_name),
                                            sql.Identifier(output_table_name),
                                            sql.Identifier(input_table_name))

    p_con.query(sql_insert, None)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC
    p_con.close()

    return output_table_name
def create_results_psql(results_csv_filename, results_table_name):
    """Load a results csv into a staging table, then upsert into results.

    The csv is copied into a temporary 'raw_<results_table_name>' table,
    merged into the permanent table via ON CONFLICT on the permanent
    table's "pk_result_id" constraint, and the staging table is dropped.
    The csv file is deleted after the copy.
    """
    columns = ('taskId', 'userId', 'projectId', 'timestamp', 'result',
               'duplicates')
    raw_results_table_name = 'raw_' + results_table_name

    p_con = psqlDB()
    # first, create the staging table matching the csv columns
    sql_insert = '''
            DROP TABLE IF EXISTS {};
            CREATE TABLE {} (
              taskId VARCHAR NOT NULL
              ,userId VARCHAR NOT NULL
              ,projectId INT NOT NULL
              ,timestamp BIGINT NOT NULL
              ,result INT NOT NULL
              ,duplicates INT NOT NULL
              ,CONSTRAINT pk_result_id_raw PRIMARY KEY (taskId, userId, projectId)
            );
        '''
    sql_insert = sql.SQL(sql_insert).format(
        sql.Identifier(raw_results_table_name),
        sql.Identifier(raw_results_table_name))

    p_con.query(sql_insert, None)

    # copy the csv into the staging table; `with` closes the file even on error
    with open(results_csv_filename, 'r') as results_file:
        p_con.copy_from(results_file,
                        raw_results_table_name,
                        sep=',',
                        columns=columns)
    os.remove(results_csv_filename)
    print('copied results to temporary psql table.')

    # upsert staging rows into the permanent table, then drop the staging table
    sql_insert = '''
        INSERT INTO {} (taskid, userid, projectid, timestamp, result, duplicates)
          SELECT
            *
          FROM {} as b
          ON CONFLICT ON CONSTRAINT "pk_result_id"
          DO UPDATE
          SET
          (taskid, userid, projectid, timestamp, result, duplicates)
          =
          (results.taskid, results.userid, results.projectid, results.timestamp, results.result, results.duplicates);
        DROP TABLE IF EXISTS {} CASCADE;
    '''
    sql_insert = sql.SQL(sql_insert).format(
        sql.Identifier(results_table_name),
        sql.Identifier(raw_results_table_name),
        sql.Identifier(raw_results_table_name))

    p_con.query(sql_insert, None)
    p_con.close()
    return
Exemplo n.º 12
0
def update_completed_count_psql(completed_count_filename, project_id,
                                task_table_name):
    """Load completed counts from csv and apply them to the task table.

    Counts are staged in a temporary 'groups_...' table, copied onto the
    per-project task table by (groupid, projectid), and the staging
    table is dropped.  The csv file is deleted afterwards.
    """
    columns = ('groupid', 'projectid', 'completedcount')
    raw_group_table_name = 'groups_' + task_table_name + '_{}'.format(
        project_id)
    task_table_name = task_table_name + '_{}'.format(project_id)

    p_con = psqlDB()
    # first, create the staging table for group-level completed counts
    sql_insert = '''
        DROP TABLE IF EXISTS {};
        CREATE TABLE {} (
          groupid integer
          ,projectid integer
          ,completedcount integer
        )
    '''
    sql_insert = sql.SQL(sql_insert).format(
        sql.Identifier(raw_group_table_name),
        sql.Identifier(raw_group_table_name))

    p_con.query(sql_insert, None)

    # copy the csv into the staging table; `with` closes the file even on error
    with open(completed_count_filename, 'r') as completed_count_file:
        p_con.copy_from(completed_count_file,
                        raw_group_table_name,
                        sep=';',
                        columns=columns)
    os.remove(completed_count_filename)

    # propagate counts to the task table, then drop the staging table
    sql_insert = '''
        UPDATE {} as b
        SET
          completedcount = a.completedcount
        FROM {} as a
        WHERE
          a.groupid = b.groupid
          AND
          a.projectid = b.projectid;
        DROP TABLE IF EXISTS {}
    '''
    sql_insert = sql.SQL(sql_insert).format(
        sql.Identifier(task_table_name), sql.Identifier(raw_group_table_name),
        sql.Identifier(raw_group_table_name))
    p_con.query(sql_insert, None)
    p_con.close()

    return
def merge_all_results(project_id):
    """Append one project's '<id>_results_all' rows into all_results_raw."""
    p_con = psqlDB()
    sql_insert = '''
    INSERT INTO all_results_raw (task_id, user_id, project_id, result, group_timestamp, task_geom)
    SELECT
      s.*
    FROM {} as s
    '''

    # source table column order must match the INSERT column list above
    input_table_name = str(project_id) + '_results_all'
    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(input_table_name))

    p_con.query(sql_insert, None)
    # close explicitly instead of relying on `del`/GC
    p_con.close()
def merge_all_tasks(project_id):
    """Append one project's '<id>_results_all_tasks' rows into all_tasks."""
    p_con = psqlDB()
    sql_insert = '''
    INSERT INTO all_tasks (task_id, project_id, completed_count, msi, no_si, crowd_answer, agreement, task_geom)
    SELECT
      s.*
    FROM {} as s
    '''

    # source table column order must match the INSERT column list above
    input_table_name = str(project_id) + '_results_all_tasks'
    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(input_table_name))

    p_con.query(sql_insert, None)
    # close explicitly instead of relying on `del`/GC
    p_con.close()
def run_stats_export(table_name, path):
    """Export one row of a stats view as a json file.

    Creates the stats_general view on the fly when it is missing, then
    queries row_to_json on `table_name` and writes the first row to the
    filename part of `path` (chdir-ing into its directory part first).

    Side effect: changes the process working directory when `path`
    contains a directory component.
    """
    p_con = psqlDB()

    # check if stats_general view exists in db and is ready to be queried
    check_stats_view = '''
            SELECT EXISTS (
              SELECT 1
              FROM   information_schema.tables 
              WHERE  table_schema = 'public'
              AND    table_name = 'stats_general'
   );'''
    check_view = p_con.retr_query(check_stats_view, None)

    if not check_view[0][0]:
        create_stats_general_view()
        print('view does not exist and will be created')

    # separate directory from filename; re-raise on bad input —
    # the original except branch referenced unbound names (NameError)
    try:
        head, tail = ntpath.split(path)
    except (TypeError, AttributeError):
        print('slicing of path/file failed')
        print('path: %s' % path)
        raise

    # chdir only when a directory component is present
    if head:
        os.chdir(head)
        print('changed dir')

    # query the stats view, one json document per row
    sql_insert = sql.SQL('''
            SELECT
              row_to_json({})
            FROM
              {}
        ''').format(sql.Identifier(table_name), sql.Identifier(table_name))
    retr_data = p_con.retr_query(sql_insert, None)
    p_con.close()

    # write the first row to file as json
    with open(tail, 'w') as fo:
        json.dump(retr_data[0][0], fo, sort_keys=False, indent=2)
def clean_up_database(delete_table_list):
    """Drop every table named in delete_table_list (if it exists)."""
    # one shared connection instead of opening a new one per table,
    # and close it explicitly when done
    p_con = psqlDB()
    for table_name in delete_table_list:
        sql_insert = sql.SQL('''
        DROP TABLE IF EXISTS {};
        ''').format(sql.Identifier(table_name))

        p_con.query(sql_insert, None)
        print('deleted: %s' % table_name)
    p_con.close()
Exemplo n.º 17
0
def save_projects_psql(project_table_name, new_projects):
    """Insert new projects and update changed ones in psql.

    Delegates to insert_project_info / update_project_info, which manage
    their own database connections.  (The original opened an extra
    connection here that was never used nor closed — a leak.)
    """
    for i in list(new_projects):
        if new_projects[i]['isNew'] == 1:
            insert_project_info(project_table_name, new_projects[i])
            print('insert data in psql for new project:', i)
        # we only update information for projects that need an update
        elif new_projects[i]['needUpdate'] == 1:
            update_project_info(project_table_name, new_projects[i])
            print('update data in psql for updated project:', i)

    return
def get_attributes_from_table(table):
    """Return [(column_name, data_type), ...] for a table, from pg_attribute."""
    p_con = psqlDB()
    sql_insert = '''
    SELECT
      a.attname as column_name
      ,format_type(a.atttypid, a.atttypmod) AS data_type
    FROM pg_attribute a
    JOIN pg_class b ON (a.attrelid = b.relfilenode)
    WHERE
      b.relname = %s
      AND
      a.attstattarget = -1;
    '''
    data = [table]
    attributes_raw = p_con.retr_query(sql_insert, data)
    # close the connection before returning (was leaked in the original)
    p_con.close()
    return attributes_raw
Exemplo n.º 19
0
def get_all_user_ids():
    """Return all user ids stored in the user_matrix table as a list."""
    p_con = psqlDB()

    sql_insert = '''
    SELECT
      user_id
    FROM 
      user_matrix
    '''

    content = p_con.retr_query(sql_insert, None)
    # close the connection before returning (was leaked in the original)
    p_con.close()

    # each row is a one-element tuple (user_id,)
    return [row[0] for row in content]
def create_table_all_results():
    """(Re)create the empty all_results_raw collector table."""
    p_con = psqlDB()
    sql_insert = '''
    DROP TABLE IF EXISTS all_results_raw;
    CREATE TABLE all_results_raw
    (
    task_id character varying,
    user_id character varying,
    project_id integer,
    result integer,
    group_timestamp integer,
    task_geom geometry
    )
    '''

    p_con.query(sql_insert, None)
    # close explicitly instead of relying on `del`/GC
    p_con.close()
def create_table_all_tasks():
    """(Re)create the empty all_tasks collector table."""
    p_con = psqlDB()
    sql_insert = '''
    DROP TABLE IF EXISTS all_tasks;
    CREATE TABLE all_tasks
    (
    task_id character varying,
    project_id integer,
    completed_count bigint,
    msi numeric,
    no_si numeric,
    crowd_answer integer,
    agreement numeric,
    task_geom geometry
    )
    '''

    p_con.query(sql_insert, None)
    # close explicitly instead of relying on `del`/GC
    p_con.close()
Exemplo n.º 22
0
def create_user_contributions(project_results, user_tasks):
    """Create 'user_contributions_<projectid>' joining user tasks + results.

    Tasks a user viewed but never answered get result = 0 through the
    LEFT JOIN + CASE.  The project id suffix is taken from the last
    '_'-separated part of `project_results`.

    Returns the name of the created table.
    """
    p_con = psqlDB()

    input_table_name_a = user_tasks
    input_table_name_b = project_results
    output_table_name = '{}_{}'.format('user_contributions',
                                       project_results.split('_')[-1])

    sql_insert = '''
        DROP TABLE IF EXISTS {};
        CREATE TABLE {} AS
        SELECT
          b.taskid
          ,b.userid
          ,b.projectid
          --,b.groupid
          ,CASE
            WHEN r.result > 0 THEN r.result
            ELSE 0
           END as result
          ,(b.group_timestamp / 1000)::int as group_timestamp
          ,b.geo
        FROM 
          {} as b
          LEFT JOIN {} as r
          ON (b.userid = r.userid
              AND
              b.taskid = r.taskid
              AND
              b.projectid::int = r.projectid)
    '''

    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(output_table_name),
                                            sql.Identifier(output_table_name),
                                            sql.Identifier(input_table_name_a),
                                            sql.Identifier(input_table_name_b))

    p_con.query(sql_insert, None)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC
    p_con.close()

    return output_table_name
def create_all_contributions(project_id, unique_task_table_name, user_contributions_table):
    """Create 'contributions_<project_id>': unique tasks LEFT JOIN contributions.

    The join is necessary because user contributions may omit tasks
    where no user contributed any data.

    Returns the name of the created table.
    """
    p_con = psqlDB()

    input_table_name = user_contributions_table
    output_table_name = 'contributions_{}'.format(project_id)
    tasks_table = unique_task_table_name

    sql_insert = '''
        DROP TABLE IF EXISTS {};
        CREATE TABLE {} AS
        SELECT
          t.taskid
          ,t.projectid
          ,t.completedCount
          ,c.userid
          ,c.group_timestamp
          ,c.result
          ,t.st_geomfromtext as geo
        FROM
          {} as t
        LEFT JOIN
          {} as c ON (t.taskid = c.taskid AND t.projectid::int = c.projectid::int)
        WHERE
          t.projectid = %s 
        '''

    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(output_table_name),
                                            sql.Identifier(output_table_name),
                                            sql.Identifier(tasks_table),
                                            sql.Identifier(input_table_name))
    # projectid is compared as text — keep the str() conversion
    data = [str(project_id)]

    p_con.query(sql_insert, data)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC
    p_con.close()

    return output_table_name
def aggregate_results_using_array(results_per_user):
    """Aggregate per-user results into one row per task with a result array.

    Input table must provide: task_id, project_id, result, task_geom.
    Output '<results_per_user>_array' has completed_count (rows per task)
    and results (array_agg of the individual result codes).

    Returns the name of the created table.
    """
    p_con = psqlDB()

    sql_insert = '''
        DROP TABLE IF EXISTS {};
        CREATE TABLE {} AS 
          SELECT
            b.task_id
            ,count(task_id) as completed_count
            ,array_agg(result) as results
            ,b.project_id
            ,b.task_geom
        FROM
          {} as b
        GROUP BY
          b.project_id
          ,b.task_id
          ,b.task_geom'''

    input_table_name = results_per_user
    output_table_name = results_per_user + '_' + 'array'

    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(output_table_name),
                                            sql.Identifier(output_table_name),
                                            sql.Identifier(input_table_name))

    p_con.query(sql_insert, None)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC
    p_con.close()

    return output_table_name
def get_layer_bbox(project_table_name, project_id):
    """Return a project's extent bounding box as [minx, maxx, miny, maxy].

    Raises IndexError when the project id is not found in the table.
    """
    p_con = psqlDB()
    sql_insert = '''
    SELECT
      id
      ,st_XMin(extent)
      ,st_XMax(extent)
      ,st_YMin(extent)
      ,st_YMax(extent)
    FROM {}
    WHERE
      id = %s
    '''
    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(project_table_name))
    data = [project_id]

    bbox_raw = p_con.retr_query(sql_insert, data)
    # close the connection before returning (was leaked in the original)
    p_con.close()

    # drop the leading id column, keep minx, maxx, miny, maxy
    bbox = [bbox_raw[0][1], bbox_raw[0][2], bbox_raw[0][3], bbox_raw[0][4]]
    return bbox
Exemplo n.º 26
0
def get_psql_projects(project_table_name):
    """Return the ids of all non-corrupt projects stored in psql."""
    p_con = psqlDB()
    sql_insert = '''
        SELECT
          id
        FROM
          {}
        WHERE
          corrupt is False
    '''
    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(project_table_name))

    retr_data = p_con.retr_query(sql_insert, None)
    p_con.close()

    # comprehension over rows; avoids shadowing the builtin `id`
    return [row[0] for row in retr_data]
Exemplo n.º 27
0
def get_all_non_corrupt_projects(projects):
    """Filter a project-id list down to non-corrupt, enriched projects.

    A project qualifies when it is marked non-corrupt in `projects` AND
    an enriched 'final_<id>' table exists in the public schema.  Returns
    the intersection of the input list with those qualifying ids
    (order not preserved).
    """
    p_con = psqlDB()
    # NOTE: ltrim strips any of the characters f,i,n,a,l,_ from the left;
    # safe here because the suffix is purely numeric
    sql_insert = '''
    SELECT
      p.id
      ,i.table_name
    FROM
      projects as p, information_schema.tables as i  
    WHERE
      not p.corrupt
      AND
      i.table_schema = 'public'
      AND
      left(i.table_name, 6) = 'final_' 
      AND
      ltrim(i.table_name, 'final_')::int = p.id
    ORDER BY
      p.id
    '''

    retr = p_con.retr_query(sql_insert, None)
    # close the connection before further processing (was leaked in the original)
    p_con.close()

    existing_projects = [row[0] for row in retr]

    # intersect existing projects and input projects
    filtered_projects = list(
        set(existing_projects).intersection(set(projects)))
    print('filtered projects. original input: %s, remaining in list: %s' %
          (projects, filtered_projects))
    logging.warning(
        'filtered projects. original input: %s, remaining in list: %s' %
        (projects, filtered_projects))
    return filtered_projects
def create_results_table():
    """Create the permanent `results` table and its four btree indexes.

    Fails if the table already exists.
    """
    p_con = psqlDB()

    sql_insert = '''
        CREATE TABLE results (
          taskId VARCHAR NOT NULL
          ,userId VARCHAR NOT NULL
          ,projectId INT NOT NULL
          ,timestamp BIGINT NOT NULL
          ,result INT NOT NULL
          ,duplicates INT NOT NULL
          ,CONSTRAINT pk_result_id PRIMARY KEY (taskId, userId, projectId)
        );

        CREATE INDEX results_taskId_index
          ON public.results
          USING BTREE
          (taskId);

        CREATE INDEX results_timestamp_index
          ON public.results
          USING BTREE
          (timestamp);

        CREATE INDEX results_projectId_index
          ON public.results
          USING BTREE
          (projectId);

        CREATE INDEX results_index
          ON public.results
          USING BTREE
          (result);
    '''
    p_con.query(sql_insert, None)
    # fixed message typo: 'crated' -> 'created'
    print('created table: results')
    p_con.close()
def get_unique_tasks(project_id, task_table_name):
    """Create 'tasks_unique_<project_id>' with deduplicated tasks.

    Tasks can appear in several groups; duplicates are merged by summing
    their completedcount per (taskid, projectid, geometry).

    Returns the name of the created table.
    """
    p_con = psqlDB()

    input_table_name = task_table_name
    output_table_name = 'tasks_unique_{}'.format(project_id)

    sql_insert = '''
    DROP TABLE IF EXISTS {};
    CREATE TABLE {} AS
    SELECT
      t.taskid
      ,t.projectid
      ,Sum(t.completedcount) as completedcount
      -- don't forget the geometry
      ,t.st_geomfromtext
    FROM
      {} as t
    WHERE
     projectid = %s
    GROUP BY
      t.taskid
      ,t.projectid
      ,t.st_geomfromtext
    '''

    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(output_table_name),
                                            sql.Identifier(output_table_name),
                                            sql.Identifier(input_table_name))
    # projectid is compared as text — keep the str() conversion
    data = [str(project_id)]

    p_con.query(sql_insert, data)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC
    p_con.close()

    return output_table_name
def create_all_tasks_per_user(groups_per_user, task_geom):
    """Create '<task_geom>_per_user': one row per (user, task).

    Expands per-user groups into per-user tasks via the group_id join;
    GROUP BY collapses tasks shared between groups, keeping the earliest
    group_timestamp.

    Returns the name of the created table.
    """
    p_con = psqlDB()

    input_table_name_a = groups_per_user
    input_table_name_b = task_geom
    output_table_name = task_geom + '_per_user'

    sql_insert = '''
    DROP TABLE IF EXISTS {};
    CREATE TABLE {} AS
    SELECT
      b."userId" as user_id
      ,t.task_id
      ,b.project_id
      ,Min(b.group_timestamp) as group_timestamp
      --,b.group_id
      --,b.count
      --,b.edge_count
      ,t.task_geom
    FROM
      {} as b,
      {} as t
    WHERE
      b.group_id = t.group_id
    GROUP BY
      -- we need to group by task_id so that we avoid duplicates
      user_id, t.task_id, b.project_id, t.task_geom
    -- ORDER BY user_id, t.task_id'''

    sql_insert = sql.SQL(sql_insert).format(sql.Identifier(output_table_name),
                                            sql.Identifier(output_table_name),
                                            sql.Identifier(input_table_name_a),
                                            sql.Identifier(input_table_name_b))
    p_con.query(sql_insert, None)
    print('created: %s' % output_table_name)
    # close explicitly instead of relying on `del`/GC
    p_con.close()

    return output_table_name