Exemplo n.º 1
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    new_ids = processNewData(existing_ids)

    new_count = len(new_ids)
    existing_count = new_count + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, new_count, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD)  # MAXAGE)

    logging.info('SUCCESS')
Exemplo n.º 2
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            logging.info("Clearing table")
            cartosql.dropTable(CARTO_TABLE)

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    else:
        createTableWithIndices(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new = processNewData(existing_ids)

    existing_count = num_new + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, num_new, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD, MAXAGE)

    ###
    logging.info('SUCCESS')
Exemplo n.º 3
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.deleteRows(CARTO_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    new_count = processNewData(existing_ids)
    total_count = len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        total_count, new_count, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD)

    # Get most recent update date
    if new_count > 0:
        most_recent_date = datetime.datetime.utcnow()
        lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
Exemplo n.º 4
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            logging.info("Clearing table")
            cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY'))

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    else:
        createTableWithIndices(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new = processNewData(existing_ids)

    existing_count = num_new + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(existing_count, num_new, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD, MAXAGE)

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)

    ###
    logging.info('SUCCESS')
Exemplo n.º 5
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        logging.info('Clearing explore table')
        cartosql.dropTable(CARTO_TABLE_EXPLORE)
        cartosql.dropTable(CARTO_TABLE_PLANETPULSE)

    # 1. Check if table exists and create table

    ## EXPLORE TABLE
    existing_explore_ids = []
    if cartosql.tableExists(CARTO_TABLE_EXPLORE):
        logging.info('Fetching existing explore ids')
        existing_explore_ids = getIds(CARTO_TABLE_EXPLORE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_TABLE_EXPLORE))
        createTableWithIndex(CARTO_TABLE_EXPLORE, CARTO_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    ## PLANET PULSE TABLE
    existing_pp_ids = []
    if cartosql.tableExists(CARTO_TABLE_PLANETPULSE):
        logging.info('Fetching existing planet pulse ids')
        existing_pp_ids = getIds(CARTO_TABLE_PLANETPULSE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(
            CARTO_TABLE_PLANETPULSE))
        createTableWithIndex(CARTO_TABLE_PLANETPULSE, CARTO_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new_explore, num_new_pp = processNewData(existing_explore_ids,
                                                 existing_pp_ids)
    existing_count_explore = num_new_explore + len(existing_explore_ids)
    existing_count_pp = num_new_pp + len(existing_pp_ids)

    logging.info('Total rows in Explore table: {}, New: {}, Max: {}'.format(
        existing_count_explore, num_new_explore, MAXROWS_EXPLORE))

    logging.info(
        'Total rows in Planet Pulse table: {}, New: {}, Max: {}'.format(
            existing_count_pp, num_new_pp, MAXROWS_PLANETPULSE))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE_EXPLORE, MAXROWS_EXPLORE, TIME_FIELD,
                     MAXAGE_EXPLORE)
    deleteExcessRows(CARTO_TABLE_PLANETPULSE, MAXROWS_PLANETPULSE, TIME_FIELD,
                     MAXAGE_PLANETPULSE)

    logging.info('SUCCESS')
Exemplo n.º 6
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_MARKET_TABLE):
            cartosql.dropTable(CARTO_MARKET_TABLE)
        if cartosql.tableExists(CARTO_ALPS_TABLE):
            cartosql.dropTable(CARTO_ALPS_TABLE)

    # 1. Check if table exists and create table
    existing_markets = []
    if cartosql.tableExists(CARTO_MARKET_TABLE):
        logging.info('Fetching existing ids')
        existing_markets = getIds(CARTO_MARKET_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_MARKET_TABLE))
        createTableWithIndex(CARTO_MARKET_TABLE, CARTO_MARKET_SCHEMA,
                             UID_FIELD)

    existing_alps = []
    if cartosql.tableExists(CARTO_ALPS_TABLE):
        logging.info('Fetching existing ids')
        existing_alps = getIds(CARTO_ALPS_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_ALPS_TABLE))
        createTableWithIndex(CARTO_ALPS_TABLE, CARTO_ALPS_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new_markets, num_new_alps = processNewData(existing_markets,
                                                   existing_alps)

    existing_markets = num_new_markets + len(existing_markets)
    logging.info('Total market rows: {}, New: {}, Max: {}'.format(
        existing_markets, num_new_markets, MAXROWS))

    existing_alps = num_new_alps + len(existing_alps)
    logging.info('Total alps rows: {}, New: {}, Max: {}'.format(
        existing_alps, num_new_alps, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_ALPS_TABLE, MAXROWS, TIME_FIELD)  # MAXAGE)

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_ALPS_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
Exemplo n.º 7
0
def checkCreateTable(table, schema, id_field='', time_field=''):
    '''
    Create the table if it does not exist, and pull list of IDs already in the table if it does
    INPUT   table: Carto table to check or create (string)
            schema: dictionary of column names and types, used if we are creating the table for the first time (dictionary)
            id_field: optional, name of column that we want to use as a unique ID for this table; this will be used to compare the
                    source data to the our table each time we run the script so that we only have to pull data we
                    haven't previously uploaded (string)
            time_field:  optional, name of column that will store datetime information (string)
    '''
    # check it the table already exists in Carto
    if cartosql.tableExists(table, user=CARTO_USER, key=CARTO_KEY):
        # if the table does exist, get a list of all the values in the id_field column
        print('Carto table already exists.')
    else:
        # if the table does not exist, create it with columns based on the schema input
        print('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema, user=CARTO_USER, key=CARTO_KEY)
        # if a unique ID field is specified, set it as a unique index in the Carto table; when you upload data, Carto
        # will ensure no two rows have the same entry in this column and return an error if you try to upload a row with
        # a duplicate unique ID
        if id_field:
            cartosql.createIndex(table,
                                 id_field,
                                 unique=True,
                                 user=CARTO_USER,
                                 key=CARTO_KEY)
        # if a time_field is specified, set it as an index in the Carto table; this is not a unique index
        if time_field:
            cartosql.createIndex(table,
                                 time_field,
                                 user=CARTO_USER,
                                 key=CARTO_KEY)
Exemplo n.º 8
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Check if table exists, if not, create it
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    ### 2. Retrieve existing data
    r = cartosql.getFields(UID_FIELD, CARTO_TABLE,
                           order='{} desc'.format(TIME_FIELD), f='csv')
    existing_ids = r.text.split('\r\n')[1:-1]
    num_existing = len(existing_ids)

    ### 3. Fetch data from FTP, dedupe, process
    num_new = processData(existing_ids)

    ### 4. Delete data to get back to MAX_ROWS
    num_dropped = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD, MAX_AGE)

    ### 5. Notify results
    total = num_existing + num_new - num_dropped

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)
    
    logging.info('Existing rows: {},  New rows: {}, Max: {}'.format(total, num_new, MAX_ROWS))
    logging.info("SUCCESS")
Exemplo n.º 9
0
def checkCreateTable(table, schema, id_field, time_field):
    '''Get existing ids or create table'''
    if cartosql.tableExists(table,
                            user=os.getenv('CARTO_USER'),
                            key=os.getenv('CARTO_KEY')):
        logging.info('Fetching existing IDs')
        r = cartosql.getFields(id_field,
                               table,
                               f='csv',
                               user=os.getenv('CARTO_USER'),
                               key=os.getenv('CARTO_KEY'))
        return r.text.split('\r\n')[1:-1]
    else:
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table,
                             schema,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
        cartosql.createIndex(table,
                             id_field,
                             unique=True,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
        cartosql.createIndex(table,
                             time_field,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
    return []
Exemplo n.º 10
0
def cleanOldRows(table, time_field, max_age, date_format='%Y-%m-%d %H:%M:%S'):
    ''' 
    Delete rows that are older than a certain threshold
    INPUT   table: name of table in Carto from which we will delete the old data (string)
            time_field: column that stores datetime information (string) 
            max_age: oldest date that can be stored in the Carto table (datetime object)
            date_format: format of dates in Carto table (string)
    RETURN  num_expired: number of rows that have been dropped from the table (integer)
    '''
    # initialize number of rows that will be dropped as 0
    num_expired = 0
    # if the table exists
    if cartosql.tableExists(table, CARTO_USER, CARTO_KEY):
        # check if max_age variable is a datetime object
        if isinstance(max_age, datetime.datetime):
            # convert datetime object to string formatted according to date_format
            max_age = max_age.strftime(date_format)
        elif isinstance(max_age, str):
            # raise an error if max_age is a string
            logging.error(
                'Max age must be expressed as a datetime.datetime object')
        # delete rows from table which are older than the max_age
        r = cartosql.deleteRows(table, "{} < '{}'".format(time_field, max_age),
                                CARTO_USER, CARTO_KEY)
        # get the number of rows that were dropped from the table
        num_expired = r.json()['total_rows']
    else:
        # raise an error if the table doesn't exist
        logging.error("{} table does not exist yet".format(table))

    return (num_expired)
Exemplo n.º 11
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Check if table exists and create table
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)
    existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    num_existing = len(existing_ids)

    ### 2. Fetch data from FTP, dedupe, process
    num_new = processData(existing_ids)
    num_total = num_existing + num_new

    ### 3. Notify results
    logging.info('Total rows: {}, New rows: {}, Max: {}'.format(num_total, num_new, MAX_ROWS))
    deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    # Get most recent update date
    #if new rows were added to table, make today the most recent update date
    if num_new > 0:
        most_recent_date = datetime.datetime.utcnow()
        lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info("SUCCESS")
Exemplo n.º 12
0
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # clear the table before starting, if specified
    if CLEAR_TABLE_FIRST:
        logging.info('Clearing Table')
        # if the table exists
        if cartosql.tableExists(CARTO_TABLE, user=CARTO_USER, key=CARTO_KEY):
            # delete all the rows
            cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=CARTO_USER, key=CARTO_KEY)
            # note: we do not delete the entire table because this will cause the dataset visualization on Resource Watch
            # to disappear until we log into Carto and open the table again. If we simply delete all the rows, this
            # problem does not occur

    # Check if table exists, create it if it does not
    logging.info('Checking if table exists and getting existing IDs.')
    existing_ids = checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD)

    # Fetch, process, and upload the new data
    logging.info('Fetching new data')
    num_new = processData(existing_ids)
    logging.info('Previous rows: {},  New rows: {}'.format(len(existing_ids), num_new))

    # Update Resource Watch
    updateResourceWatch(num_new)

    # Delete local files in Docker container
    delete_local()

    logging.info('SUCCESS')
Exemplo n.º 13
0
def checkCreateTable(table, schema, id_field, time_field=''):
    '''
    Create the table if it does not exist, and pull list of IDs already in the table if it does
    INPUT   table: Carto table to check or create (string)
            schema: dictionary of column names and types, used if we are creating the table for the first time (dictionary)
            id_field: name of column that we want to use as a unique ID for this table; this will be used to compare the
                    source data to the our table each time we run the script so that we only have to pull data we
                    haven't previously uploaded (string)
            time_field:  optional, name of column that will store datetime information (string)
    RETURN  list of existing IDs in the table, pulled from the id_field column (list of strings)
    '''
    # check it the table already exists in Carto
    if cartosql.tableExists(table, user=CARTO_USER, key=CARTO_KEY):
        # if the table does exist, get a list of all the values in the id_field column
        logging.info('Fetching existing IDs')
        r = cartosql.getFields(id_field, table, f='csv', post=True, user=CARTO_USER, key=CARTO_KEY)
        # turn the response into a list of strings, removing the first and last entries (header and an empty space at end)
        return r.text.split('\r\n')[1:-1]
    else:
        # if the table does not exist, create it with columns based on the schema input
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema, user=CARTO_USER, key=CARTO_KEY)
        # if a unique ID field is specified, set it as a unique index in the Carto table; when you upload data, Carto
        # will ensure no two rows have the same entry in this column and return an error if you try to upload a row with
        # a duplicate unique ID
        if id_field:
            cartosql.createIndex(table, id_field, unique=True, user=CARTO_USER, key=CARTO_KEY)
        # if a time_field is specified, set it as an index in the Carto table; this is not a unique index
        if time_field:
            cartosql.createIndex(table, time_field, user=CARTO_USER, key=CARTO_KEY)
        # return an empty list because there are no IDs in the new table yet
        return []
Exemplo n.º 14
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        logging.info('Clearing table')
        cartosql.dropTable(CARTO_TABLE)

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new = processNewData(existing_ids)

    existing_count = num_new + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, num_new, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD, datetime.datetime(year=MAXAGE, month=1, day=1))

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
Exemplo n.º 15
0
def processData():
    '''
    Function to download data and upload it to Carto
    Will first try to get the data for today three times
    Then decrease a day up until 8 tries until it finds one
    '''
    date = datetime.date.today() - datetime.timedelta(days=1)
    success = False
    tries = 0
    while tries < MAX_TRIES and success == False:
        logging.info("Fetching data for {}".format(str(date)))
        f = getFilename(date)
        url = SOURCE_URL.format(date=date.strftime('%Y%m%d'))
        try:
            urllib.request.urlretrieve(url, f)

        except Exception as inst:
            logging.info("Error fetching data for {}".format(str(date)))
            if tries >= 2:
                date = date - datetime.timedelta(days=1)
            tries = tries + 1
            if tries == MAX_TRIES:
                logging.error(
                    "Error fetching data for {}, and max tries reached. See source for last data update."
                    .format(str(datetime.date.today())))
            success = False
        else:
            df = pd.read_csv(f,
                             header=0,
                             usecols=[
                                 'Lat_DNB', 'Lon_DNB', 'Date_Mscan',
                                 'Date_LTZ', 'QF_Detect', 'EEZ', 'Land_Mask'
                             ])
            df = df.drop(df[df.QF_Detect == 999999].index)
            df['the_geom'] = df.apply(
                lambda row: getGeom(row['Lon_DNB'], row['Lat_DNB']), axis=1)

            df = df[[
                'the_geom', 'QF_Detect', 'Date_Mscan', 'Date_LTZ', 'Land_Mask',
                'Lon_DNB', 'Lat_DNB', 'EEZ'
            ]]
            if not cartosql.tableExists(CARTO_TABLE):
                logging.info('Table {} does not exist'.format(CARTO_TABLE))
                cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA)
            else:
                cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL')
                cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA)

                rows = df.values.tolist()
                logging.info('Success!')
                #logging.info('The following includes the first ten rows added to Carto:')
                #logging.info(rows[:10])
                if len(rows):
                    cartosql.blockInsertRows(CARTO_TABLE, CARTO_SCHEMA.keys(),
                                             CARTO_SCHEMA.values(), rows)
            tries = tries + 1
            success = True
Exemplo n.º 16
0
def checkCreateTable(table, schema, id_field, time_field):
    '''Get existing ids or create table'''
    if cartosql.tableExists(table):
        logging.info('Fetching existing IDs')
        return getFieldAsList(id_field, table)
    else:
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema)
        cartosql.createIndex(table, id_field, unique=True)
        cartosql.createIndex(table, time_field)
    return []
Exemplo n.º 17
0
def checkCreateTable(table, schema, id_field, time_field):
    '''
    Get existing ids or create table
    Return a list of existing ids in time order
    '''
    if cartosql.tableExists(table):
        logging.info('Table {} already exists'.format(table))
    else:
        logging.info('Creating Table {}'.format(table))
        cartosql.createTable(table, schema)
        cartosql.createIndex(table, id_field, unique=True)
        if id_field != time_field:
            cartosql.createIndex(table, time_field)
Exemplo n.º 18
0
def checkCreateTable(table, schema, id_field, time_field=''):
    '''Get existing ids or create table'''
    if cartosql.tableExists(table):
        logging.info('Fetching existing IDs')
        r = cartosql.getFields(id_field, table, f='csv', post=True)
        return r.text.split('\r\n')[1:-1]
    else:
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema)
        cartosql.createIndex(table, id_field, unique=True)
        if time_field:
            cartosql.createIndex(table, time_field)
    return []
Exemplo n.º 19
0
def processArgs(args):
    opts = {}
    if args['-u']:
        opts['user'] = args['-u']
    if args['-k']:
        opts['key'] = args['-k']
    f = args['-f']
    if f:
        opts['f'] = f
    if args['--help'] or not cartosql.init():
        return __doc__
    if args['<sql>']:
        if args['post']:
            r = cartosql.post(args['<sql>'], **opts)
            return returnFormat(r, f)
        elif args['get']:
            r = cartosql.get(args['<sql>'], **opts)
            return returnFormat(r, f)
    elif args['select']:
        if args['<fields>'] and args['<table>']:
            if args['-w']:
                opts['where'] = args['-w']
            if args['-o']:
                opts['order'] = args['-o']
            if args['-l']:
                opts['limit'] = args['-l']
            r = cartosql.getFields(args['<fields>'], args['<table>'], **opts)
            return returnFormat(r, f)
    elif args['ls']:
        r = cartosql.getTables(**opts)
        if f is None or f == 'csv':
            return prettyJson(r)
        return returnFormat(r, f)
    elif args['exists'] and args['<table>']:
        r = cartosql.tableExists(args['<table>'], **opts)
        return r
    elif args['drop'] and args['<table>']:
        confirm = args['--confirm']
        if not confirm:
            confirm = input('Drop table {}? (y/N)'.format(args['<table>'])) == 'y'
        if confirm:
            r = cartosql.dropTable(args['<table>'], **opts)
            return returnFormat(r, f)
        else:
            print('Pass option --confirm to drop table')
    return __doc__
Exemplo n.º 20
0
def processData():
    '''
    Function to download data and upload it to Carto
    Will first try to get the data for MAX_TRIES then quits
    '''
    success = False
    tries = 0
    df = None
    while tries < MAX_TRIES and success == False:
        logging.info('Try running feeds, try number = {}'.format(tries))
        try:
            df = feeds()
            success = True
        except Exception as inst:
            logging.info(inst)
            logging.info("Error fetching data trying again")
            tries = tries + 1
            if tries == MAX_TRIES:
                logging.error(
                    "Error fetching data, and max tries reached. See source for last data update."
                )
            success = False
    if success == True:
        if not cartosql.tableExists(CARTO_TABLE,
                                    user=os.getenv('CARTO_USER'),
                                    key=os.getenv('CARTO_KEY')):
            logging.info('Table {} does not exist'.format(CARTO_TABLE))
            cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA)
            # Send dataframe to Carto
            logging.info('Writing to Carto')
            cc = cartoframes.CartoContext(
                base_url="https://{user}.carto.com/".format(user=CARTO_USER),
                api_key=CARTO_KEY)
            cc.write(df, CARTO_TABLE, overwrite=True, privacy='public')
        else:
            cartosql.deleteRows(CARTO_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))
            # Send dataframe to Carto
            logging.info('Writing to Carto')
            cc = cartoframes.CartoContext(
                base_url="https://{user}.carto.com/".format(user=CARTO_USER),
                api_key=CARTO_KEY)
            cc.write(df, CARTO_TABLE, overwrite=True, privacy='public')
Exemplo n.º 21
0
def cleanOldRows(table, time_field, max_age, date_format='%Y-%m-%d %H:%M:%S'):
    '''
    Delete excess rows by age
    Max_Age should be a datetime object or string
    Return number of dropped rows
    '''
    num_expired = 0
    if cartosql.tableExists(table):
        if isinstance(max_age, datetime.datetime):
            max_age = max_age.strftime(date_format)
        elif isinstance(max_age, str):
            logging.error('Max age must be expressed as a datetime.datetime object')

        r = cartosql.deleteRows(table, "{} < '{}'".format(time_field, max_age))
        num_expired = r.json()['total_rows']
    else:
        logging.error("{} table does not exist yet".format(table))

    return(num_expired)
Exemplo n.º 22
0
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # clear the table before starting, if specified
    if CLEAR_TABLE_FIRST:
        logging.info("clearing table")
        # if the table exists
        if cartosql.tableExists(CARTO_TABLE, user=CARTO_USER, key=CARTO_KEY):
            # delete all the rows
            cartosql.deleteRows(CARTO_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=CARTO_USER,
                                key=CARTO_KEY)
            # note: we do not delete the entire table because this will cause the dataset visualization on Resource Watch
            # to disappear until we log into Carto and open the table again. If we simply delete all the rows, this
            # problem does not occur

    # Check if table exists, create it if it does not
    logging.info('Checking if table exists and getting existing IDs.')
    existing_ids = checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD,
                                    TIME_FIELD)

    # Delete rows that are older than a certain threshold
    num_expired = cleanOldRows(CARTO_TABLE, TIME_FIELD, MAX_AGE)

    # Get the filename from source url for which we want to download data
    filename = fetchDataFileName(SOURCE_URL)

    # Fetch, process, and upload new data
    logging.info('Fetching new data')
    num_new = processData(SOURCE_URL, filename, existing_ids)
    logging.info('Previous rows: {},  New rows: {}'.format(
        len(existing_ids), num_new))

    # Delete data to get back to MAX_ROWS
    num_deleted = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    # Update Resource Watch
    updateResourceWatch(num_new)

    logging.info("SUCCESS")
Exemplo n.º 23
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Check if table exists and create table
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)
    existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    num_existing = len(existing_ids)

    ### 2. Fetch data from FTP, dedupe, process
    num_new = processData(existing_ids)
    num_total = num_existing + num_new

    ### 3. Notify results
    logging.info('Total rows: {}, New rows: {}, Max: {}'.format(
        num_total, num_new, MAX_ROWS))
    deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    logging.info("SUCCESS")
Exemplo n.º 24
0
def processArgs(args):
    opts = {}
    if args['-u']:
        opts['user'] = args['-u']
    if args['-k']:
        opts['key'] = args['-k']
    f = args['-f']
    if f:
        opts['f'] = f
    if args['--help']:
        return __doc__
    elif args['post'] and args['<sql>']:
        r = cartosql.post(args['<sql>'], **opts)
        return returnFormat(r, f)
    elif args['get']:
        if args['<sql>']:
            r = cartosql.get(args['<sql>'], **opts)
            return returnFormat(r, f)
        if args['<fields>'] and args['<table>']:
            if args['-w']:
                opts['where'] = args['-w']
            if args['-o']:
                opts['order'] = args['-o']
            r = cartosql.getFields(args['<fields>'], args['<table>'], **opts)
            return returnFormat(r, f)
    elif args['ls']:
        r = cartosql.getTables(**opts)
        if f is None or f == 'csv':
            return prettyJson(r)
        return returnFormat(r, f)
    elif args['exists'] and args['<table>']:
        r = cartosql.tableExists(args['<table>'], **opts)
        return returnFormat(r, f)
    elif args['drop'] and args['<table>']:
        r = cartosql.dropTable(args['<table>'], **opts)
        return returnFormat(r, f)
    return __doc__
Exemplo n.º 25
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.deleteRows(CARTO_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))

    ### 1. Check if table exists, if not, create it
    logging.info('Checking if table exists and getting existing IDs.')
    existing_ids = checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD)
    num_existing = len(existing_ids)

    ### 2. Fetch data from FTP, dedupe, process
    logging.info('Fetching new data')
    num_new = processData(existing_ids)
    logging.info('Processing interactions')
    processInteractions()

    ### 3. Delete data to get back to MAX_ROWS
    logging.info('Deleting excess rows')
    num_dropped = deleteExcessRows(CARTO_TABLE, MAX_ROWS, AGE_FIELD)

    ### 4. Notify results
    total = num_existing + num_new - num_dropped

    # If updates, change update date on RW
    if num_new > 0:
        lastUpdateDate(DATASET_ID, datetime.datetime.utcnow())

    logging.info('Existing rows: {},  New rows: {}, Max: {}'.format(
        total, num_new, MAX_ROWS))
    logging.info("SUCCESS")
Exemplo n.º 26
0
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        cartosql.dropTable(CARTO_TABLE)

    # 1. Check collection, create table if necessary
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD)

    # 2. Ingest new data
    num_new_vectors = processNewVectorData(existing_ids)

    # 3. Report results
    existing_count = num_new_vectors + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, num_new_vectors, 'none'))
Exemplo n.º 27
0
def processInteractions():
    r = cartosql.get(
        "SELECT * FROM {} WHERE current='True'".format(CARTO_TABLE),
        user=os.getenv('CARTO_USER'),
        key=os.getenv('CARTO_KEY'))
    interaction_data = r.json()['rows']
    try_num = 0
    #if we didn't get data back, wait a few minutes and try again
    while not len(interaction_data):
        logging.info('Sleeping and trying again.')
        try_num += 1
        time.sleep(300)
        interaction_data = r.json()['rows']
        if try_num > 5:
            logging.error('Problem fetching data to generate interactions')
            exit()

    countries_with_interaction = []
    for interaction in interaction_data:
        ctry = interaction['country_iso3']
        if ctry not in countries_with_interaction:
            countries_with_interaction.append(ctry)
    if cartosql.tableExists(CARTO_TABLE_INTERACTION,
                            user=os.getenv('CARTO_USER'),
                            key=os.getenv('CARTO_KEY')):
        cartosql.deleteRows(CARTO_TABLE_INTERACTION,
                            'cartodb_id IS NOT NULL',
                            user=os.getenv('CARTO_USER'),
                            key=os.getenv('CARTO_KEY'))
    #run to create new table
    #existing_interaction_ids = checkCreateTable(CARTO_TABLE_INTERACTION, CARTO_SCHEMA_INTERACTION, UID_FIELD)
    new_interactions = []
    for ctry in countries_with_interaction:
        r = cartosql.get(
            "SELECT * FROM {} WHERE current='True' AND country_iso3='{}'".
            format(CARTO_TABLE, ctry),
            user=os.getenv('CARTO_USER'),
            key=os.getenv('CARTO_KEY'))
        ctry_interaction_data = r.json()['rows']
        event_num = 1
        for interaction in ctry_interaction_data:
            event = interaction['event_name'].split(": ", 1)
            if event_num == 1:
                if len(event) == 1:
                    interaction_str = '{} ({})'.format(event[0],
                                                       interaction['url'])
                else:
                    interaction_str = '{} ({})'.format(event[1],
                                                       interaction['url'])
            else:
                if len(event) == 1:
                    interaction_str = interaction_str + '; ' + '{} ({})'.format(
                        event[0], interaction['url'])
                else:
                    interaction_str = interaction_str + '; ' + '{} ({})'.format(
                        event[1], interaction['url'])
            event_num += 1
        #uid = gen_interaction_uid(ctry)
        if ctry_interaction_data:
            row = []
            for key in CARTO_SCHEMA_INTERACTION.keys():
                try:
                    if key == 'the_geom':
                        lon = ctry_interaction_data[0]['lon']
                        lat = ctry_interaction_data[0]['lat']
                        item = {'type': 'Point', 'coordinates': [lon, lat]}
                    elif key == 'interaction':
                        item = interaction_str
                    else:
                        item = ctry_interaction_data[0][key]
                except KeyError:
                    item = None
                row.append(item)
            new_interactions.append(row)
    logging.info('Adding {} new interactions'.format(len(new_interactions)))
    cartosql.blockInsertRows(CARTO_TABLE_INTERACTION,
                             CARTO_SCHEMA_INTERACTION.keys(),
                             CARTO_SCHEMA_INTERACTION.values(),
                             new_interactions,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
Exemplo n.º 28
0
def processNewData(url):
    '''
    Fetch, process and upload new data
    INPUT   url: url where you can find the download link for the source data (string)
    RETURN  num_new: number of rows of new data sent to Carto table (integer)
    '''
    # specify the starting page of source url we want to pull
    page = 1
    # generate the url and pull data for this page
    r = requests.get(url.format(page=page))
    # pull data from request response json
    raw_data = r.json()['data']
    # if data is available from source url
    if len(raw_data) > 0:
        # if the table exists
        if cartosql.tableExists(CARTO_TABLE, user=CARTO_USER, key=CARTO_KEY):
            # delete all the rows
            cartosql.deleteRows(CARTO_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=CARTO_USER,
                                key=CARTO_KEY)
        logging.info('Updating {}'.format(CARTO_TABLE))
    else:
        # raise an error that data is not available from source url
        logging.error("Source data missing. Table will not update.")
    # create an empty list to store new data
    new_data = []
    # if data is available from source url
    while len(raw_data) > 0:
        logging.info('Processing page {}'.format(page))
        # read in source data as a pandas dataframe
        df = pd.DataFrame(raw_data)
        # go through each rows in the dataframe
        for row_num in range(df.shape[0]):
            # get the row of data
            row = df.iloc[row_num]
            # create an empty list to store data from this row
            new_row = []
            # go through each column in the Carto table
            for field in CARTO_SCHEMA:
                # if we are fetching data for unique id column
                if field == 'uid':
                    # add the unique id to the list of data from this row
                    new_row.append(row[UID_FIELD])
                # for any other column, check if there are values available from the source for this row
                else:
                    # if data available from source for this field, populate the field with the data
                    # else populate with None
                    val = row[field] if row[field] != '' else None
                    # add this value to the list of data from this row
                    new_row.append(val)
            # add the list of values from this row to the list of new data
            new_data.append(new_row)
        # go to the next page and check for data
        page += 1
        # generate the url and pull data for this page
        r = requests.get(url.format(page=page))
        # pull data from request response json
        raw_data = r.json()['data']

    # find the length (number of rows) of new_data
    num_new = len(new_data)
    # if we have found new dates to process
    if num_new:
        # insert new data into the carto table
        cartosql.blockInsertRows(CARTO_TABLE,
                                 CARTO_SCHEMA.keys(),
                                 CARTO_SCHEMA.values(),
                                 new_data,
                                 user=CARTO_USER,
                                 key=CARTO_KEY)

    return num_new
Exemplo n.º 29
0
final_df = final_df.where((pd.notnull(final_df)), None)

# UPLOAD
# specify column names and types
CARTO_SCHEMA = {
    'iso3': 'text',
    'country': 'text',
    'year': 'numeric',
    'vulnerability': 'numeric',
    'readiness': 'numeric',
    'gain': 'numeric'
}

# check if table exists
if cartosql.tableExists(CARTO_TABLE,
                        user=os.getenv('CARTO_WRI_RW_USER'),
                        key=os.getenv('CARTO_WRI_RW_KEY')):
    print('This table already exists. Please change the name and try again.')
else:
    # create table with appropriate columns
    cartosql.createTable(CARTO_TABLE,
                         CARTO_SCHEMA,
                         user=os.getenv('CARTO_WRI_RW_USER'),
                         key=os.getenv('CARTO_WRI_RW_KEY'))
    # send processed data to table
    cartosql.blockInsertRows(CARTO_TABLE,
                             CARTO_SCHEMA.keys(),
                             CARTO_SCHEMA.values(),
                             final_df.values.tolist(),
                             user=os.getenv('CARTO_WRI_RW_USER'),
                             key=os.getenv('CARTO_WRI_RW_KEY'))
Exemplo n.º 30
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_MARKET_TABLE):
            cartosql.deleteRows(CARTO_MARKET_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))
        if cartosql.tableExists(CARTO_ALPS_TABLE):
            cartosql.deleteRows(CARTO_ALPS_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))
        if cartosql.tableExists(CARTO_INTERACTION_TABLE):
            cartosql.deleteRows(CARTO_INTERACTION_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))

    # 1. Check if table exists and create table
    existing_markets = []
    if cartosql.tableExists(CARTO_MARKET_TABLE,
                            user=os.getenv('CARTO_USER'),
                            key=os.getenv('CARTO_KEY')):
        logging.info('Fetching existing market ids')
        existing_markets = getIds(CARTO_MARKET_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_MARKET_TABLE))
        createTableWithIndex(CARTO_MARKET_TABLE, CARTO_MARKET_SCHEMA,
                             UID_FIELD)

    existing_alps = []
    if cartosql.tableExists(CARTO_ALPS_TABLE,
                            user=os.getenv('CARTO_USER'),
                            key=os.getenv('CARTO_KEY')):
        logging.info('Fetching existing ALPS ids')
        existing_alps = getIds(CARTO_ALPS_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_ALPS_TABLE))
        createTableWithIndex(CARTO_ALPS_TABLE, CARTO_ALPS_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    existing_interactions = []
    if cartosql.tableExists(CARTO_INTERACTION_TABLE,
                            user=os.getenv('CARTO_USER'),
                            key=os.getenv('CARTO_KEY')):
        logging.info('Fetching existing interaction ids')
        existing_interactions = getIds(CARTO_INTERACTION_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(
            CARTO_INTERACTION_TABLE))
        createTableWithIndex(CARTO_INTERACTION_TABLE, CARTO_INTERACTION_SCHEMA,
                             UID_FIELD, INTERACTION_TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new_markets, num_new_alps, markets_updated = processNewData(
        existing_markets, existing_alps)

    # Update Interaction table
    num_new_interactions = processInteractions(markets_updated)

    # Report new data count
    num_existing_markets = num_new_markets + len(existing_markets)
    logging.info('Total market rows: {}, New: {}, Max: {}'.format(
        num_existing_markets, num_new_markets, MAXROWS))

    num_existing_alps = num_new_alps + len(existing_alps)
    logging.info('Total alps rows: {}, New: {}, Max: {}'.format(
        num_existing_alps, num_new_alps, MAXROWS))

    num_existing_interactions = num_new_interactions + len(
        existing_interactions)
    logging.info('Total interaction rows: {}, New: {}, Max: {}'.format(
        num_existing_interactions, num_new_interactions, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_ALPS_TABLE, MAXROWS, TIME_FIELD)  # MAXAGE)

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_ALPS_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')