Exemplo n.º 1
0
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)

    if CLEAR_TABLE_FIRST:
        cartosql.dropTable(CARTO_TABLE)

    ### 1. Check if table exists, if not, create it
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    ### 2. Delete old rows
    num_expired = cleanOldRows(CARTO_TABLE, TIME_FIELD, MAX_AGE)

    ### 3. Retrieve existing data
    r = cartosql.getFields(UID_FIELD,
                           CARTO_TABLE,
                           order='{} desc'.format(TIME_FIELD),
                           f='csv')
    existing_ids = r.text.split('\r\n')[1:-1]
    num_existing = len(existing_ids)

    logging.debug("First 10 IDs already in table: {}".format(
        existing_ids[:10]))

    ### 4. Fetch data from FTP, dedupe, process
    filename = fetchDataFileName(SOURCE_URL)
    num_new = processData(SOURCE_URL, filename, existing_ids)

    ### 5. Delete data to get back to MAX_ROWS
    num_deleted = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    ### 6. Notify results
    logging.info(
        'Expired rows: {}, Previous rows: {},  New rows: {}, Dropped rows: {}, Max: {}'
        .format(num_expired, num_existing, num_new, num_deleted, MAX_ROWS))
    logging.info("SUCCESS")
Exemplo n.º 2
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            logging.info("Clearing table")
            cartosql.dropTable(CARTO_TABLE)

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    else:
        createTableWithIndices(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new = processNewData(existing_ids)

    existing_count = num_new + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, num_new, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD, MAXAGE)

    ###
    logging.info('SUCCESS')
Exemplo n.º 3
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    new_ids = processNewData(existing_ids)

    new_count = len(new_ids)
    existing_count = new_count + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, new_count, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD)  # MAXAGE)

    logging.info('SUCCESS')
Exemplo n.º 4
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Check if table exists, if not, create it
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    ### 2. Retrieve existing data
    r = cartosql.getFields(UID_FIELD, CARTO_TABLE,
                           order='{} desc'.format(TIME_FIELD), f='csv')
    existing_ids = r.text.split('\r\n')[1:-1]
    num_existing = len(existing_ids)

    ### 3. Fetch data from FTP, dedupe, process
    num_new = processData(existing_ids)

    ### 4. Delete data to get back to MAX_ROWS
    num_dropped = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD, MAX_AGE)

    ### 5. Notify results
    total = num_existing + num_new - num_dropped

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)
    
    logging.info('Existing rows: {},  New rows: {}, Max: {}'.format(total, num_new, MAX_ROWS))
    logging.info("SUCCESS")
Exemplo n.º 5
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Check if table exists and create table
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)
    existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    num_existing = len(existing_ids)

    ### 2. Fetch data from FTP, dedupe, process
    num_new = processData(existing_ids)
    num_total = num_existing + num_new

    ### 3. Notify results
    logging.info('Total rows: {}, New rows: {}, Max: {}'.format(num_total, num_new, MAX_ROWS))
    deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    # Get most recent update date
    #if new rows were added to table, make today the most recent update date
    if num_new > 0:
        most_recent_date = datetime.datetime.utcnow()
        lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info("SUCCESS")
Exemplo n.º 6
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        logging.info('Clearing table')
        cartosql.dropTable(CARTO_TABLE)

    # 1. Check if table exists and create table
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new = processNewData(existing_ids)

    existing_count = num_new + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, num_new, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD, datetime.datetime(year=MAXAGE, month=1, day=1))

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
Exemplo n.º 7
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        logging.info('Clearing explore table')
        cartosql.dropTable(CARTO_TABLE_EXPLORE)
        cartosql.dropTable(CARTO_TABLE_PLANETPULSE)

    # 1. Check if table exists and create table

    ## EXPLORE TABLE
    existing_explore_ids = []
    if cartosql.tableExists(CARTO_TABLE_EXPLORE):
        logging.info('Fetching existing explore ids')
        existing_explore_ids = getIds(CARTO_TABLE_EXPLORE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_TABLE_EXPLORE))
        createTableWithIndex(CARTO_TABLE_EXPLORE, CARTO_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    ## PLANET PULSE TABLE
    existing_pp_ids = []
    if cartosql.tableExists(CARTO_TABLE_PLANETPULSE):
        logging.info('Fetching existing planet pulse ids')
        existing_pp_ids = getIds(CARTO_TABLE_PLANETPULSE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(
            CARTO_TABLE_PLANETPULSE))
        createTableWithIndex(CARTO_TABLE_PLANETPULSE, CARTO_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new_explore, num_new_pp = processNewData(existing_explore_ids,
                                                 existing_pp_ids)
    existing_count_explore = num_new_explore + len(existing_explore_ids)
    existing_count_pp = num_new_pp + len(existing_pp_ids)

    logging.info('Total rows in Explore table: {}, New: {}, Max: {}'.format(
        existing_count_explore, num_new_explore, MAXROWS_EXPLORE))

    logging.info(
        'Total rows in Planet Pulse table: {}, New: {}, Max: {}'.format(
            existing_count_pp, num_new_pp, MAXROWS_PLANETPULSE))

    # 3. Remove old observations
    deleteExcessRows(CARTO_TABLE_EXPLORE, MAXROWS_EXPLORE, TIME_FIELD,
                     MAXAGE_EXPLORE)
    deleteExcessRows(CARTO_TABLE_PLANETPULSE, MAXROWS_PLANETPULSE, TIME_FIELD,
                     MAXAGE_PLANETPULSE)

    logging.info('SUCCESS')
Exemplo n.º 8
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_MARKET_TABLE):
            cartosql.dropTable(CARTO_MARKET_TABLE)
        if cartosql.tableExists(CARTO_ALPS_TABLE):
            cartosql.dropTable(CARTO_ALPS_TABLE)

    # 1. Check if table exists and create table
    existing_markets = []
    if cartosql.tableExists(CARTO_MARKET_TABLE):
        logging.info('Fetching existing ids')
        existing_markets = getIds(CARTO_MARKET_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_MARKET_TABLE))
        createTableWithIndex(CARTO_MARKET_TABLE, CARTO_MARKET_SCHEMA,
                             UID_FIELD)

    existing_alps = []
    if cartosql.tableExists(CARTO_ALPS_TABLE):
        logging.info('Fetching existing ids')
        existing_alps = getIds(CARTO_ALPS_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_ALPS_TABLE))
        createTableWithIndex(CARTO_ALPS_TABLE, CARTO_ALPS_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    # 2. Iterively fetch, parse and post new data
    num_new_markets, num_new_alps = processNewData(existing_markets,
                                                   existing_alps)

    existing_markets = num_new_markets + len(existing_markets)
    logging.info('Total market rows: {}, New: {}, Max: {}'.format(
        existing_markets, num_new_markets, MAXROWS))

    existing_alps = num_new_alps + len(existing_alps)
    logging.info('Total alps rows: {}, New: {}, Max: {}'.format(
        existing_alps, num_new_alps, MAXROWS))

    # 3. Remove old observations
    deleteExcessRows(CARTO_ALPS_TABLE, MAXROWS, TIME_FIELD)  # MAXAGE)

    # Get most recent update date
    most_recent_date = get_most_recent_date(CARTO_ALPS_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
Exemplo n.º 9
0
def test():
    ''''''
    logging.basicConfig(level=logging.DEBUG)
    lyr = 'b4f1bd67-d0b7-4b53-a815-1638761ca70f'
    end_time = datetime.datetime.utcnow()
    start_time = end_time - datetime.timedelta(days=1)
    time_field = 'utc'
    table_name = 'cit_003a_air_quality_pm25'
    layerId, table = freezeLayer(lyr, start_time, end_time, time_field,
                                 table_name, True)
    print(('Created: ', layerId, table))
    rw_api.Layer(layerId).delete()
    csql.dropTable(table)
    print(('Deleted: ', layerId, table))
Exemplo n.º 10
0
def main():
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Check if table exists and create table
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)
    existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    num_existing = len(existing_ids)

    ### 2. Fetch data from FTP, dedupe, process
    num_new = processData(existing_ids)
    num_total = num_existing + num_new

    ### 3. Notify results
    logging.info('Total rows: {}, New rows: {}, Max: {}'.format(
        num_total, num_new, MAX_ROWS))
    deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    logging.info("SUCCESS")
Exemplo n.º 11
0
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    if CLEAR_TABLE_FIRST:
        cartosql.dropTable(CARTO_TABLE)

    # 1. Check collection, create table if necessary
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD)

    # 2. Ingest new data
    num_new_vectors = processNewVectorData(existing_ids)

    # 3. Report results
    existing_count = num_new_vectors + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        existing_count, num_new_vectors, 'none'))
Exemplo n.º 12
0
def processArgs(args):
    opts = {}
    if args['-u']:
        opts['user'] = args['-u']
    if args['-k']:
        opts['key'] = args['-k']
    f = args['-f']
    if f:
        opts['f'] = f
    if args['--help'] or not cartosql.init():
        return __doc__
    if args['<sql>']:
        if args['post']:
            r = cartosql.post(args['<sql>'], **opts)
            return returnFormat(r, f)
        elif args['get']:
            r = cartosql.get(args['<sql>'], **opts)
            return returnFormat(r, f)
    elif args['select']:
        if args['<fields>'] and args['<table>']:
            if args['-w']:
                opts['where'] = args['-w']
            if args['-o']:
                opts['order'] = args['-o']
            if args['-l']:
                opts['limit'] = args['-l']
            r = cartosql.getFields(args['<fields>'], args['<table>'], **opts)
            return returnFormat(r, f)
    elif args['ls']:
        r = cartosql.getTables(**opts)
        if f is None or f == 'csv':
            return prettyJson(r)
        return returnFormat(r, f)
    elif args['exists'] and args['<table>']:
        r = cartosql.tableExists(args['<table>'], **opts)
        return r
    elif args['drop'] and args['<table>']:
        confirm = args['--confirm']
        if not confirm:
            confirm = input('Drop table {}? (y/N)'.format(args['<table>'])) == 'y'
        if confirm:
            r = cartosql.dropTable(args['<table>'], **opts)
            return returnFormat(r, f)
        else:
            print('Pass option --confirm to drop table')
    return __doc__
Exemplo n.º 13
0
def processArgs(args):
    opts = {}
    if args['-u']:
        opts['user'] = args['-u']
    if args['-k']:
        opts['key'] = args['-k']
    f = args['-f']
    if f:
        opts['f'] = f
    if args['--help']:
        return __doc__
    elif args['post'] and args['<sql>']:
        r = cartosql.post(args['<sql>'], **opts)
        return returnFormat(r, f)
    elif args['get']:
        if args['<sql>']:
            r = cartosql.get(args['<sql>'], **opts)
            return returnFormat(r, f)
        if args['<fields>'] and args['<table>']:
            if args['-w']:
                opts['where'] = args['-w']
            if args['-o']:
                opts['order'] = args['-o']
            r = cartosql.getFields(args['<fields>'], args['<table>'], **opts)
            return returnFormat(r, f)
    elif args['ls']:
        r = cartosql.getTables(**opts)
        if f is None or f == 'csv':
            return prettyJson(r)
        return returnFormat(r, f)
    elif args['exists'] and args['<table>']:
        r = cartosql.tableExists(args['<table>'], **opts)
        return returnFormat(r, f)
    elif args['drop'] and args['<table>']:
        r = cartosql.dropTable(args['<table>'], **opts)
        return returnFormat(r, f)
    return __doc__
Exemplo n.º 14
0
def freezeLayer(layerId,
                start_date,
                end_date,
                time_field=None,
                table_name=None,
                ignore_future=False):
    """
    Copies a CARTO layer's data to new table, and creates an idential layer
    pointing to the new table.

    @params
    layerId      string    the layer to be copied
    start_date   datetime  the start of the period of data to copy
    end_date     datetime  the end of the period of data to copy
    [time_field] string    the field in which datetime information is stored
    [table_name] string    the main table name containing time data
    ignore_future  bool    do not warn if trying to query data in the future

    The table_name and time_field are read from Dataset definition if None.

    @return
    tuple(<rw_api.Layer>, string) the new layer object and tableName
    """

    # 1. Fetch layer and dataset defition
    logging.info('Fetching layer definition for {}'.format(layerId))
    layer = rw_api.getLayer(layerId)
    if not layer.provider == 'cartodb':
        raise ("Layer must be of type 'cartodb'")
    if not time_field or not table_name:
        dataset = layer.getDataset()
        time_field = time_field or dataset.mainDateField
        table_name = table_name or dataset.tableName
    sql = layer.layerConfig['options']['sql'].lower()

    # 2. Check if end_date is in future or more recent than the
    # most recent data in the dataset

    # Truncate times to minutes, no need to be too accurate
    start_date = asUTC(start_date).replace(second=0, microsecond=0)
    end_date = asUTC(end_date).replace(second=0, microsecond=0)
    if start_date > end_date:
        start_date, end_date = (end_date, start_date)
    if not ignore_future:
        checkFutureData(end_date, table_name, time_field)

    # 3. Modify the layer SQL query, replacing any where clauses referring to
    # time_field with new ones selecting for the start and end date
    logging.debug('Query: {}'.format(sql))
    start = start_date.isoformat()
    end = end_date.isoformat()
    time_clauses = findTimeClauses(sql, time_field)
    new_cls = " {time_field} >= '{start}' and {time_field} < '{end}'".format(
        time_field=time_field, start=start, end=end)
    for cls in time_clauses:
        sql = sql.replace(cls, new_cls)
    logging.debug('New query: {}'.format(sql))

    # 4. Create the table from the updated layer SQL query

    # Name new table with start and end dates, make sure it isn't too long
    new_table = "{}_{}_{}".format(table_name,
                                  start_date.strftime("%Y%m%d_%H%M"),
                                  end_date.strftime("%Y%m%d_%H%M"))
    if len(new_table) > 62:
        new_table = "{}_{}".format(new_table[:32], hashlib.md5(new_table[32:]))

    # If we've made this exact query before, replace it
    logging.info("Coping data to table: {}".format(table_name))
    if csql.tableExists(new_table):
        logging.info("Table {} exists, overwriting".format(new_table))
        csql.dropTable(new_table)
    csql.createTableFromQuery(new_table, sql)

    # 5. Create layer copy and update SQL to refer to new table
    layer_name = "{} ({} to {})".format(layer.name, start, end)
    new_lyr = layer.copy(layer_name)
    new_lyr.layerConfig['options']['sql'] = "SELECT * FROM {}".format(
        new_table)
    new_lyr.published = False

    logging.info("Uploading new layer {}".format(layer_name))
    new_lyr.push()

    return (new_lyr, new_table)