Example #1
def edit_shape(dataset_name):
    form = EditShapeForm()
    meta = session.query(ShapeMetadata).get(dataset_name)

    if form.validate_on_submit():
        upd = {
            'human_name': form.human_name.data,
            'description': form.description.data,
            'attribution': form.attribution.data,
            'update_freq': form.update_freq.data,
        }
        session.query(ShapeMetadata)\
            .filter(ShapeMetadata.dataset_name == meta.dataset_name)\
            .update(upd)
        session.commit()

        if not meta.approved_status:
            approve_shape(dataset_name)

        flash('%s updated successfully!' % meta.human_name, 'success')
        return redirect(url_for('views.view_datasets'))

    context = {
        'form': form,
        'meta': meta,
    }
    return render_template('admin/edit-shape.html', **context)
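These snippets are drawn from the Plenario codebase (Python 2, Flask, SQLAlchemy) and omit their module-level setup. A minimal sketch of the wiring they assume — module paths and the connection string here are illustrative, not the project's actual configuration:

from flask import flash, make_response, redirect, render_template, request, url_for
from sqlalchemy import Table, create_engine, func, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker

engine = create_engine('postgresql://localhost/plenario')  # assumed DSN
session = scoped_session(sessionmaker(bind=engine))
Base = declarative_base()
# MetaTable, ShapeMetadata, and the various forms and tasks are project
# models imported from elsewhere in the codebase.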
Example #2
def update_meta(metatable, table):
    """
    After ingest/update, update the metatable registry to reflect table information.

    :param metatable: MetaTable instance to update.
    :param table: Table instance to update from.

    :returns: None
    """

    metatable.update_date_added()

    metatable.obs_from, metatable.obs_to = session.query(
        func.min(table.c.point_date),
        func.max(table.c.point_date)
    ).first()

    metatable.bbox = session.query(
        func.ST_SetSRID(
            func.ST_Envelope(func.ST_Union(table.c.geom)),
            4326
        )
    ).first()[0]

    session.add(metatable)

    try:
        session.commit()
    except:
        session.rollback()
        raise
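update_meta is called at the end of an ingest or update run. A plausible call site, assuming MetaTable exposes its ingested table as point_table (as later examples suggest):

meta = session.query(MetaTable).get(source_url_hash)  # hash identifies the dataset
update_meta(meta, meta.point_table)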
Example #3
    def test_delete_shape(self):
        # Can we remove a shape that's fully ingested?
        city_meta = session.query(ShapeMetadata).get(fixtures['city'].table_name)
        self.assertIsNotNone(city_meta)
        city_meta.remove_table()
        session.commit()
        city_meta = session.query(ShapeMetadata).get(fixtures['city'].table_name)
        self.assertIsNone(city_meta)

        # Can we remove a shape that's only in the metadata?
        dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
        self.assertIsNotNone(dummy_meta)
        dummy_meta.remove_table()
        session.commit()
        dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
        self.assertIsNone(dummy_meta)

        # Add them back to return to original test state
        ShapeTests.ingest_fixture(fixtures['city'])
        ShapeMetadata.add(human_name=u'Dummy Name',
                          source_url=None,
                          update_freq='yearly',
                          approved_status=False)

        session.commit()
Example #4
def meta():
    status_code = 200
    resp = {
        'meta': {
            'status': 'ok',
            'message': '',
        },
        'objects': []
    }
    dataset_name = request.args.get('dataset_name')
    if dataset_name:
        metas = session.query(MetaTable)\
                       .filter(MetaTable.dataset_name == dataset_name)
    else:
        metas = session.query(MetaTable)

    metas = metas.filter(MetaTable.approved_status == 'true')

    for m in metas.all():
        keys = m.as_dict()
        for e in METATABLE_KEYS_TO_EXCLUDE:
            del keys[e]
        resp['objects'].append(keys)

    resp['meta']['total'] = len(resp['objects'])
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
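METATABLE_KEYS_TO_EXCLUDE is defined elsewhere in the module; presumably it lists internal columns to keep out of the public response. A hypothetical value, for illustration only:

METATABLE_KEYS_TO_EXCLUDE = ['source_url_hash', 'contributor_name',
                             'contributor_organization', 'contributor_email',
                             'result_ids']  # illustrative, not the project's list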
Example #5
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)
    table = Table('dat_%s' % meta.dataset_name,
                  Base.metadata,
                  autoload=True,
                  autoload_with=engine)
    fieldnames = table.columns.keys()
    if form.validate_on_submit():
        upd = {
            'human_name': form.human_name.data,
            'description': form.description.data,
            'attribution': form.attribution.data,
            'obs_from': form.obs_from.data,
            'obs_to': form.obs_to.data,
            'update_freq': form.update_freq.data,
            'business_key': form.business_key.data,
            'latitude': form.latitude.data,
            'longitude': form.longitude.data,
            'location': form.location.data,
            'observed_date': form.observed_date.data,
        }
        session.query(MetaTable)\
            .filter(MetaTable.source_url_hash == meta.source_url_hash)\
            .update(upd)
        session.commit()
        flash('%s updated successfully!' % meta.human_name, 'success')
        return redirect(url_for('views.view_datasets'))
    context = {
        'form': form,
        'meta': meta,
        'fieldnames': fieldnames,
    }
    return render_template('edit-dataset.html', **context)
Example #6
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)
    fieldnames = meta.column_names
    num_rows = 0

    if meta.approved_status:
        try:
            table_name = meta.dataset_name
            table = Table(table_name,
                          Base.metadata,
                          autoload=True,
                          autoload_with=engine)

            # Would prefer to just get the names from the metadata
            # without needing to reflect.
            fieldnames = table.columns.keys()
            pk_name = [p.name for p in table.primary_key][0]
            pk = table.c[pk_name]
            num_rows = session.query(pk).count()

        except sqlalchemy.exc.NoSuchTableError:
            # dataset has been approved, but perhaps still processing.
            pass

    if form.validate_on_submit():
        upd = {
            'human_name': form.human_name.data,
            'description': form.description.data,
            'attribution': form.attribution.data,
            'update_freq': form.update_freq.data,
            'latitude': form.latitude.data,
            'longitude': form.longitude.data,
            'location': form.location.data,
            'observed_date': form.observed_date.data,
        }
        session.query(MetaTable)\
            .filter(MetaTable.source_url_hash == meta.source_url_hash)\
            .update(upd)
        session.commit()

        if not meta.approved_status:
            approve_dataset(source_url_hash)

        flash('%s updated successfully!' % meta.human_name, 'success')
        return redirect(url_for('views.view_datasets'))

    context = {
        'form': form,
        'meta': meta,
        'fieldnames': fieldnames,
        'num_rows': num_rows,
    }
    return render_template('admin/edit-dataset.html', **context)
Example #7
File: views.py Project: EzanLTD/plenario
def add_dataset():
    dataset_info = {}
    errors = []
    socrata_source = False

    url = ""
    dataset_id = None
    md = None

    if request.args.get('dataset_url'):
        url = request.args.get('dataset_url')
        (dataset_info, errors,
         socrata_source) = get_context_for_new_dataset(url)

        # populate contributor info from session
        user = session.query(User).get(flask_session['user_id'])
        dataset_info['contributor_name'] = user.name
        dataset_info['contributor_organization'] = 'Plenario Admin'
        dataset_info['contributor_email'] = user.email

        # check if dataset with the same URL has already been loaded
        dataset_id = md5(url).hexdigest()
        md = session.query(MetaTable).get(dataset_id)
        if md:
            errors.append(
                "A dataset with that URL has already been loaded: '%s'" %
                md.human_name)

    if request.method == 'POST' and not md:
        md = add_dataset_to_metatable(request,
                                      url,
                                      dataset_id,
                                      dataset_info,
                                      socrata_source,
                                      approved_status=True)

        json_data_types = None
        if ((not md.is_socrata_source) and md.contributed_data_types):
            json_data_types = json.loads(md.contributed_data_types)

        add_dataset_task.delay(md.source_url_hash, data_types=json_data_types)

        flash('%s added successfully!' % md.human_name, 'success')
        return redirect(url_for('views.view_datasets'))

    context = {
        'dataset_info': dataset_info,
        'errors': errors,
        'socrata_source': socrata_source
    }
    return render_template('admin/add-dataset.html', **context)
Example #8
def frequency_update(frequency):
    # hourly, daily, weekly, monthly, yearly
    md = session.query(MetaTable)\
        .filter(MetaTable.update_freq == frequency).all()
    for m in md:
        update_dataset.delay(m.source_url_hash)

    md = session.query(ShapeMetadata)\
        .filter(ShapeMetadata.update_freq == frequency)\
        .filter(ShapeMetadata.is_ingested == True)\
        .all()
    for m in md:
        update_shape.delay(m.dataset_name)
    return '%s update complete' % frequency
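frequency_update reads like a Celery task fired on a schedule. A hypothetical beat entry that would drive it (task path and timing assumed, not taken from the project):

from datetime import timedelta

CELERYBEAT_SCHEDULE = {
    'hourly-update': {
        'task': 'plenario.tasks.frequency_update',  # assumed task path
        'schedule': timedelta(hours=1),
        'args': ('hourly',),
    },
}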
Example #9
    def index(cls, geom=None):
        # The attributes that we want to pass along as-is
        as_is_attr_names = [
            'dataset_name', 'human_name', 'date_added', 'attribution',
            'description', 'update_freq', 'view_url', 'source_url',
            'num_shapes', 'contributor_name', 'contributor_email',
            'contributor_organization'
        ]

        as_is_attrs = [getattr(cls, name) for name in as_is_attr_names]

        # We need to apply some processing to the bounding box
        bbox = func.ST_AsGeoJSON(cls.bbox)
        attr_names = as_is_attr_names + ['bbox']
        attrs = as_is_attrs + [bbox]

        result = session.query(*attrs).filter(cls.is_ingested)
        listing = [dict(zip(attr_names, row)) for row in result]

        for dataset in listing:
            dataset['date_added'] = str(dataset['date_added'])

        if geom:
            listing = cls.add_intersections_to_index(listing, geom)

        return listing
Example #10
def form_detail_sql_query(validator, aggregate_points=False):
    dset = validator.dataset
    try:
        q = session.query(dset)
        if validator.conditions:
            q = q.filter(*validator.conditions)
    except Exception as e:
        return internal_error('Failed to construct column filters.', e)

    try:
        # Add time filters
        maker = FilterMaker(validator.vals, dataset=dset)
        q = q.filter(*maker.time_filters())

        # Add geom filter, if provided
        geom = validator.get_geom()
        if geom is not None:
            geom_filter = maker.geom_filter(geom)
            q = q.filter(geom_filter)
    except Exception as e:
        return internal_error('Failed to construct time and geometry filters.', e)

    # If the query specified a shape dataset, add a join to the SQL query
    # with that dataset.
    shape_table = validator.vals.get('shape')
    if shape_table is not None:
        shape_columns = ['{}.{} as {}'.format(shape_table.name, col.name, col.name)
                         for col in shape_table.c]
        if aggregate_points:
            q = q.from_self(shape_table)\
                .filter(dset.c.geom.ST_Intersects(shape_table.c.geom))\
                .group_by(shape_table)
        else:
            q = q.join(shape_table, dset.c.geom.ST_Within(shape_table.c.geom))
            # Add columns from the shape dataset to the select statement.
            q = q.add_columns(*shape_columns)

    return q
Example #11
File: api.py Project: hectron/plenario
def weather_stations():
    raw_query_params = request.args.copy()

    stations_table = Table('weather_stations', Base.metadata,
                           autoload=True, autoload_with=engine,
                           extend_existing=True)
    valid_query, query_clauses, resp, status_code = make_query(
        stations_table, raw_query_params)
    if valid_query:
        resp['meta']['status'] = 'ok'
        base_query = session.query(stations_table)
        for clause in query_clauses:
            print "weather_stations(): filtering on clause", clause
            base_query = base_query.filter(clause)
        values = base_query.all()
        fieldnames = stations_table.columns.keys()
        for value in values:
            d = {f: getattr(value, f) for f in fieldnames}
            loc = str(value.location)
            d['location'] = loads(loc.decode('hex')).__geo_interface__
            resp['objects'].append(d)
    resp['meta']['query'] = raw_query_params
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
Example #12
    def make_grid(self, resolution, geom=None, conditions=None):
        """
        :param resolution: length of side of grid square in meters
        :type resolution: int
        :param geom: string representation of geojson fragment
        :type geom: str
        :param conditions: conditions on columns to filter on
        :type conditions: list of SQLAlchemy binary operations
                          (e.g. col > value)
        :return: grid: result proxy with all result rows
                 size_x, size_y: the horizontal and vertical size
                                    of the grid squares in degrees
        """
        if conditions is None:
            conditions = []

        # We need to convert resolution (given in meters) to degrees
        # - which is the unit of measure for EPSG 4326 -
        # - in order to generate our grid.
        center = self.get_bbox_center()
        # center[1] is longitude
        size_x, size_y = get_size_in_degrees(resolution, center[1])

        # Generate a count for each resolution by resolution square
        t = self.point_table
        q = session.query(func.count(t.c.hash),
                          func.ST_SnapToGrid(t.c.geom, size_x, size_y)
                          .label('squares'))\
            .filter(*conditions)\
            .group_by('squares')

        if geom:
            q = q.filter(t.c.geom.ST_Within(func.ST_GeomFromGeoJSON(geom)))

        return session.execute(q), size_x, size_y
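A sketch of how a caller might consume make_grid, e.g. for a 300 m grid clipped to a GeoJSON polygon (caller names are hypothetical):

grid, size_x, size_y = meta_table.make_grid(300, geom=geojson_fragment)
for count, square in grid:
    # Each row pairs a point count with its snapped grid square.
    pass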
Example #13
    def index(cls, geom=None):
        # The attributes that we want to pass along as-is
        as_is_attr_names = ['dataset_name', 'human_name', 'date_added',
                            'attribution', 'description', 'update_freq',
                            'view_url', 'source_url', 'num_shapes']

        as_is_attrs = [getattr(cls, name) for name in as_is_attr_names]

        # We need to apply some processing to the bounding box
        bbox = func.ST_AsGeoJSON(cls.bbox)
        attr_names = as_is_attr_names + ['bbox']
        attrs = as_is_attrs + [bbox]

        result = session.query(*attrs).filter(cls.is_ingested)
        listing = [dict(zip(attr_names, row)) for row in result]

        for dataset in listing:
            dataset['date_added'] = str(dataset['date_added'])

        if geom:
            listing = cls.add_intersections_to_index(listing, geom)

        listing = cls._add_fields_to_index(listing)

        return listing
Example #14
File: views.py Project: EzanLTD/plenario
def approve_dataset(source_url_hash):
    # Get the MetaTable row, flip its approved_status, and bounce back to view-datasets.

    meta = session.query(MetaTable).get(source_url_hash)

    json_data_types = None
    if ((not meta.is_socrata_source) and meta.contributed_data_types):
        json_data_types = json.loads(meta.contributed_data_types)

    add_dataset_task.delay(source_url_hash, data_types=json_data_types)

    meta.approved_status = 'true'
    session.commit()

    # Email the user who submitted the dataset that it has been approved.

    msg_body = """Hello %s,\r\n
\r\n
Your dataset has been approved and added to Plenar.io:\r\n
\r\n
%s\r\n
\r\n
It should appear on http://plenar.io within 24 hours.\r\n
\r\n
Thank you!\r\n
The Plenario Team\r\n
http://plenar.io""" % (meta.contributor_name, meta.human_name)

    send_mail(subject="Your dataset has been added to Plenar.io",
              recipient=meta.contributor_email,
              body=msg_body)
Example #15
File: point.py Project: carhart/plenario
def dataset_fields(dataset_name):
    try:
        resp = json_response_base(None, [],
                                  query={'dataset_name': dataset_name})
        status_code = 200

        # get json and convert it to a dictionary
        columns = session.query(MetaTable.column_names)\
                         .filter(MetaTable.dataset_name == dataset_name)\
                         .first()[0]

        # return formatted list of column information
        resp['objects'] = [{
            'field_name': key,
            'field_type': value
        } for key, value in columns.items()]

        resp = make_response(json.dumps(resp), status_code)

    except NoSuchTableError:
        error_msg = "'%s' is not a valid table name" % dataset_name
        resp = bad_request(error_msg)

    resp.headers['Content-Type'] = 'application/json'
    return resp
Example #16
def admin_add_dataset():
    user = session.query(User).get(flask_session['user_id'])
    context = {'is_admin': True,
               'contributor_name': user.name,
               'contributor_organization': 'Plenario Admin',
               'contributor_email': user.email}
    return add(context)
Example #17
def meta():
    # Doesn't require a table lookup,
    # so no params passed on construction
    validator = ParamValidator()
    validator.set_optional('dataset_name',
                           no_op_validator,
                           None)\
             .set_optional('location_geom__within',
                           geom_validator,
                           None)\
             .set_optional('obs_date__ge', date_validator, None)\
             .set_optional('obs_date__le', date_validator, None)

    err = validator.validate(request.args)
    if err:
        return bad_request(err)

    # Set up base select statement
    cols_to_return = [
        'human_name', 'dataset_name', 'source_url', 'view_url', 'obs_from',
        'obs_to', 'date_added', 'last_update', 'update_freq', 'attribution',
        'description', 'column_names'
    ]
    col_objects = [getattr(MetaTable, col) for col in cols_to_return]
    q = session.query(*col_objects)

    # What params did the user provide?
    dataset_name = validator.vals['dataset_name']
    geom = validator.get_geom()
    start_date = validator.vals['obs_date__ge']
    end_date = validator.vals['obs_date__le']

    # Filter over datasets if user provides full date range or geom
    should_filter = geom or (start_date and end_date)

    if dataset_name:
        # If the user specified a name, don't try any filtering.
        # Just spit back that dataset's metadata.
        q = q.filter(MetaTable.dataset_name == dataset_name)
    elif should_filter:
        if geom:
            intersects = sa.func.ST_Intersects(
                sa.func.ST_GeomFromGeoJSON(geom), MetaTable.bbox)
            q = q.filter(intersects)
        if start_date and end_date:
            q = q.filter(
                sa.and_(MetaTable.obs_from < end_date,
                        MetaTable.obs_to > start_date))
    # Otherwise, just send back all the datasets

    metadata_records = [dict(zip(cols_to_return, row)) for row in q.all()]
    resp = json_response_base(validator, metadata_records)

    resp['meta']['total'] = len(resp['objects'])
    status_code = 200
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
Example #18
def approve_dataset(source_url_hash):
    # Approve it
    meta = session.query(MetaTable).get(source_url_hash)
    meta.approved_status = True
    session.commit()
    # Ingest it
    add_dataset_task.delay(source_url_hash)
    send_approval_email(meta.human_name, meta.contributor_name,
                        meta.contributor_email)
Example #19
    def validate(self):
        rv = Form.validate(self)
        if not rv:
            return False

        existing_name = db_session.query(User)\
            .filter(User.name == self.name.data).first()
        if existing_name:
            self.name.errors.append('Name is already registered')
            return False

        existing_email = db_session.query(User)\
            .filter(User.email == self.email.data).first()
        if existing_email:
            self.email.errors.append('Email address is already registered')
            return False
        
        return True
Example #20
def approve_shape(dataset_name):
    # Approve it
    meta = session.query(ShapeMetadata).get(dataset_name)
    meta.approved_status = True
    session.commit()
    # Ingest it
    add_shape_task.delay(dataset_name)
    send_approval_email(meta.human_name, meta.contributor_name,
                        meta.contributor_email)
Example #21
def detail_query(args, aggregate=False):
    meta_params = ('dataset', 'shapeset', 'data_type', 'geom', 'offset', 'limit')
    meta_vals = (args.data.get(k) for k in meta_params)
    dataset, shapeset, data_type, geom, offset, limit = meta_vals

    # If no tree filters were provided, a little formatting is needed
    # to make the general filters into an 'and' tree.
    if not has_tree_filters(args.data):
        # Creates an AND condition tree and adds it to args.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['shapeset']
        )

    # Sort out the filter conditions from the rest of the user arguments.
    filters = {k: v for k, v in args.data.items() if 'filter' in k}

    # Get upset if they specify more than a dataset and shapeset filter.
    if len(filters) > 2:
        return bad_request("Too many table filters provided.")

    # Query the point dataset.
    q = session.query(dataset)

    # If the user specified a geom, filter results to those within its shape.
    if geom:
        q = q.filter(dataset.c.geom.ST_Within(
            sqlalchemy.func.ST_GeomFromGeoJSON(geom)
        ))

    # Retrieve the filters and build conditions from them if they exist.
    point_ctree = filters.get(dataset.name + '__filter')

    # If the user specified point dataset filters, parse and apply them.
    if point_ctree:
        point_conditions = parse_tree(dataset, point_ctree)
        q = q.filter(point_conditions)

    # If a user specified a shape dataset, it was either through the /shapes
    # endpoint, which uses the aggregate result, or through the /detail
    # endpoint, which uses the joined result.
    if shapeset is not None:
        if aggregate:
            q = q.from_self(shapeset)\
                .filter(dataset.c.geom.ST_Intersects(shapeset.c.geom))\
                .group_by(shapeset)
        else:
            shape_columns = ['{}.{} as {}'.format(shapeset.name, col.name, col.name)
                             for col in shapeset.c]
            q = q.join(shapeset, dataset.c.geom.ST_Within(shapeset.c.geom))
            q = q.add_columns(*shape_columns)

        # If there's a filter specified for the shape dataset, apply those conditions.
        shape_ctree = filters.get(shapeset.name + '__filter')
        if shape_ctree:
            shape_conditions = parse_tree(shapeset, shape_ctree)
            q = q.filter(shape_conditions)

    return q
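For orientation, the filters dict built above pairs each table with a '<table name>__filter' key. A toy illustration of the convention (names hypothetical, condition trees elided):

data = {
    'dataset': 'crimes',                      # stands in for the point table
    'shapeset': 'neighborhoods',              # stands in for the shape table
    'crimes__filter': 'condition tree here',
    'neighborhoods__filter': 'condition tree here',
}
filters = {k: v for k, v in data.items() if 'filter' in k}
# -> picks out just the two *__filter entries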
Example #22
    def set_shape(self, shape_dataset_name):
        shape_table_meta = session.query(ShapeMetadata).get(shape_dataset_name)
        if shape_table_meta:
            shape_table = shape_table_meta.shape_table
            self.cols += [
                '{}.{}'.format(shape_table.name, key)
                for key in shape_table.columns.keys()
            ]
            self.vals['shape'] = shape_table
Example #23
File: views.py Project: EzanLTD/plenario
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)

    fieldnames = None
    num_rows = 0
    num_weather_observations = 0
    num_rows_w_censusblocks = 0

    if (meta.approved_status == 'true'):
        try:
            table_name = 'dat_%s' % meta.dataset_name

            table = Table(table_name,
                          Base.metadata,
                          autoload=True,
                          autoload_with=engine)
            fieldnames = table.columns.keys()
            pk_name = [p.name for p in table.primary_key][0]
            pk = table.c[pk_name]
            num_rows = session.query(pk).count()

            dat_master = Table('dat_master',
                               Base.metadata,
                               autoload=True,
                               autoload_with=engine)

            sel = session.query(func.count(dat_master.c.master_row_id)).filter(
                and_(dat_master.c.dataset_name == meta.dataset_name,
                     dat_master.c.dataset_row_id == pk,
                     dat_master.c.weather_observation_id.isnot(None)))

            num_weather_observations = sel.first()[0]

            sel = session.query(func.count(dat_master.c.master_row_id)).filter(
                and_(dat_master.c.dataset_name == meta.dataset_name,
                     dat_master.c.dataset_row_id == pk,
                     dat_master.c.census_block.isnot(None)))

            num_rows_w_censusblocks = sel.first()[0]

        except sqlalchemy.exc.NoSuchTableError:
            # dataset has been approved, but perhaps still processing.
            pass
Example #24
def weather(table):
    raw_query_params = request.args.copy()

    weather_table = Table('dat_weather_observations_%s' % table,
                          Base.metadata,
                          autoload=True,
                          autoload_with=engine,
                          extend_existing=True)
    stations_table = Table('weather_stations',
                           Base.metadata,
                           autoload=True,
                           autoload_with=engine,
                           extend_existing=True)
    valid_query, query_clauses, resp, status_code = make_query(
        weather_table, raw_query_params)
    if valid_query:
        resp['meta']['status'] = 'ok'
        base_query = session.query(weather_table, stations_table)\
            .join(stations_table,
                  weather_table.c.wban_code == stations_table.c.wban_code)
        for clause in query_clauses:
            base_query = base_query.filter(clause)

        base_query = base_query.order_by(weather_table.c.id.asc())
        # Return at most RESPONSE_LIMIT (1000) records.
        base_query = base_query.limit(RESPONSE_LIMIT)
        if raw_query_params.get('offset'):
            offset = raw_query_params['offset']
            base_query = base_query.offset(int(offset))
        values = base_query.all()
        weather_fields = weather_table.columns.keys()
        station_fields = stations_table.columns.keys()
        weather_data = {}
        station_data = {}
        for value in values:
            wd = {f: getattr(value, f) for f in weather_fields}
            sd = {f: getattr(value, f) for f in station_fields}
            if weather_data.get(value.wban_code):
                weather_data[value.wban_code].append(wd)
            else:
                weather_data[value.wban_code] = [wd]
            loc = str(value.location)
            sd['location'] = loads(loc.decode('hex')).__geo_interface__
            station_data[value.wban_code] = sd
        for station_id in weather_data.keys():
            d = {
                'station_info': station_data[station_id],
                'observations': weather_data[station_id],
            }
            resp['objects'].append(d)
        resp['meta']['total'] = sum(
            [len(r['observations']) for r in resp['objects']])
    resp['meta']['query'] = raw_query_params
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
Example #25
def meta():
    status_code = 200
    resp = {
        'meta': {
            'status': 'ok',
            'message': '',
        },
        'objects': []
    }
    dataset_name = request.args.get('dataset_name')
    if dataset_name:
        metas = session.query(MetaTable)\
            .filter(MetaTable.dataset_name == dataset_name).all()
    else:
        metas = session.query(MetaTable).all()
    resp['objects'].extend([m.as_dict() for m in metas])
    resp['meta']['total'] = len(resp['objects'])
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
Example #26
def export_dataset_to_response(dataset_name, query=None):
    """
    :param dataset_name: Name of shape dataset. Expected to be found in meta_shape table.
    :param query: Optional SQL query to be executed on shape dataset to filter results
    Expected query parameter: `data_type`. We expect it to be one of 'json', 'kml', or 'shapefile'.
                                If none of these (or unspecified), return JSON.
    :return: response with geoJSON data and response code
    """

    # Do we have this shape?
    shape_dataset = session.query(ShapeMetadata).get(dataset_name)
    if not (shape_dataset and shape_dataset.is_ingested):
        error_message = 'Could not find shape dataset {}'.format(dataset_name)
        return make_response(error_message, 404)

    # What file format does the user want it in?
    export_format = request.args.get('data_type')
    # JSON is the default export type.
    if not export_format:
        export_format = u'json'
    export_format = export_format.lower()

    # Make a filename that we are reasonably sure to be unique and not occupied by anyone else.
    sacrifice_file = tempfile.NamedTemporaryFile()
    export_path = sacrifice_file.name
    sacrifice_file.close()  # Removes file from system.

    try:
        # Write to that filename
        OgrExport(export_format=export_format,
                  table_name=dataset_name,
                  export_path=export_path,
                  query=query).write_file()
        # Dump it in the response
        with open(export_path, 'r') as to_export:
            resp = make_response(to_export.read(), 200)

        # Make the downloaded filename look nice
        resp.headers['Content-Type'] = _shape_format_to_content_header(
            export_format)
        disp_header = 'attachment; filename={name}.{ext}'.format(
            name=shape_dataset.human_name,
            ext=_shape_format_to_file_extension(export_format))
        resp.headers['Content-Disposition'] = disp_header
        return resp
    except Exception as e:
        error_message = 'Failed to export shape dataset {}'.format(
            dataset_name)
        print repr(e)
        return make_response(error_message, 500)
    finally:
        # Don't leave that file hanging around.
        if os.path.isfile(export_path):
            os.remove(export_path)
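The two header helpers are referenced but not shown. A plausible sketch of the mappings they encode (hypothetical, not the project's code):

def _shape_format_to_content_header(export_format):
    # Assumed MIME mapping; the real helper may differ.
    return {
        'json': 'application/json',
        'kml': 'application/vnd.google-earth.kml+xml',
        'shapefile': 'application/zip',
    }[export_format]

def _shape_format_to_file_extension(export_format):
    return {'json': 'json', 'kml': 'kml', 'shapefile': 'zip'}[export_format]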
Example #27
def update_shape(self, table_name):
    # Associate the dataset with this celery task
    # so we can check on the task's status
    meta = session.query(ShapeMetadata).get(table_name)
    meta.celery_task_id = self.request.id
    session.commit()

    # Update the shapefile
    ShapeETL(meta=meta).update()
    return 'Finished updating shape dataset {} from {}.'.\
        format(meta.dataset_name, meta.source_url)
Example #28
    def test_delete_shape(self):
        # Can we remove a shape that's fully ingested?
        city_meta = session.query(ShapeMetadata).get(fixtures["city"].table_name)
        self.assertIsNotNone(city_meta)
        city_meta.remove_table(caller_session=session)
        session.commit()
        city_meta = session.query(ShapeMetadata).get(fixtures["city"].table_name)
        self.assertIsNone(city_meta)

        # Can we remove a shape that's only in the metadata?
        dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
        self.assertIsNotNone(dummy_meta)
        dummy_meta.remove_table(caller_session=session)
        session.commit()
        dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
        self.assertIsNone(dummy_meta)

        # Add them back to return to original test state
        ShapeTests.ingest_fixture(fixtures["city"])
        ShapeMetadata.add(caller_session=session, human_name=u"Dummy Name", source_url=None)
        session.commit()
Example #29
def update_meta(metadata, table):
    """
    After ingest/update, update the metadata registry to reflect
    :param metadata:
    :param table:
    """
    metadata.update_date_added()
    metadata.obs_from, metadata.obs_to =\
        session.query(func.min(table.c.point_date),
                      func.max(table.c.point_date)).first()

    bbox = session.query(
        func.ST_SetSRID(func.ST_Envelope(func.ST_Union(table.c.geom)),
                        4326)).first()[0]
    metadata.bbox = bbox
    session.add(metadata)
    try:
        session.commit()
    except:
        session.rollback()
        raise
Example #30
def view_datasets():
    datasets_pending = session.query(MetaTable).\
        filter(MetaTable.approved_status != True).\
        all()

    shapes_pending = session.query(ShapeMetadata).\
        filter(ShapeMetadata.approved_status != True).\
        all()

    try:
        q = text(''' 
            SELECT m.*, c.status, c.task_id
            FROM meta_master AS m 
            LEFT JOIN celery_taskmeta AS c 
              ON c.id = (
                SELECT id FROM celery_taskmeta 
                WHERE task_id = ANY(m.result_ids) 
                ORDER BY date_done DESC 
                LIMIT 1
              )
            WHERE m.approved_status = 'true'
        ''')
        with engine.begin() as c:
            datasets = list(c.execute(q))
    except NoSuchTableError:
        datasets = session.query(MetaTable)\
            .filter(MetaTable.approved_status == True)\
            .all()

    try:
        shape_datasets = ShapeMetadata.get_all_with_etl_status()
    except NoSuchTableError:
        # If we can't find shape metadata, soldier on.
        shape_datasets = None

    return render_template('admin/view-datasets.html',
                           datasets_pending=datasets_pending,
                           shapes_pending=shapes_pending,
                           datasets=datasets,
                           shape_datasets=shape_datasets)
Example #31
File: views.py Project: EzanLTD/plenario
def view_datasets():
    datasets_pending = session.query(MetaTable)\
        .filter(MetaTable.approved_status != 'true')\
        .all()

    counts = {
        'master_row_count': table_row_estimate('dat_master'),
        'weather_daily_row_count':
            table_row_estimate('dat_weather_observations_daily'),
        'weather_hourly_row_count':
            table_row_estimate('dat_weather_observations_hourly'),
        'census_block_row_count': table_row_estimate('census_blocks'),
    }

    try:
        # Reflecting celery_taskmeta raises NoSuchTableError if the table
        # doesn't exist yet; otherwise the raw query below can join against it.
        celery_table = Table('celery_taskmeta', Base.metadata,
                             autoload=True, autoload_with=engine)
        q = text(''' 
            SELECT m.*, c.status, c.task_id
            FROM meta_master AS m 
            LEFT JOIN celery_taskmeta AS c 
              ON c.id = (
                SELECT id FROM celery_taskmeta 
                WHERE task_id = ANY(m.result_ids) 
                ORDER BY date_done DESC 
                LIMIT 1
              )
            WHERE m.approved_status = 'true'
        ''')
        datasets = []
        with engine.begin() as c:
            datasets = list(c.execute(q))
    except NoSuchTableError:
        datasets = session.query(MetaTable)\
            .filter(MetaTable.approved_status == 'true')\
            .all()
Example #32
    def test_update(self):
        # Try to ingest slightly changed shape
        fixture = fixtures['changed_neighborhoods']
        # Add the fixture to the registry first
        shape_meta = session.query(ShapeMetadata).get('chicago_neighborhoods')
        # Do a ShapeETL update
        ShapeETL(meta=shape_meta, source_path=fixture.path).update()
        t = shape_meta.shape_table
        sel = t.select().where(t.c['sec_neigh'] == 'ENGLEWOOD')
        res = engine.execute(sel).fetchall()
        altered_value = res[0]['pri_neigh']
        # I changed Englewood to Englerwood :P
        self.assertEqual(altered_value, 'Englerwood')
Example #33
def delete_dataset(self, source_url_hash):
    md = session.query(MetaTable).get(source_url_hash)
    try:
        dat_table = md.point_table
        dat_table.drop(engine, checkfirst=True)
    except NoSuchTableError:
        # Move on so we can get rid of the metadata
        pass
    session.delete(md)
    try:
        session.commit()
    except InternalError, e:
        raise delete_dataset.retry(exc=e)
Example #34
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)

    fieldnames = None
    if (meta.approved_status == 'true'):
        try:
            table = Table('dat_%s' % meta.dataset_name, Base.metadata,
                          autoload=True, autoload_with=engine)
            fieldnames = table.columns.keys()
        except sqlalchemy.exc.NoSuchTableError:
            # dataset has been approved, but perhaps still processing.
            pass
Example #35
def dataset_status():
    source_url_hash = request.args.get("source_url_hash")

    q = ''' 
        SELECT 
          m.human_name, 
          m.source_url_hash,
          c.status, 
          c.date_done,
          c.traceback,
          c.task_id
        FROM meta_master AS m, 
        UNNEST(m.result_ids) AS ids 
        LEFT JOIN celery_taskmeta AS c 
          ON c.task_id = ids
        WHERE c.date_done IS NOT NULL 
    '''

    if source_url_hash:
        name = session.query(MetaTable).get(source_url_hash).dataset_name
        q = q + "AND m.source_url_hash = :source_url_hash"
    else:
        name = None

    q = q + " ORDER BY c.id DESC"

    with engine.begin() as c:
        results = list(c.execute(text(q), source_url_hash=source_url_hash))
    r = []
    for result in results:
        tb = None
        if result.traceback:
            tb = result.traceback\
                .replace('\r\n', '<br />')\
                .replace('\n\r', '<br />')\
                .replace('\n', '<br />')\
                .replace('\r', '<br />')
        d = {
            'human_name': result.human_name,
            'source_url_hash': result.source_url_hash,
            'status': result.status,
            'task_id': result.task_id,
            'traceback': tb,
            'date_done': None,
        }
        if result.date_done:
            d['date_done'] = result.date_done.strftime('%B %d, %Y %H:%M')
        r.append(d)
    return render_template('admin/dataset-status.html', results=r, name=name)
Example #36
def update_dataset(self, source_url_hash):
    md = session.query(MetaTable).get(source_url_hash)
    if md.result_ids:
        ids = md.result_ids
        ids.append(self.request.id)
    else:
        ids = [self.request.id]
    with engine.begin() as c:
        c.execute(MetaTable.__table__.update()
                  .where(MetaTable.source_url_hash == source_url_hash)
                  .values(result_ids=ids))
    etl = PlenarioETL(md)
    etl.update()
    return 'Finished updating {0} ({1})'.format(md.human_name, md.source_url_hash)
Example #37
File: views.py Project: EzanLTD/plenario
def contrib_view():
    dataset_info = {}
    errors = []
    socrata_source = False

    url = ""
    dataset_id = None
    md = None

    if request.args.get('dataset_url'):
        url = request.args.get('dataset_url')
        (dataset_info, errors,
         socrata_source) = get_context_for_new_dataset(url)

        # check if dataset with the same URL has already been loaded
        dataset_id = md5(url).hexdigest()
        md = session.query(MetaTable).get(dataset_id)
        if md:
            errors.append(
                "A dataset with that URL has already been loaded: '%s'" %
                md.human_name)

    if request.method == 'POST' and not md:
        md = add_dataset_to_metatable(request,
                                      url,
                                      dataset_id,
                                      dataset_info,
                                      socrata_source,
                                      approved_status=False)

        # email a confirmation to the submitter
        msg_body = """Hello %s,\r\n\r\n
We received your recent dataset submission to Plenar.io:\r\n\r\n%s\r\n\r\n
After we review it, we'll notify you when your data is loaded and available.\r\n\r\n
Thank you!\r\nThe Plenario Team\r\nhttp://plenar.io""" % (
            request.form.get('contributor_name'), md.human_name)

        send_mail(subject="Your dataset has been submitted to Plenar.io",
                  recipient=request.form.get('contributor_email'),
                  body=msg_body)

        return redirect(url_for('views.contrib_thankyou'))

    context = {
        'dataset_info': dataset_info,
        'form': request.form,
        'errors': errors,
        'socrata_source': socrata_source
    }
    return render_template('contribute.html', **context)