Exemplo n.º 1
0
def upgrade(migrate_engine):
    """Create the ``country`` table.

    Args:
        migrate_engine: Engine handed in by the caller. It is bound to the
            module-level ``meta`` instead of creating a separate engine.
    """
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    meta.bind = migrate_engine

    country = Table(
        'country', meta,
        Column('id', Integer, primary_key=True),
        Column('gid', Integer, unique=True),
        Column('geounit', Unicode(300), unique=True),
        Column('label', Unicode(300)),
        # Mutation-tracked JSON column; ``dict`` is the Python-side default.
        Column('pagesettings', MutableDict.as_mutable(JSONType),
               default=dict),
    )

    country.create()
Exemplo n.º 2
0
def upgrade(migrate_engine):
    """Create the ``country`` table.

    Args:
        migrate_engine: Engine passed in by the caller; bound to the
            module-level ``meta`` rather than building a new engine here.
    """
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    meta.bind = migrate_engine

    country = Table(
        'country', meta, Column('id', Integer, primary_key=True),
        Column('gid', Integer, unique=True),
        Column('geounit', Unicode(300), unique=True),
        Column('label', Unicode(300)),
        # JSON-typed column wrapped for in-place mutation tracking.
        Column('pagesettings', MutableDict.as_mutable(JSONType), default=dict))

    country.create()
def upgrade(migrate_engine):
    """Create the ``dataview`` table and the ``dataview_dataset`` link table.

    Args:
        migrate_engine: Engine handed in by the caller. It is bound to the
            module-level ``meta`` instead of creating a separate engine.
    """
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    meta.bind = migrate_engine

    # Reflect the existing tables into ``meta``; presumably required so the
    # ForeignKey targets used below ('account.id', 'dataset.id') resolve.
    # The results are not used directly, so they are not bound to names
    # (both used to be assigned to the same ``account`` variable).
    Table('account', meta, autoload=True)
    Table('dataset', meta, autoload=True)

    ######################### Dataview

    dataview = Table(
        'dataview', meta,
        Column('id', Integer, primary_key=True),
        Column('title', Unicode(500)),
        Column('description', Unicode()),
        Column('created_at', DateTime),
        Column('updated_at', DateTime),
        Column('urlhash', Unicode(2000)),
        Column('account_id', Integer, ForeignKey('account.id')),
        # Self-referencing key pointing at another dataview row.
        Column('cloned_dataview_id', Integer, ForeignKey('dataview.id')),
        Column('settings', MutableDict.as_mutable(JSONType), default=dict),
    )

    dataview.create()

    ################## Many-to-many: dataviews to datasets
    dataview_dataset_table = Table(
        'dataview_dataset', meta,
        Column('dataview_id', Integer, ForeignKey('dataview.id'),
               primary_key=True),
        Column('dataset_id', Integer, ForeignKey('dataset.id'),
               primary_key=True),
    )

    dataview_dataset_table.create()
Exemplo n.º 4
0
def upgrade(migrate_engine):
    """Create the ``dataview`` table and the ``dataview_dataset`` link table.

    Args:
        migrate_engine: Engine passed in by the caller; bound to the
            module-level ``meta`` rather than building a new engine here.
    """
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    meta.bind = migrate_engine

    # Load the existing table definitions into ``meta``; presumably this is
    # what lets the 'account.id' / 'dataset.id' foreign keys below resolve.
    # The return values are unused (they were previously both stored in a
    # single ``account`` local), so no names are bound.
    Table('account', meta, autoload=True)
    Table('dataset', meta, autoload=True)

    ######################### Dataview

    dataview = Table(
        'dataview', meta, Column('id', Integer, primary_key=True),
        Column('title', Unicode(500)), Column('description', Unicode()),
        Column('created_at', DateTime), Column('updated_at', DateTime),
        Column('urlhash', Unicode(2000)),
        Column('account_id', Integer, ForeignKey('account.id')),
        Column('cloned_dataview_id', Integer, ForeignKey('dataview.id')),
        Column('settings', MutableDict.as_mutable(JSONType), default=dict))

    dataview.create()

    ################## Many-to-many: dataviews to datasets
    dataview_dataset_table = Table(
        'dataview_dataset', meta,
        Column('dataview_id',
               Integer,
               ForeignKey('dataview.id'),
               primary_key=True),
        Column('dataset_id',
               Integer,
               ForeignKey('dataset.id'),
               primary_key=True))

    dataview_dataset_table.create()
Exemplo n.º 5
0
class Dataset(TableHandler, db.Model):
    """ The dataset is the core entity of any access to data. All
    requests to the actual data store are routed through it, as well
    as data loading and model generation.

    The dataset keeps an in-memory representation of the data model
    (including all dimensions and measures) which can be used to
    generate necessary queries.
    """
    __tablename__ = 'dataset'

    # Descriptive metadata columns. ``name`` is unique and is also used as
    # the name of the per-dataset entry table (see ``init``).
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode(255), unique=True)
    label = db.Column(db.Unicode(2000))
    description = db.Column(db.Unicode())
    currency = db.Column(db.Unicode())
    default_time = db.Column(db.Unicode())
    schema_version = db.Column(db.Unicode())
    entry_custom_html = db.Column(db.Unicode())
    ckan_uri = db.Column(db.Unicode())
    category = db.Column(db.Unicode())
    serp_title = db.Column(db.Unicode(), nullable=True)
    serp_teaser = db.Column(db.Unicode(), nullable=True)
    private = db.Column(db.Boolean, default=False)
    # Timestamps: both default to "now" on insert; ``updated_at`` is also
    # refreshed on every update.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime,
                           default=datetime.utcnow,
                           onupdate=datetime.utcnow)
    # The model description minus its ``dataset`` section (see ``__init__``),
    # stored as mutation-tracked JSON.
    data = db.Column(MutableDict.as_mutable(JSONType), default=dict)

    # Proxies exposing the ``code`` attribute of the objects held in the
    # ``_languages`` / ``_territories`` collections (defined elsewhere).
    languages = db.association_proxy('_languages', 'code')
    territories = db.association_proxy('_territories', 'code')

    def __init__(self, data):
        """ Build a dataset from a model description.

        ``data`` must contain a ``dataset`` entry holding the descriptive
        metadata. That entry is copied onto the attributes of this object,
        the rest of ``data`` is kept in ``self.data`` and the in-memory
        dimension/measure model is then built.
        """
        self.data = data.copy()
        info = self.data['dataset']
        del self.data['dataset']

        # Plain metadata values; a missing key yields ``None``.
        for attr in ('label', 'name', 'description', 'currency', 'category',
                     'serp_title', 'serp_teaser', 'default_time',
                     'entry_custom_html'):
            setattr(self, attr, info.get(attr))

        # Collection-valued metadata falls back to a fresh empty list.
        for attr in ('languages', 'territories'):
            setattr(self, attr, info.get(attr, []))

        self.ckan_uri = info.get('ckan_uri')
        self._load_model()

    @property
    def model(self):
        """ A copy of ``self.data`` with the dataset metadata
        (``as_dict()``) re-attached under the ``dataset`` key. """
        snapshot = self.data.copy()
        snapshot['dataset'] = self.as_dict()
        return snapshot

    @property
    def mapping(self):
        """ The ``mapping`` section of ``self.data``, or an empty dict
        when there is none. """
        mapping_section = self.data.get('mapping', {})
        return mapping_section

    @db.reconstructor
    def _load_model(self):
        """ Build ``self.dimensions`` and ``self.measures`` from the mapping.

        Runs from ``__init__`` and, through the reconstructor hook, when
        the object is loaded from the SQLAlchemy store. Afterwards the
        table model is initialised and the cached "is generated" flag is
        reset.
        """
        self.dimensions = []
        self.measures = []
        for name, spec in self.mapping.items():
            kind = spec.get('type')

            # Measures are collected separately from dimensions; a field
            # called ``amount`` is always treated as a measure.
            if kind == 'measure' or name == 'amount':
                self.measures.append(Measure(self, name, spec))
                continue

            is_date = kind == 'date' or \
                (name == 'time' and spec.get('datatype') == 'date')
            if is_date:
                dimension_cls = DateDimension
            elif kind in ['value', 'attribute']:
                dimension_cls = AttributeDimension
            else:
                # Anything else is handled as a compound dimension.
                dimension_cls = CompoundDimension
            self.dimensions.append(dimension_cls(self, name, spec))

        self.init()
        self._is_generated = None

    def __getitem__(self, name):
        """ Access a field (dimension or measure) by name.

        Raises ``KeyError`` carrying the requested name when no field
        with that name exists.
        """
        for field in self.fields:
            if field.name == name:
                return field
        # Include the key so failures can be diagnosed from the traceback.
        raise KeyError(name)

    def __contains__(self, name):
        """ Tell whether a field called ``name`` can be looked up. """
        try:
            self[name]
        except KeyError:
            return False
        return True

    @property
    def fields(self):
        """ All dimensions followed by all measures, as a new list. """
        combined = list(self.dimensions)
        combined.extend(self.measures)
        return combined

    @property
    def compounds(self):
        """ Return only compound dimensions, as a list.

        A list comprehension is used instead of ``filter`` so the result
        is a real, re-iterable list on Python 3 as well (where ``filter``
        returns a one-shot iterator), matching ``facet_dimensions``.
        """
        return [d for d in self.dimensions
                if isinstance(d, CompoundDimension)]

    @property
    def facet_dimensions(self):
        """ The dimensions whose ``facet`` attribute is truthy. """
        faceted = []
        for dimension in self.dimensions:
            if dimension.facet:
                faceted.append(dimension)
        return faceted

    def init(self):
        """ Set up the SQLAlchemy table model for this dataset without
        creating anything in the database. Required both before reading
        data and before physically generating the model. """
        engine = db.engine
        metadata = db.MetaData()
        metadata.bind = engine
        self.bind = engine
        self.meta = metadata
        # self.tx = self.bind.begin()

        # The entry table is named after the dataset.
        self._init_table(metadata, self.name, 'entry',
                         id_type=db.Unicode(42))
        for field in self.fields:
            field.column = field.init(metadata, self.table)
        self.alias = self.table.alias('entry')

    def generate(self):
        """ Physically create the tables and columns this dataset needs
        in order to hold data.
        """
        for field in self.fields:
            field.generate(self.meta, self.table)

        # Every compound dimension gets a named foreign key from the
        # entry table's ``<dimension>_id`` column to the dimension table.
        for dim in self.dimensions:
            if not isinstance(dim, CompoundDimension):
                continue
            constraint = ForeignKeyConstraint(
                [dim.name + '_id'],
                [dim.table.name + '.id'],
                # use_alter=True,
                name='fk_' + self.name + '_' + dim.name)
            self.table.append_constraint(constraint)

        self._generate_table()
        self._is_generated = True

    @property
    def is_generated(self):
        """ Whether the entry table exists; the answer is looked up once
        and then cached in ``_is_generated``. """
        cached = self._is_generated
        if cached is None:
            cached = self._is_generated = self.table.exists()
        return cached

    @property
    def has_badges(self):
        """
        True when at least one badge has been given to the dataset.
        """
        # A non-zero badge count means the dataset has badges
        badge_count = self.badges.count()
        return badge_count != 0

    def commit(self):
        """ Do nothing: the explicit transaction handling that used to
        live here is disabled (see the commented-out lines). """
        # self.tx.commit()
        # self.tx = self.bind.begin()

    def _make_key(self, data):
        """ Derive the identifier of an entry from the dataset name and
        the values of all key fields. Unlike an SQL auto-increment this
        stays the same across multiple loads, which gives entries stable
        URIs.
        """
        parts = [self.name]
        for field in self.fields:
            if field.key:
                value = data.get(field.name)
                # Nested values are represented by their ``name``,
                # falling back to their ``id``.
                if isinstance(value, dict):
                    value = value.get('name', value.get('id'))
                parts.append(value)
        return hash_values(parts)

    def load(self, data):
        """ Store one entry given in the mapping source format (every
        field of the dataset must be present in ``data``). Each field
        loads its own part; the combined row is then upserted by id. """
        row = {}
        for field in self.fields:
            loaded = field.load(self.bind, data[field.name])
            row.update(loaded)
        row['id'] = self._make_key(data)
        self._upsert(self.bind, row, ['id'])

    def flush(self):
        """ Remove all data from the dataset's tables while keeping the
        table structure.
        """
        engine = self.bind
        # Dimensions are flushed first, then the dataset's own table.
        for dim in self.dimensions:
            dim.flush(engine)
        self._flush(engine)

    def drop(self):
        """ Drop every table belonging to this dataset (those created by
        ``generate()``), which also removes the data they hold.
        """
        engine = self.bind
        # The dataset's own table goes first, then the dimension tables.
        self._drop(engine)
        for dim in self.dimensions:
            dim.drop(engine)
        self._is_generated = False

    def key(self, key):
        """ Resolve ``key`` to an aliased column usable directly in a
        query. ``key`` names either a simple attribute (e.g. ``time``) or
        an attribute of a complex dimension (e.g. ``to.label``). """
        # Everything after the first dot, if any, is the attribute name.
        field_name, _, attr = key.partition('.')
        dimension = self[field_name]

        if not hasattr(dimension, 'alias'):
            # No table alias of its own: the column sits on the entry table.
            return self.alias.c[dimension.column.name]

        # Without an explicit attribute the ``name`` column is used.
        column_name = dimension[attr].column.name if attr else 'name'
        return dimension.alias.c[column_name]

    def entries(self,
                conditions="1=1",
                order_by=None,
                limit=None,
                offset=0,
                step=10000,
                fields=None):
        """ Yield the entries of this dataset as a fully denormalized,
        nested view in which every dimension is a hash of its attributes.

        Rows are fetched in batches of ``step`` rows starting at
        ``offset``; at most ``limit`` rows are requested when a limit is
        given. Nothing is yielded when the dataset is not generated.

        This is somewhat similar to the entries collection in the fully
        denormalized schema before OpenSpending 0.11 (MongoDB).
        """
        if not self.is_generated:
            return

        if fields is None:
            fields = self.fields

        # Only join the dimensions that were actually asked for.
        joins = self.alias
        for dimension in self.dimensions:
            if dimension in fields:
                joins = dimension.join(joins)
        selects = [f.selectable for f in fields] + [self.alias.c.id]

        # enforce stable sorting:
        if order_by is None:
            order_by = [self.alias.c.id.asc()]

        for batch in count():
            consumed = step * batch
            if limit is None:
                qlimit = step
            else:
                qlimit = min(limit - consumed, step)
            if qlimit <= 0:
                break

            query = db.select(selects,
                              conditions,
                              joins,
                              order_by=order_by,
                              use_labels=True,
                              limit=qlimit,
                              offset=offset + consumed)
            rp = self.bind.execute(query)

            got_rows = False
            row = rp.fetchone()
            while row is not None:
                got_rows = True
                yield decode_row(row, self)
                row = rp.fetchone()

            # An empty batch means the data is exhausted.
            if not got_rows:
                return
    def aggregate(self,
                  measures=['amount'],
                  drilldowns=[],
                  cuts=[],
                  page=1,
                  pagesize=10000,
                  order=[]):
        """ Query the dataset for a subset of cells based on cuts and
        drilldowns. It returns a structure with a list of drilldown items
        and a summary about the slice cut out by the query.

        ``measures``
            The numeric units to be aggregated over, defaults to
            [``amount``]. (type: `list`)
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            Specification what to cut from the cube. This is a
            `list` of `two-tuples` where the first item is the dimension
            and the second item is the value to cut from. It is turned into
            a query where multiple cuts for the same dimension are combined
            to an *OR* query and then the queries for the different
            dimensions are combined to an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into page of size *pagesize*.
            type: `int`
        ``order``
            Sort the result based on the dimension *sort_dimension*.
            This may be `None` or empty (*default*) or a `list` of
            two-`tuples` where the first element is the *dimension* and the
            second element is the order (`False` for ascending, `True` for
            descending).
            Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to compute
            the cube. (NOTE(review): nothing in this method raises
            ``ValueError`` explicitly -- confirm whether this still
            applies.)
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the drilldown
            dimensions.

        Returns: A `dict` containing the drilldown and the summary:

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ... ]
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}

        NOTE(review): the per-drilldown count column is labelled
        ``entries`` in the query below; whether it surfaces as
        ``num_entries`` depends on ``decode_row`` -- confirm.

        The list defaults in the signature are shared between calls; the
        body only reads or rebinds them and never mutates them in place.
        """

        # Get the joins (aka alias) and the dataset
        joins = alias = self.alias
        dataset = self

        # Aggregation fields are all of the measures, so we create individual
        # summary fields with the sum function of SQLAlchemy
        fields = [db.func.sum(alias.c[m]).label(m) for m in measures]
        # We append an aggregation field that counts the number of entries
        fields.append(db.func.count(alias.c.id).label("entries"))
        # Create a copy of the statistics fields (for later)
        stats_fields = list(fields)

        # Create label map for time columns (year and month) for lookup
        # since they are found under the time attribute
        labels = {
            'year': dataset['time']['year'].column_alias.label('year'),
            'month': dataset['time']['yearmonth'].column_alias.label('month'),
        }

        # Get the dimensions we're interested in. These would be the drilldowns
        # and the cuts. For compound dimensions we are only interested in the
        # most significant one (e.g. for from.name we're interested in from)
        dimensions = drilldowns + [k for k, v in cuts]
        dimensions = [d.split('.')[0] for d in dimensions]

        # Loop over the dimensions as a set (to avoid multiple occurrences)
        for dimension in set(dimensions):
            # If the dimension is year or month we're interested in 'time'
            if dimension in labels:
                dimension = 'time'
            # If the dimension table isn't in the automatic joins we add it
            if dimension not in [c.table.name for c in joins.columns]:
                joins = dataset[dimension].join(joins)

        # Drilldowns are performed using group_by SQL functions
        group_by = []
        for key in drilldowns:
            # If drilldown is in labels we append its mapped column to fields
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                # Get the column from the dataset
                column = dataset.key(key)
                # If the drilldown is a compound dimension or the columns table
                # is in the joins we're already fetching the column so we just
                # append it to fields and the group_by
                if '.' in key or column.table == alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    # If not we add the column table to the fields and add all
                    # of that tables columns to the group_by
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

        # Cuts are managed using AND statements and we use a dict with set as
        # the default value to create the filters (cut on various values)
        conditions = db.and_()
        filters = defaultdict(set)

        for key, value in cuts:
            # If the key is in labels (year or month) we get the mapped column
            # else we get the column from the dataset
            if key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            # We add the value to the set for that particular column
            filters[column].add(value)

        # Loop over the columns in the filter and add that to the conditions
        # For every value in the set we create an OR statement so we get e.g.
        # year=2007 AND (from.who == 'me' OR from.who == 'you')
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

        # Ordering can be set by a parameter or ordered by measures by default
        order_by = []
        # If no order is defined we default to order of the measures in the
        # order they occur (furthest to the left is most significant)
        if order is None or not len(order):
            order = [(m, True) for m in measures]

        # We loop through the order list to add the columns themselves
        for key, direction in order:
            # If it's a part of the measures we have to order by the
            # aggregated values (the sum of the measure)
            if key in measures:
                column = db.func.sum(alias.c[key]).label(key)
            # If it's in the labels we have to get the mapped column
            elif key in labels:
                column = labels[key]
            # ...if not we just get the column from the dataset
            else:
                column = dataset.key(key)
            # We append the column and set the direction (True == descending)
            order_by.append(column.desc() if direction else column.asc())

        # query 1: get overall sums.
        # Here we use the stats_field we saved earlier
        query = db.select(stats_fields, conditions, joins)
        rp = dataset.bind.execute(query)
        # Execute the query and turn them to a list so we can pop the
        # entry count and then zip the measurements and the totals together
        stats = list(rp.fetchone())
        num_entries = stats.pop()
        total = zip(measures, stats)

        # query 2: get total count of drilldowns
        if len(group_by):
            # Select 1 for each group in the group_by and count them
            query = db.select(['1'], conditions, joins, group_by=group_by)
            query = db.select([db.func.count('1')], '1=1', query.alias('q'))
            rp = dataset.bind.execute(query)
            num_drilldowns, = rp.fetchone()
        else:
            # If there are no drilldowns we still have to do one
            num_drilldowns = 1

        # The drilldown result list
        drilldown = []
        # The offset in the db, based on the page and pagesize (we have to
        # modify it since page counts starts from 1 but we count from 0
        offset = int((page - 1) * pagesize)

        # query 3: get the actual data
        query = db.select(fields,
                          conditions,
                          joins,
                          order_by=order_by,
                          group_by=group_by,
                          use_labels=True,
                          limit=pagesize,
                          offset=offset)
        rp = dataset.bind.execute(query)

        while True:
            # Get each row in the db result and append it, decoded, to the
            # drilldown result. The decoded version is a json representation
            row = rp.fetchone()
            if row is None:
                break
            result = decode_row(row, dataset)
            drilldown.append(result)

        # Create the summary based on the stats_fields and other things
        # First we add the total for each measurement in the root of the
        # summary (watch out!) and then we add various other, self-explanatory
        # statistics such as page, number of entries. The currency value is
        # strange since it's redundant for multiple measures but is left as is
        # for backwards compatibility
        summary = {key: value for (key, value) in total}
        summary.update({
            'num_entries':
            num_entries,
            'currency': {m: dataset.currency
                         for m in measures},
            'num_drilldowns':
            num_drilldowns,
            'page':
            page,
            'pages':
            int(math.ceil(num_drilldowns / float(pagesize))),
            'pagesize':
            pagesize
        })

        return {'drilldown': drilldown, 'summary': summary}

    def timerange(self):
        """
        Get the timerange of the dataset (based on the time attribute).
        Returns a tuple of (first timestamp, last timestamp) where timestamp
        is a datetime object, or ``None`` for a missing value. When the
        range cannot be determined at all, ``(None, None)`` is returned.
        """
        try:
            # Get the time column
            time_column = self.key('time')
            # We use SQL's min and max functions to get the timestamps
            query = db.session.query(db.func.min(time_column),
                                     db.func.max(time_column))
            # We just need one result to get min and max time. A tuple is
            # returned so that the success and failure paths agree.
            return tuple(
                datetime.strptime(date, '%Y-%m-%d') if date else None
                for date in query.one())
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt/SystemExit the way a bare ``except`` did.
            return (None, None)

    def __repr__(self):
        """ Debug representation: name, dimensions and measures. """
        parts = (self.name, self.dimensions, self.measures)
        return "<Dataset(%s:%s:%s)>" % parts

    def __len__(self):
        """ Number of entries in the dataset; 0 when the entry table has
        not been generated. """
        if self.is_generated:
            result = self.bind.execute(self.alias.count())
            return result.fetchone()[0]
        return 0

    def as_dict(self):
        """ Return the dataset metadata as a plain dictionary, including
        timestamps, languages, territories and short badge summaries. """
        timestamps = {
            'created': self.created_at,
            'last_modified': self.updated_at
        }
        result = {
            'label': self.label,
            'name': self.name,
            'description': self.description,
            'default_time': self.default_time,
            'schema_version': self.schema_version,
            'currency': self.currency,
            'category': self.category,
            'serp_title': self.serp_title,
            'serp_teaser': self.serp_teaser,
            'timestamps': timestamps,
        }
        # Proxied collections are copied into plain lists.
        result['languages'] = list(self.languages)
        result['territories'] = list(self.territories)
        result['badges'] = [badge.as_dict(short=True)
                            for badge in self.badges]
        return result

    @classmethod
    def all_by_account(cls, account):
        """ Query available datasets based on dataset visibility.

        * no account (``None``): only non-private datasets;
        * admin account: every dataset;
        * any other account: non-private datasets plus the datasets the
          account manages.

        Returns a query ordered by label, ascending.

        The visibility rules are expressed without raw SQL strings
        (previously ``"1=1"`` / ``"1=2"`` inside ``or_``), which newer
        SQLAlchemy versions warn about or reject unless wrapped in
        ``text()``. The selected rows are the same.
        """
        q = db.session.query(cls)
        is_admin = account is not None and account.admin
        if not is_admin:
            criteria = [cls.private == false()]
            if account is not None:
                # Datasets that list this account among their managers.
                criteria.append(
                    cls.managers.any(type(account).id == account.id))
            q = q.filter(db.or_(*criteria))
        return q.order_by(cls.label.asc())

    @classmethod
    def by_name(cls, name):
        """ Return the first dataset called ``name``, or ``None``. """
        query = db.session.query(cls).filter(cls.name == name)
        return query.first()
Exemplo n.º 6
0
class Dataset(db.Model):
    """ The dataset is the core entity of any access to data. All
    requests to the actual data store are routed through it, as well
    as data loading and model generation.

    """
    __tablename__ = 'dataset'
    __searchable__ = ['label', 'description']

    id = Column(Integer, primary_key=True)
    # Unique slug; derived from the supplied name or label in ``__init__``.
    name = Column(Unicode(255), unique=True)
    label = Column(Unicode(2000))
    description = Column(Unicode())

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime,
                        default=datetime.utcnow,
                        onupdate=datetime.utcnow)

    datalastupdated = Column(DateTime, default=datetime.utcnow)

    source_id = Column(Integer, ForeignKey('source.id'))
    source = relationship(Source, backref=backref("dataset", uselist=False))

    # Mutation-tracked JSON columns, each defaulting to an empty dict.
    mapping = Column(MutableDict.as_mutable(JSONType), default=dict)

    ORoperations = Column(MutableDict.as_mutable(JSONType), default=dict)

    prefuncs = Column(MutableDict.as_mutable(JSONType), default=dict)

    dataType = Column(Unicode(2000))

    # Boolean state flags, all False by default.
    published = Column(Boolean, default=False)

    loaded = Column(Boolean, default=False)

    tested = Column(Boolean, default=False)

    dataorg_id = Column(Integer, ForeignKey('dataorg.id'))
    dataorg = relationship(DataOrg,
                           backref=backref('datasets', lazy='dynamic'))

    metadataorg_id = Column(Integer, ForeignKey('metadataorg.id'))
    metadataorg = relationship(MetadataOrg,
                               backref=backref('datasets', lazy='dynamic'))

    years = Column(Unicode(1000))

    stats = Column(Unicode(50))

    #TODO
    #tag stuff

    def __init__(self, data=None):
        """ Populate a new dataset from ``data``.

        ``data`` is a dict-like object that may provide ``label``,
        ``name``, ``description``, ``ORoperations``, ``mapping``,
        ``prefuncs``, ``dataType`` and ``dataorg`` (either an integer id
        or a value to assign to the relationship directly). With
        ``data=None`` nothing is set.
        """
        if data is None:
            return
        self.label = data.get('label')

        # The slug comes from the explicit name when one is given (and
        # truthy), otherwise from the label.
        name_source = data.get('name') or data.get('label')
        self.name = slugify(str(name_source),
                            max_length=30,
                            separator="_")

        #check if name is already taken
        # NOTE(review): if the name and all ten suffixed variants are taken
        # the colliding name is kept as is -- confirm this is acceptable.
        if Dataset.by_name(self.name):
            for x in range(10):
                newname = self.name + "_" + str(x)
                if not Dataset.by_name(newname):
                    self.name = newname
                    break

        self.description = data.get('description')
        self.ORoperations = data.get('ORoperations', {})
        self.mapping = data.get('mapping', {})
        self.prefuncs = data.get('prefuncs', {})
        self.created_at = datetime.utcnow()
        self.dataType = data.get('dataType')

        dataorg_ref = data.get('dataorg')
        if isinstance(dataorg_ref, int):
            # An integer is treated as the id of an existing DataOrg.
            self.dataorg = DataOrg.by_id(dataorg_ref)
        else:
            try:
                self.dataorg = dataorg_ref
            except Exception as e:
                # Best effort: report the failure and carry on without a
                # dataorg. Parenthesised single-argument prints behave the
                # same on Python 2 and Python 3.
                print("failed to load the dataorg for dataset")
                print(e)
Exemplo n.º 7
0
class Source(db.Model):
    """ A source URL attached to a dataset and created by an account. """
    __tablename__ = 'source'

    id = Column(Integer, primary_key=True)
    url = Column(Unicode)
    created_at = Column(DateTime, default=datetime.utcnow)
    # Only set on update; there is no insert-time default.
    updated_at = Column(DateTime, onupdate=datetime.utcnow)
    # Mutation-tracked JSON; an ``error`` key marks a failed analysis
    # (see ``loadable``).
    analysis = Column(MutableDict.as_mutable(JSONType), default=dict)

    dataset_id = Column(Integer, ForeignKey('dataset.id'))
    # ``dataset.sources`` lists the newest source first.
    dataset = relationship(Dataset,
                           backref=backref(
                               'sources',
                               lazy='dynamic',
                               order_by='Source.created_at.desc()'))

    creator_id = Column(Integer, ForeignKey('account.id'))
    creator = relationship(Account, backref=backref('sources', lazy='dynamic'))

    def __init__(self, dataset, creator, url):
        """ Create a source for ``dataset``, owned by ``creator``, that
        points at ``url``. """
        self.dataset = dataset
        self.creator = creator
        self.url = url

    @property
    def loadable(self):
        """
        Returns True if the source is ready to be imported into the
        database. Does not require a sample run although it
        probably should.
        """
        # It shouldn't be loaded again into the database
        if self.successfully_loaded:
            return False
        # It needs mapping to be loadable (a missing mapping counts as empty)
        if not self.dataset.mapping:
            return False
        # There can be no errors in the analysis of the source. The column
        # default is only applied on insert, so tolerate ``None`` here.
        if 'error' in (self.analysis or {}):
            return False
        # All is good... proceed
        return True

    @property
    def successfully_sampled(self):
        """
        Returns True if any of this source's runs have been
        successfully sampled (a complete sample run). This shows
        whether the source is ready to be imported into the database
        """
        return any(r.successful_sample for r in self.runs)

    @property
    def is_running(self):
        """
        Returns True if any of this source's runs have the status
        'running'. This shows whether the loading has been started or not
        to help avoid multiple loads of the same resource.
        """
        return any(r.is_running for r in self.runs)

    @property
    def successfully_loaded(self):
        """
        Returns True if any of this source's runs have been
        successfully loaded (not a sample and no errors). This
        shows whether the source has been loaded into the database
        """
        return any(r.successful_load for r in self.runs)

    def __repr__(self):
        """ Debug representation; an empty string when the dataset name
        cannot be read (e.g. no dataset attached). """
        try:
            return "<Source(%s,%s)>" % (self.dataset.name, self.url)
        except Exception:
            # Still best-effort, but no longer hides KeyboardInterrupt or
            # SystemExit as the bare ``except`` did.
            return ''

    @classmethod
    def by_id(cls, id):
        """ Return the source with the given ``id``, or ``None``. """
        return db.session.query(cls).filter_by(id=id).first()

    @classmethod
    def all(cls):
        """ Return a query over all sources. """
        return db.session.query(cls)

    def as_dict(self):
        """ Return id, url, dataset name and creation time as a dict. """
        return {
            "id": self.id,
            "url": self.url,
            "dataset": self.dataset.name,
            "created_at": self.created_at
        }
Exemplo n.º 8
0
class Dataview(db.Model):
    """ A dataview bundles a set of datasets with a title, a description
    and a free-form JSON ``settings`` document, and belongs to an account.

    Every dataview receives a ``urlhash`` (generated by ``make_uuid``)
    under which it can be looked up, and may record in
    ``cloned_dataview_id`` the id of the dataview it was cloned from.
    """
    __tablename__ = 'dataview'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode(500))
    description = Column(Unicode())

    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime,
                        default=datetime.utcnow,
                        onupdate=datetime.utcnow)

    urlhash = Column(Unicode(2000), default=make_uuid)

    datasets = relationship(Dataset,
                            secondary=dataview_dataset_table,
                            backref=backref('dataviews', lazy='dynamic'))

    account_id = Column(Integer, ForeignKey('account.id'))
    account = relationship(Account, backref=backref("dataviews"))

    cloned_dataview_id = Column(Integer, ForeignKey('dataview.id'))

    settings = Column(MutableDict.as_mutable(JSONType), default=dict)

    def __init__(self, data=None):
        """ Create a dataview, optionally initialised from ``data``.

        ``data`` is a mapping that may carry ``title``, ``description``,
        ``settings`` and ``cloned_dataview_id``. A fresh ``urlhash`` is
        always generated. When the current user is authenticated, that
        user becomes the owning account.
        """
        self.urlhash = make_uuid()
        if not data:
            return
        self.title = data.get("title")
        self.description = data.get("description")
        # presumably Flask-Login's ``current_user``: ``is_authenticated``
        # is a method in old releases and a property in newer ones, so
        # support both instead of calling it unconditionally.
        authenticated = current_user.is_authenticated
        if callable(authenticated):
            authenticated = authenticated()
        if authenticated:
            self.account = current_user
        self.settings = data.get("settings", {})
        self.cloned_dataview_id = data.get("cloned_dataview_id", None)

    def __repr__(self):
        return "<Dataview(%r,%r)>" % (self.id, self.title)

    def update(self, data):
        """ Overwrite title, description, datasets and settings from
        the mapping ``data``.

        Missing ``title``/``description`` become None and missing
        ``settings`` become an empty dict. The ``urlhash`` is never
        touched here.
        """
        self.title = data.get("title")
        self.description = data.get("description")
        # SQLAlchemy rejects None for a collection relationship, so keep
        # the current datasets when the payload does not carry any.
        datasets = data.get("datasets")
        if datasets is not None:
            self.datasets = datasets
        self.settings = data.get("settings", {})

    def as_dict(self):
        """ Return title, description and settings as a plain dict. """
        return {
            'title': self.title,
            'description': self.description,
            'settings': self.settings
        }

    @classmethod
    def clone_dataview(cls, theobj):
        """ Create, persist and return a copy of the dataview ``theobj``.

        Title, description, settings and the dataset list are copied;
        the clone gets its own ``urlhash`` and remembers the id of the
        original in ``cloned_dataview_id``. The session is committed.
        """
        import copy

        classobj = cls()
        classobj.title = theobj.title
        classobj.description = theobj.description
        # Copy the settings instead of sharing the original's mutable
        # object, otherwise an in-place change on one dataview would
        # show up on the other one as well.
        classobj.settings = copy.deepcopy(dict(theobj.settings or {}))
        classobj.datasets = list(theobj.datasets)

        classobj.cloned_dataview_id = theobj.id

        db.session.add(classobj)
        db.session.commit()

        return classobj

    @classmethod
    def all_by_account(cls, account, order=True):
        """ Return a list of all dataviews owned by ``account``.

        NOTE(review): ``order`` is accepted but currently ignored; the
        result comes back in whatever order the database returns it.
        """
        return db.session.query(cls).filter_by(account_id=account.id).all()

    @classmethod
    def all(cls, order=True):
        """ Return a query over all dataviews, sorted by title in
        ascending order unless ``order`` is false. """
        q = db.session.query(cls)
        if order:
            q = q.order_by(cls.title.asc())
        return q

    @classmethod
    def by_urlhash(cls, urlhash):
        """ Return the first dataview with the given ``urlhash``. """
        return db.session.query(cls).filter_by(urlhash=urlhash).first()

    @classmethod
    def by_user_settings(cls, settings, account_id):
        """ Return the first dataview of ``account_id`` whose settings
        equal ``settings``. """
        return db.session.query(cls).filter_by(settings=settings,
                                               account_id=account_id).first()

    @classmethod
    def by_id(cls, id):
        """ Return the first dataview with the given primary key. """
        return db.session.query(cls).filter_by(id=id).first()
Exemplo n.º 9
0
def upgrade(migrate_engine):
    """Create the initial schema on ``migrate_engine``.

    The module-level ``meta`` is bound to the given engine (no separate
    engine is created). The following tables are then defined and
    created one after another: ``account``, ``metadataorg``,
    ``dataorg``, ``source``, ``sourcefile``, ``dataset``, ``run`` and
    the ``account_dataset`` association table. Each table is created
    right after it is defined, and every table that is referenced by a
    foreign key comes before the table that references it.
    """
    meta.bind = migrate_engine

    def make_table(table_name, *columns):
        # Register the table on the shared metadata and create it at once.
        table = Table(table_name, meta, *columns)
        table.create()
        return table

    def id_column():
        return Column('id', Integer, primary_key=True)

    def json_column(column_name):
        return Column(column_name,
                      MutableDict.as_mutable(JSONType),
                      default=dict)

    def created_at_column():
        return Column('created_at', DateTime, default=datetime.utcnow)

    def updated_at_column():
        return Column('updated_at', DateTime,
                      default=datetime.utcnow,
                      onupdate=datetime.utcnow)

    # ---- account ---------------------------------------------------
    make_table(
        'account',
        id_column(),
        Column('fullname', Unicode(2000)),
        Column('email', Unicode(2000), unique=True),
        Column('password', Unicode(2000)),
        Column('api_key', Unicode(2000)),
        Column('usg_group', Unicode(2000)),
        Column('login_hash', Unicode(2000)),
        Column('admin', Boolean, default=False),
        Column('verified', Boolean, default=False),
    )

    # ---- metadataorg -----------------------------------------------
    make_table(
        'metadataorg',
        id_column(),
        Column('label', Unicode(2000)),
        Column('description', Unicode()),
        Column('contactName', Unicode(2000)),
        Column('contactEmail', Unicode(2000)),
        Column('accessLevel', Unicode(2000)),
        Column('bureauCode', Unicode(2000)),
        Column('lastUpdated', DateTime),
    )

    # ---- dataorg (references metadataorg) --------------------------
    make_table(
        'dataorg',
        id_column(),
        Column('label', Unicode(2000)),
        Column('description', Unicode()),
        json_column('ORTemplate'),
        json_column('mappingTemplate'),
        json_column('prefuncs'),
        Column('lastUpdated', DateTime),
        Column('metadataorg_id', Integer, ForeignKey('metadataorg.id')),
    )

    # ---- source ----------------------------------------------------
    make_table(
        'source',
        id_column(),
        Column('name', Unicode(255)),
        Column('url', Unicode),
        created_at_column(),
        updated_at_column(),
        Column('ORid', BigInteger),
    )

    # ---- sourcefile (references source) ----------------------------
    make_table(
        'sourcefile',
        id_column(),
        Column('rawfile', Unicode),
        Column('source_id', Integer, ForeignKey('source.id')),
        created_at_column(),
        updated_at_column(),
    )

    # ---- dataset (references source and dataorg) -------------------
    make_table(
        'dataset',
        id_column(),
        Column('name', Unicode(255), unique=True),
        Column('label', Unicode(2000)),
        Column('description', Unicode),
        Column('category', Unicode()),
        Column('private', Boolean),
        created_at_column(),
        updated_at_column(),
        Column('datalastupdated', DateTime, default=datetime.utcnow),
        Column('source_id', Integer, ForeignKey('source.id')),
        json_column('mapping'),
        json_column('ORoperations'),
        json_column('prefuncs'),
        Column('dataType', Unicode(2000)),
        Column('published', Boolean, default=False),
        Column('loaded', Boolean, default=False),
        Column('tested', Boolean, default=False),
        Column('dataorg_id', Integer, ForeignKey('dataorg.id')),
    )

    # ---- run (references dataset and source) -----------------------
    make_table(
        'run',
        id_column(),
        Column('operation', Unicode(2000)),
        Column('status', Unicode(2000)),
        Column('time_start', DateTime, default=datetime.utcnow),
        Column('time_end', DateTime),
        Column('dataset_id', Integer, ForeignKey('dataset.id'),
               nullable=True),
        Column('source_id', Integer, ForeignKey('source.id'),
               nullable=True),
    )

    # ---- many-to-many: accounts <-> datasets -----------------------
    make_table(
        'account_dataset',
        Column('dataset_id', Integer, ForeignKey('dataset.id'),
               primary_key=True),
        Column('account_id', Integer, ForeignKey('account.id'),
               primary_key=True),
    )
Exemplo n.º 10
0
class Country(db.Model):
    """ A country, identified by ``gid``: the id of its row in the
    ``public.geometry__country_level0`` geometry table.

    ``geounit`` and ``label`` are copied from that geometry row when the
    object is constructed; ``pagesettings`` stores a free-form JSON
    document.
    """

    __tablename__ = 'country'
    __searchable__ = ['label']

    id = Column(Integer, primary_key=True)
    gid = Column(Integer, unique=True)
    geounit = Column(Unicode(300), unique=True)
    label = Column(Unicode(300))
    pagesettings = Column(MutableDict.as_mutable(JSONType), default=dict)

    def __init__(self, gid):
        """ Create a country for the geometry row with id ``gid``.

        Raises ``ValueError`` when the geometry table has no row for
        ``gid`` (previously this surfaced as an opaque ``TypeError``
        from subscripting ``None``).
        """
        # gid is the reference to the geometry tables
        self.gid = gid

        row = self._geometry_row("country_level0.name as geounit, "
                                 "country_level0.label as label")
        if row is None:
            raise ValueError(
                "no row with gid %r in geometry__country_level0" % (gid, ))

        self.geounit = row['geounit']
        self.label = row['label']

    def _geometry_row(self, select_list):
        """ Fetch this country's row from the geometry table.

        ``select_list`` is the column list placed after ``SELECT``; it
        must be a constant written in this class, never user input. The
        ``gid`` is passed as a bound parameter instead of being
        formatted into the SQL text. Returns the first result row, or
        None when there is none.
        """
        from sqlalchemy import text

        statement = text(
            "SELECT " + select_list + " "
            "FROM public.geometry__country_level0 as country_level0 "
            "WHERE country_level0.gid = :gid")
        return db.engine.execute(statement, {"gid": self.gid}).first()

    @property
    def sovereignty(self):
        """ The ``sovereignt`` value of this country's geometry row, or
        None when the geometry row is missing. """
        row = self._geometry_row("country_level0.sovereignt as sovereignty")
        if row is None:
            return None
        return row['sovereignty']

    @property
    def regions(self):
        """ The geometry row holding sovereignty, label and the region
        classifications of this country, or None when it is missing. """
        return self._geometry_row(
            "country_level0.sovereignt as sovereignty, "
            "country_level0.label as label, "
            "country_level0.continent as continent, "
            "country_level0.georegion as georegion, "
            "country_level0.dos_region as dos_region, "
            "country_level0.usaid_reg as usaid_reg, "
            "country_level0.dod_cmd as dod_cmd, "
            "country_level0.feed_the_f as feed_the_f, "
            "country_level0.region_un as region_un, "
            "country_level0.wb_inc_lvl as wb_inc_lvl")

    # NOTE: caching via @cache.memoize(timeout=360) is currently disabled.
    @classmethod
    def get_all_json(cls):
        """ Return a list of JSON-ready dicts, one per geometry row
        whose label equals its sovereignty, ordered by name.

        Each dict carries ``geounit``, ``label``, ``iso_a2``, a
        ``regions`` sub-dict with the region classifications, the flags
        ``selected`` and ``filtered`` (both False) and ``id`` (the
        ``iso_a2`` code).
        """
        region_keys = ['continent', 'georegion', 'dos_region', 'usaid_reg',
                       'dod_cmd', 'feed_the_f', 'region_un', 'wb_inc_lvl']
        result = db.engine.execute(
            "SELECT "
            "country_level0.name as geounit, "
            "country_level0.sovereignt as sovereignty, "
            "country_level0.label as label, "
            "country_level0.iso_a2 as iso_a2, "
            "country_level0.continent as continent, "
            "country_level0.georegion as georegion, "
            "country_level0.dos_region as dos_region, "
            "country_level0.usaid_reg as usaid_reg, "
            "country_level0.dod_cmd as dod_cmd, "
            "country_level0.feed_the_f as feed_the_f, "
            "country_level0.region_un as region_un, "
            "country_level0.wb_inc_lvl as wb_inc_lvl "
            "FROM public.geometry__country_level0 as country_level0 "
            "WHERE country_level0.label = country_level0.sovereignt "
            "ORDER BY country_level0.name;")
        return [{
            'geounit': country["geounit"],
            'label': country['label'],
            'iso_a2': country['iso_a2'],
            'regions': dict((key, country[key]) for key in region_keys),
            'selected': False,
            'filtered': False,
            'id': country['iso_a2']
        } for country in result]

    @classmethod
    def all(cls):
        """ Return an unfiltered query over all countries. """
        q = db.session.query(cls)
        return q

    @classmethod
    def by_id(cls, id):
        """ Return the first country with the given primary key. """
        return db.session.query(cls).filter_by(id=id).first()

    @classmethod
    def by_gid(cls, gid):
        """ Return the first country with the given geometry ``gid``. """
        return db.session.query(cls).filter_by(gid=gid).first()

    @classmethod
    def by_geounit(cls, geounit):
        """ Return the first country with the given ``geounit``. """
        return db.session.query(cls).filter_by(geounit=geounit).first()

    def __repr__(self):
        return "<Country(%r,%r)>" % (self.geounit, self.gid)
Exemplo n.º 11
0
class View(db.Model):
    """ A view stores a specific configuration of a visualisation widget.

    The configuration itself lives in the JSON ``state`` column. A view
    is attached to a dataset and, optionally, to an account; views are
    deleted together with the dataset or account they belong to
    (``delete-orphan`` cascade on both backrefs).
    """

    __tablename__ = 'view'

    id = db.Column(db.Integer, primary_key=True)
    widget = db.Column(db.Unicode(2000))
    name = db.Column(db.Unicode(2000))
    label = db.Column(db.Unicode(2000))
    description = db.Column(db.Unicode())
    state = db.Column(MutableDict.as_mutable(JSONType), default=dict)
    public = db.Column(db.Boolean, default=False)

    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    # No insert default: stays NULL until the row is updated for the
    # first time.
    updated_at = db.Column(db.DateTime, onupdate=datetime.utcnow)

    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'))
    account_id = db.Column(db.Integer,
                           db.ForeignKey('account.id'),
                           nullable=True)

    dataset = db.relationship(Dataset,
                              backref=db.backref(
                                  'views',
                                  cascade='all,delete,delete-orphan',
                                  lazy='dynamic'))

    account = db.relationship(Account,
                              backref=db.backref(
                                  'views',
                                  cascade='all,delete,delete-orphan',
                                  lazy='dynamic'))

    def __init__(self):
        # Views are created empty; callers set the attributes afterwards.
        pass

    @classmethod
    def by_id(cls, id):
        """ Return the first view with the given primary key. """
        return db.session.query(cls).filter_by(id=id).first()

    @classmethod
    def by_name(cls, dataset, name):
        """ Return the first view called ``name`` on ``dataset``. """
        q = db.session.query(cls).filter_by(name=name)
        return q.filter_by(dataset=dataset).first()

    @classmethod
    def all_by_dataset(cls, dataset):
        """ Return a query over all views attached to ``dataset``. """
        return db.session.query(cls).filter_by(dataset=dataset)

    def _dataset_name(self):
        """ Name of the attached dataset, or None when there is none.

        Compared against None explicitly so the check does not depend
        on how ``Dataset`` defines truthiness.
        """
        if self.dataset is None:
            return None
        return self.dataset.name

    def as_dict(self):
        """ Return the view as a plain dict.

        ``dataset`` and ``account`` hold the respective names, or None
        when the view is not attached to one.
        """
        # NOTE(review): the account table created by the initial
        # migration has a ``fullname`` column but no ``name`` column;
        # confirm that Account really exposes ``name``.
        return {
            'id': self.id,
            'widget': self.widget,
            'name': self.name,
            'label': self.label,
            'description': self.description,
            'state': self.state,
            'public': self.public,
            'dataset': self._dataset_name(),
            'account': self.account.name if self.account else None
        }

    def __repr__(self):
        # Must not raise for a view without dataset, e.g. a freshly
        # constructed ``View()``.
        return "<View(%s,%s)>" % (self._dataset_name(), self.name)
Exemplo n.º 12
0
class DataOrg(db.Model):
    """ A data organisation: a labelled, described record that stores
    three JSON documents (``ORTemplate``, ``mappingTemplate`` and
    ``prefuncs``) plus a ``lastUpdated`` timestamp.

    Instances can be exported to and re-created from a JSON-style dump
    (``to_json_dump`` / ``import_json_dump``).
    """
    __tablename__ = 'dataorg'
    __searchable__ = ['label', 'description']

    # Attributes that make up the ``fields`` part of a JSON dump.
    _DUMP_FIELDS = ('label', 'description', 'ORTemplate', 'mappingTemplate',
                    'prefuncs')

    id = Column(Integer, primary_key=True)
    label = Column(Unicode(2000))
    description = Column(Unicode())

    ORTemplate = Column(MutableDict.as_mutable(JSONType), default=dict)

    mappingTemplate = Column(MutableDict.as_mutable(JSONType), default=dict)

    prefuncs = Column(MutableDict.as_mutable(JSONType), default=dict)

    lastUpdated = Column(DateTime, onupdate=datetime.utcnow)

    #metadataorg_id = Column(Integer, ForeignKey('metadataorg.id'))
    # metadataorg = relationship(MetadataOrg,
    #                        backref=backref('dataorgs', lazy='dynamic'))

    def __init__(self, dataorg=None):
        """ Create a data organisation, optionally initialised from the
        mapping ``dataorg`` (see ``update`` for the keys that are read).
        """
        if not dataorg:
            return
        self.update(dataorg)

    def touch(self):
        """ Set ``lastUpdated`` to the current UTC time and add the
        object to the session.

        The previous code wrote to ``updated_at``, which is not a column
        of this model, so the stored timestamp never changed.
        """
        self.lastUpdated = datetime.utcnow()
        db.session.add(self)

    def to_json_dump(self):
        """ Return a JSON-style dump of this object.

        The result has the keys ``fields`` (label, description and the
        three JSON documents), ``pk`` (the id) and ``model``
        (``"DataOrg"``).
        """
        return {
            'fields': dict((field, getattr(self, field))
                           for field in self._DUMP_FIELDS),
            'pk': self.id,
            'model': "DataOrg",
        }

    @classmethod
    def import_json_dump(cls, theobj):
        """ Create and commit a new object from a dump as produced by
        ``to_json_dump`` and return the id of the new row.

        Every dumped field must be present in ``theobj['fields']``; the
        dump's ``pk`` is ignored.
        """
        classobj = cls()
        for field in cls._DUMP_FIELDS:
            setattr(classobj, field, theobj['fields'][field])

        db.session.add(classobj)
        db.session.commit()

        return classobj.id

    def __repr__(self):
        return "<DataOrg(%r,%r)>" % (self.id, self.label)

    def update(self, dataorg):
        """ Overwrite label, description and the three JSON documents
        from the mapping ``dataorg`` and refresh ``lastUpdated``.

        Missing ``label``/``description`` become None; missing JSON
        documents become empty dicts.
        """
        # The previous code read from an undefined name ``dataset``
        # instead of the ``dataorg`` argument.
        self.label = dataorg.get('label')
        self.description = dataorg.get('description')
        self.ORTemplate = dataorg.get('ORTemplate', {})
        self.mappingTemplate = dataorg.get('mappingTemplate', {})
        self.prefuncs = dataorg.get('prefuncs', {})
        self.lastUpdated = datetime.utcnow()

    def as_dict(self):
        """ Return id, label, description and lastUpdated as a dict. """
        return {
            'id': self.id,
            'label': self.label,
            'description': self.description,
            'lastUpdated': self.lastUpdated
        }

    @classmethod
    def get_all_admin(cls, order=True):
        """ Return a query over all data organisations; currently the
        same as ``all``. """
        return cls.all(order=order)

    @classmethod
    def get_all(cls, order=True):
        """ Return a query over all data organisations; currently the
        same as ``all``. """
        return cls.all(order=order)

    @classmethod
    def all(cls, order=True):
        """ Return a query over all data organisations, sorted by label
        in ascending order unless ``order`` is false. """
        q = db.session.query(cls)
        if order:
            q = q.order_by(cls.label.asc())
        return q

    @classmethod
    def by_name(cls, label):
        """ Return the first data organisation with the given label. """
        return db.session.query(cls).filter_by(label=label).first()

    @classmethod
    def by_id(cls, id):
        """ Return the first data organisation with the given id. """
        return db.session.query(cls).filter_by(id=id).first()


#TODO
# class MetadataOrgSettings(colander.MappingSchema):
#     fullname = colander.SchemaNode(colander.String())
#     email = colander.SchemaNode(colander.String(),
#                                 validator=colander.Email())
#     public_email = colander.SchemaNode(colander.Boolean(), missing=False)
#     twitter = colander.SchemaNode(colander.String(), missing=None,
#                                   validator=colander.Length(max=140))
#     public_twitter = colander.SchemaNode(colander.Boolean(), missing=False)
#     password1 = colander.SchemaNode(colander.String(),
#                                     missing=None, default=None)
#     password2 = colander.SchemaNode(colander.String(),
#                                     missing=None, default=None)
#     script_root = colander.SchemaNode(colander.String(),
#                                       missing=None, default=None)