def upgrade(migrate_engine):
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    meta.bind = migrate_engine

    # ######################## Country
    # One row per country, keyed to the geometry tables via gid.
    country = Table(
        'country', meta,
        Column('id', Integer, primary_key=True),
        Column('gid', Integer, unique=True),
        Column('geounit', Unicode(300), unique=True),
        Column('label', Unicode(300)),
        Column('pagesettings', MutableDict.as_mutable(JSONType),
               default=dict))
    country.create()
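# A matching downgrade() is sketched below. It is an assumption based on
# the sqlalchemy-migrate convention these scripts follow (autoload the
# table against the bound metadata, then drop it); it is not part of the
# original migration.
def downgrade(migrate_engine):
    meta.bind = migrate_engine
    country = Table('country', meta, autoload=True)
    country.drop()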
def upgrade(migrate_engine):
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    meta.bind = migrate_engine

    account = Table('account', meta, autoload=True)
    dataset = Table('dataset', meta, autoload=True)

    # ######################## Dataview
    dataview = Table(
        'dataview', meta,
        Column('id', Integer, primary_key=True),
        Column('title', Unicode(500)),
        Column('description', Unicode()),
        Column('created_at', DateTime),
        Column('updated_at', DateTime),
        Column('urlhash', Unicode(2000)),
        Column('account_id', Integer, ForeignKey('account.id')),
        Column('cloned_dataview_id', Integer, ForeignKey('dataview.id')),
        Column('settings', MutableDict.as_mutable(JSONType), default=dict))
    dataview.create()

    # ################# Many-to-many: dataviews to datasets
    dataview_dataset_table = Table(
        'dataview_dataset', meta,
        Column('dataview_id', Integer, ForeignKey('dataview.id'),
               primary_key=True),
        Column('dataset_id', Integer, ForeignKey('dataset.id'),
               primary_key=True))
    dataview_dataset_table.create()
class Dataset(TableHandler, db.Model):

    """ The dataset is the core entity of any access to data. All
    requests to the actual data store are routed through it, as well
    as data loading and model generation.

    The dataset keeps an in-memory representation of the data model
    (including all dimensions and measures) which can be used to
    generate necessary queries.
    """
    __tablename__ = 'dataset'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode(255), unique=True)
    label = db.Column(db.Unicode(2000))
    description = db.Column(db.Unicode())
    currency = db.Column(db.Unicode())
    default_time = db.Column(db.Unicode())
    schema_version = db.Column(db.Unicode())
    entry_custom_html = db.Column(db.Unicode())
    ckan_uri = db.Column(db.Unicode())
    category = db.Column(db.Unicode())
    serp_title = db.Column(db.Unicode(), nullable=True)
    serp_teaser = db.Column(db.Unicode(), nullable=True)
    private = db.Column(db.Boolean, default=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow,
                           onupdate=datetime.utcnow)
    data = db.Column(MutableDict.as_mutable(JSONType), default=dict)

    languages = db.association_proxy('_languages', 'code')
    territories = db.association_proxy('_territories', 'code')

    def __init__(self, data):
        self.data = data.copy()
        dataset = self.data['dataset']
        del self.data['dataset']
        self.label = dataset.get('label')
        self.name = dataset.get('name')
        self.description = dataset.get('description')
        self.currency = dataset.get('currency')
        self.category = dataset.get('category')
        self.serp_title = dataset.get('serp_title')
        self.serp_teaser = dataset.get('serp_teaser')
        self.default_time = dataset.get('default_time')
        self.entry_custom_html = dataset.get('entry_custom_html')
        self.languages = dataset.get('languages', [])
        self.territories = dataset.get('territories', [])
        self.ckan_uri = dataset.get('ckan_uri')
        self._load_model()

    @property
    def model(self):
        model = self.data.copy()
        model['dataset'] = self.as_dict()
        return model

    @property
    def mapping(self):
        return self.data.get('mapping', {})

    @db.reconstructor
    def _load_model(self):
        """ Construct the in-memory object representation of this
        dataset's dimension and measures model.

        This is called upon initialization and deserialization of the
        dataset from the SQLAlchemy store.
        """
        self.dimensions = []
        self.measures = []
        for dim, data in self.mapping.items():
            if data.get('type') == 'measure' or dim == 'amount':
                self.measures.append(Measure(self, dim, data))
                continue
            elif data.get('type') == 'date' or \
                    (dim == 'time' and data.get('datatype') == 'date'):
                dimension = DateDimension(self, dim, data)
            elif data.get('type') in ['value', 'attribute']:
                dimension = AttributeDimension(self, dim, data)
            else:
                dimension = CompoundDimension(self, dim, data)
            self.dimensions.append(dimension)
        self.init()
        self._is_generated = None

    def __getitem__(self, name):
        """ Access a field (dimension or measure) by name. """
        for field in self.fields:
            if field.name == name:
                return field
        raise KeyError()

    def __contains__(self, name):
        try:
            self[name]
            return True
        except KeyError:
            return False

    @property
    def fields(self):
        """ Both the dimensions and measures in this dataset. """
        return self.dimensions + self.measures

    @property
    def compounds(self):
        """ Return only compound dimensions. """
        return filter(lambda d: isinstance(d, CompoundDimension),
                      self.dimensions)

    @property
    def facet_dimensions(self):
        return [d for d in self.dimensions if d.facet]

    def init(self):
        """ Create a SQLAlchemy model for the current dataset model,
        without creating the tables and columns. This needs to be
        called both for access to the data and in order to generate
        the model physically.
        """
        self.bind = db.engine
        self.meta = db.MetaData()
        # self.tx = self.bind.begin()
        self.meta.bind = db.engine

        self._init_table(self.meta, self.name, 'entry',
                         id_type=db.Unicode(42))
        for field in self.fields:
            field.column = field.init(self.meta, self.table)
        self.alias = self.table.alias('entry')

    def generate(self):
        """ Create the tables and columns necessary for this dataset
        to keep data.
        """
        for field in self.fields:
            field.generate(self.meta, self.table)
        for dim in self.dimensions:
            if isinstance(dim, CompoundDimension):
                self.table.append_constraint(
                    ForeignKeyConstraint(
                        [dim.name + '_id'],
                        [dim.table.name + '.id'],
                        # use_alter=True,
                        name='fk_' + self.name + '_' + dim.name))
        self._generate_table()
        self._is_generated = True

    @property
    def is_generated(self):
        if self._is_generated is None:
            self._is_generated = self.table.exists()
        return self._is_generated

    @property
    def has_badges(self):
        """ Property that returns True if the dataset has been given
        any badges.
        """
        # Cast the badge count as a boolean and return it
        return bool(self.badges.count())

    def commit(self):
        pass
        # self.tx.commit()
        # self.tx = self.bind.begin()

    def _make_key(self, data):
        """ Generate a unique identifier for an entry. This is better
        than SQL auto-increment because it is stable across multiple
        loads and thus creates stable URIs for entries.
        """
        uniques = [self.name]
        for field in self.fields:
            if not field.key:
                continue
            obj = data.get(field.name)
            if isinstance(obj, dict):
                obj = obj.get('name', obj.get('id'))
            uniques.append(obj)
        return hash_values(uniques)

    def load(self, data):
        """ Handle a single entry of data in the mapping source format,
        i.e. with all needed columns. This will propagate to all
        dimensions and set values as appropriate.
        """
        entry = dict()
        for field in self.fields:
            field_data = data[field.name]
            entry.update(field.load(self.bind, field_data))
        entry['id'] = self._make_key(data)
        self._upsert(self.bind, entry, ['id'])

    def flush(self):
        """ Delete all data from the dataset tables but leave the table
        structure intact.
        """
        for dimension in self.dimensions:
            dimension.flush(self.bind)
        self._flush(self.bind)

    def drop(self):
        """ Drop all tables created as part of this dataset, i.e. by
        calling ``generate()``. This will of course also delete the
        data itself.
        """
        self._drop(self.bind)
        for dimension in self.dimensions:
            dimension.drop(self.bind)
        self._is_generated = False

    def key(self, key):
        """ For a given ``key``, find a column to identify it in a
        query. A ``key`` is either the name of a simple attribute
        (e.g. ``time``) or of an attribute of a complex dimension
        (e.g. ``to.label``). The returned key is using an alias, so it
        can be used in a query directly.
        """
        attr = None
        if '.' in key:
            key, attr = key.split('.', 1)
        dimension = self[key]
        if hasattr(dimension, 'alias'):
            attr_name = dimension[attr].column.name if attr else 'name'
            return dimension.alias.c[attr_name]
        return self.alias.c[dimension.column.name]

    def entries(self, conditions="1=1", order_by=None, limit=None,
                offset=0, step=10000, fields=None):
        """ Generate a fully denormalized view of the entries on this
        table. This view is nested so that each dimension will be a hash
        of its attributes.

        This is somewhat similar to the entries collection in the fully
        denormalized schema before OpenSpending 0.11 (MongoDB).
        """
        if not self.is_generated:
            return

        if fields is None:
            fields = self.fields

        joins = self.alias
        for d in self.dimensions:
            if d in fields:
                joins = d.join(joins)
        selects = [f.selectable for f in fields] + [self.alias.c.id]

        # enforce stable sorting:
        if order_by is None:
            order_by = [self.alias.c.id.asc()]

        for i in count():
            qoffset = offset + (step * i)
            qlimit = step
            if limit is not None:
                qlimit = min(limit - (step * i), step)
            if qlimit <= 0:
                break

            query = db.select(selects, conditions, joins,
                              order_by=order_by, use_labels=True,
                              limit=qlimit, offset=qoffset)
            rp = self.bind.execute(query)

            first_row = True
            while True:
                row = rp.fetchone()
                if row is None:
                    if first_row:
                        return
                    break
                first_row = False
                yield decode_row(row, self)

    def aggregate(self, measures=['amount'], drilldowns=[], cuts=[],
                  page=1, pagesize=10000, order=[]):
        """ Query the dataset for a subset of cells based on cuts and
        drilldowns. It returns a structure with a list of drilldown items
        and a summary about the slice cut by the query.

        ``measures``
            The numeric units to be aggregated over, defaults to
            [``amount``]. (type: `list`)
        ``drilldowns``
            Dimensions to drill down to. (type: `list`)
        ``cuts``
            Specification of what to cut from the cube. This is a `list`
            of `two-tuples` where the first item is the dimension and the
            second item is the value to cut from. It is turned into a
            query where multiple cuts for the same dimension are combined
            into an *OR* query and then the queries for the different
            dimensions are combined into an *AND* query.
        ``page``
            Page the drilldown result and return page number *page*.
            type: `int`
        ``pagesize``
            Page the drilldown result into pages of size *pagesize*.
            type: `int`
        ``order``
            Sort the result based on the dimension *sort_dimension*.
            This may be `None` (*default*) or a `list` of two-`tuples`
            where the first element is the *dimension* and the second
            element is the order (`False` for ascending, `True` for
            descending). Type: `list` of two-`tuples`.

        Raises:

        :exc:`ValueError`
            If a cube is not yet computed. Call :meth:`compute` to
            compute the cube.
        :exc:`KeyError`
            If a drilldown, cut or order dimension is not part of this
            cube or the order dimensions are not a subset of the
            drilldown dimensions.

        Returns: A `dict` containing the drilldown and the summary::

          {"drilldown": [
              {"num_entries": 5545,
               "amount": 41087379002.0,
               "cofog1": {"description": "",
                          "label": "Economic affairs"}},
              ...
            ],
           "summary": {"amount": 7353306450299.0,
                       "num_entries": 133612}}
        """
        # Get the joins (aka alias) and the dataset
        joins = alias = self.alias
        dataset = self

        # Aggregation fields are all of the measures, so we create
        # individual summary fields with the sum function of SQLAlchemy
        fields = [db.func.sum(alias.c[m]).label(m) for m in measures]
        # We append an aggregation field that counts the number of entries
        fields.append(db.func.count(alias.c.id).label("entries"))
        # Create a copy of the statistics fields (for later)
        stats_fields = list(fields)

        # Create label map for time columns (year and month) for lookup
        # since they are found under the time attribute
        labels = {
            'year': dataset['time']['year'].column_alias.label('year'),
            'month': dataset['time']['yearmonth'].column_alias.label('month'),
        }

        # Get the dimensions we're interested in. These would be the
        # drilldowns and the cuts. For compound dimensions we are only
        # interested in the most significant one (e.g. for from.name
        # we're interested in from)
        dimensions = drilldowns + [k for k, v in cuts]
        dimensions = [d.split('.')[0] for d in dimensions]

        # Loop over the dimensions as a set (to avoid multiple occurrences)
        for dimension in set(dimensions):
            # If the dimension is year or month we're interested in 'time'
            if dimension in labels:
                dimension = 'time'
            # If the dimension table isn't in the automatic joins we add it
            if dimension not in [c.table.name for c in joins.columns]:
                joins = dataset[dimension].join(joins)

        # Drilldowns are performed using group_by SQL functions
        group_by = []
        for key in drilldowns:
            # If drilldown is in labels we append its mapped column to
            # fields
            if key in labels:
                column = labels[key]
                group_by.append(column)
                fields.append(column)
            else:
                # Get the column from the dataset
                column = dataset.key(key)
                # If the drilldown is a compound dimension or the column's
                # table is in the joins we're already fetching the column,
                # so we just append it to fields and the group_by
                if '.' in key or column.table == alias:
                    fields.append(column)
                    group_by.append(column)
                else:
                    # If not we add the column table to the fields and add
                    # all of that table's columns to the group_by
                    fields.append(column.table)
                    for col in column.table.columns:
                        group_by.append(col)

        # Cuts are managed using AND statements and we use a dict with set
        # as the default value to create the filters (cut on various
        # values)
        conditions = db.and_()
        filters = defaultdict(set)

        for key, value in cuts:
            # If the key is in labels (year or month) we get the mapped
            # column, else we get the column from the dataset
            if key in labels:
                column = labels[key]
            else:
                column = dataset.key(key)
            # We add the value to the set for that particular column
            filters[column].add(value)

        # Loop over the columns in the filter and add that to the
        # conditions. For every value in the set we create an OR statement
        # so we get e.g. year=2007 AND (from.who == 'me' OR from.who == 'you')
        for attr, values in filters.items():
            conditions.append(db.or_(*[attr == v for v in values]))

        # Ordering can be set by a parameter or ordered by measures by
        # default
        order_by = []
        # If no order is defined we default to the order of the measures
        # in the order they occur (furthest to the left is most
        # significant)
        if order is None or not len(order):
            order = [(m, True) for m in measures]

        # We loop through the order list to add the columns themselves
        for key, direction in order:
            # If it's a part of the measures we have to order by the
            # aggregated values (the sum of the measure)
            if key in measures:
                column = db.func.sum(alias.c[key]).label(key)
            # If it's in the labels we have to get the mapped column
            elif key in labels:
                column = labels[key]
            # ...if not we just get the column from the dataset
            else:
                column = dataset.key(key)
            # We append the column and set the direction
            # (True == descending)
            order_by.append(column.desc() if direction else column.asc())

        # query 1: get overall sums.
        # Here we use the stats_fields we saved earlier
        query = db.select(stats_fields, conditions, joins)
        rp = dataset.bind.execute(query)
        # Execute the query and turn it into a list so we can pop the
        # entry count and then zip the measures and the totals together
        stats = list(rp.fetchone())
        num_entries = stats.pop()
        total = zip(measures, stats)

        # query 2: get total count of drilldowns
        if len(group_by):
            # Select 1 for each group in the group_by and count them
            query = db.select(['1'], conditions, joins, group_by=group_by)
            query = db.select([db.func.count('1')], '1=1', query.alias('q'))
            rp = dataset.bind.execute(query)
            num_drilldowns, = rp.fetchone()
        else:
            # If there are no drilldowns we still have to do one
            num_drilldowns = 1

        # The drilldown result list
        drilldown = []
        # The offset in the db, based on the page and pagesize (we have to
        # modify it since page counts start from 1 but we count from 0)
        offset = int((page - 1) * pagesize)

        # query 3: get the actual data
        query = db.select(fields, conditions, joins, order_by=order_by,
                          group_by=group_by, use_labels=True,
                          limit=pagesize, offset=offset)
        rp = dataset.bind.execute(query)
        while True:
            # Get each row in the db result and append it, decoded, to the
            # drilldown result. The decoded version is a json representation
            row = rp.fetchone()
            if row is None:
                break
            result = decode_row(row, dataset)
            drilldown.append(result)

        # Create the summary based on the stats_fields and other things.
        # First we add the total for each measure to the root of the
        # summary (watch out!) and then we add various other,
        # self-explanatory statistics such as page and number of entries.
        # The currency value is strange since it's redundant for multiple
        # measures but is left as is for backwards compatibility.
        summary = {key: value for (key, value) in total}
        summary.update({
            'num_entries': num_entries,
            'currency': {m: dataset.currency for m in measures},
            'num_drilldowns': num_drilldowns,
            'page': page,
            'pages': int(math.ceil(num_drilldowns / float(pagesize))),
            'pagesize': pagesize
        })

        return {'drilldown': drilldown, 'summary': summary}

    def timerange(self):
        """ Get the timerange of the dataset (based on the time
        attribute). Returns a tuple of (first timestamp, last timestamp)
        where timestamp is a datetime object.
        """
        try:
            # Get the time column
            time = self.key('time')
            # We use SQL's min and max functions to get the timestamps
            query = db.session.query(db.func.min(time), db.func.max(time))
            # We just need one result to get min and max time
            return [
                datetime.strptime(date, '%Y-%m-%d') if date else None
                for date in query.one()
            ]
        except:
            return (None, None)

    def __repr__(self):
        return "<Dataset(%s:%s:%s)>" % (self.name, self.dimensions,
                                        self.measures)

    def __len__(self):
        if not self.is_generated:
            return 0
        rp = self.bind.execute(self.alias.count())
        return rp.fetchone()[0]

    def as_dict(self):
        return {
            'label': self.label,
            'name': self.name,
            'description': self.description,
            'default_time': self.default_time,
            'schema_version': self.schema_version,
            'currency': self.currency,
            'category': self.category,
            'serp_title': self.serp_title,
            'serp_teaser': self.serp_teaser,
            'timestamps': {
                'created': self.created_at,
                'last_modified': self.updated_at
            },
            'languages': list(self.languages),
            'territories': list(self.territories),
            'badges': [b.as_dict(short=True) for b in self.badges]
        }

    @classmethod
    def all_by_account(cls, account):
        """ Query available datasets based on dataset visibility. """
        criteria = [cls.private == false()]
        if account is not None:
            criteria += [
                "1=1" if account.admin else "1=2",
                cls.managers.any(type(account).id == account.id)
            ]
        q = db.session.query(cls).filter(db.or_(*criteria))
        q = q.order_by(cls.label.asc())
        return q

    @classmethod
    def by_name(cls, name):
        return db.session.query(cls).filter_by(name=name).first()
class Dataset(db.Model):

    """ The dataset is the core entity of any access to data. All
    requests to the actual data store are routed through it, as well
    as data loading and model generation.
    """
    __tablename__ = 'dataset'
    __searchable__ = ['label', 'description']

    id = Column(Integer, primary_key=True)
    name = Column(Unicode(255), unique=True)
    label = Column(Unicode(2000))
    description = Column(Unicode())
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow,
                        onupdate=datetime.utcnow)
    datalastupdated = Column(DateTime, default=datetime.utcnow)

    source_id = Column(Integer, ForeignKey('source.id'))
    source = relationship(Source, backref=backref("dataset", uselist=False))

    mapping = Column(MutableDict.as_mutable(JSONType), default=dict)
    ORoperations = Column(MutableDict.as_mutable(JSONType), default=dict)
    prefuncs = Column(MutableDict.as_mutable(JSONType), default=dict)
    dataType = Column(Unicode(2000))

    published = Column(Boolean, default=False)
    loaded = Column(Boolean, default=False)
    tested = Column(Boolean, default=False)

    dataorg_id = Column(Integer, ForeignKey('dataorg.id'))
    dataorg = relationship(DataOrg,
                           backref=backref('datasets', lazy='dynamic'))

    metadataorg_id = Column(Integer, ForeignKey('metadataorg.id'))
    metadataorg = relationship(MetadataOrg,
                               backref=backref('datasets', lazy='dynamic'))

    years = Column(Unicode(1000))
    stats = Column(Unicode(50))

    # TODO: tag stuff

    def __init__(self, data=None):
        if data is None:
            return
        self.label = data.get('label')
        if data.get('name', None):
            self.name = slugify(str(data.get('name')), max_length=30,
                                separator="_")
        else:
            self.name = slugify(str(data.get('label')), max_length=30,
                                separator="_")
        # Check if the name is already taken; if so, append a suffix
        if Dataset.by_name(self.name):
            for x in range(10):
                newname = self.name + "_" + str(x)
                if not Dataset.by_name(newname):
                    self.name = newname
                    break
        self.description = data.get('description')
        self.ORoperations = data.get('ORoperations', {})
        self.mapping = data.get('mapping', {})
        self.prefuncs = data.get('prefuncs', {})
        self.created_at = datetime.utcnow()
        self.dataType = data.get('dataType')
        if type(data.get('dataorg')) == int:
            self.dataorg = DataOrg.by_id(data.get('dataorg'))
        else:
            try:
                self.dataorg = data.get('dataorg')
            except Exception as e:
                print "failed to load the dataorg for dataset"
                print e
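# Hypothetical construction sketch for the loader-side Dataset above. It
# shows the slug fallback from 'label' when no 'name' is supplied and the
# numeric-suffix collision handling. All field values are illustrative
# and an open db session is assumed.
ds = Dataset({
    'label': 'World Bank Projects',
    'description': 'Project-level spending data',
    'dataType': 'spending',
    'mapping': {},
})
# With no 'name' given, ds.name is slugified from the label (e.g.
# 'world_bank_projects'); a suffix like '_0' is appended if it is taken.
db.session.add(ds)
db.session.commit()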
class Source(db.Model):
    __tablename__ = 'source'

    id = Column(Integer, primary_key=True)
    url = Column(Unicode)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, onupdate=datetime.utcnow)
    analysis = Column(MutableDict.as_mutable(JSONType), default=dict)

    dataset_id = Column(Integer, ForeignKey('dataset.id'))
    dataset = relationship(Dataset, backref=backref(
        'sources', lazy='dynamic', order_by='Source.created_at.desc()'))

    creator_id = Column(Integer, ForeignKey('account.id'))
    creator = relationship(Account,
                           backref=backref('sources', lazy='dynamic'))

    def __init__(self, dataset, creator, url):
        self.dataset = dataset
        self.creator = creator
        self.url = url

    @property
    def loadable(self):
        """ Returns True if the source is ready to be imported into the
        database. Does not require a sample run, although it probably
        should.
        """
        # It shouldn't be loaded again into the database
        if self.successfully_loaded:
            return False
        # It needs a mapping to be loadable
        if not len(self.dataset.mapping):
            return False
        # There can be no errors in the analysis of the source
        if 'error' in self.analysis:
            return False
        # All is good... proceed
        return True

    @property
    def successfully_sampled(self):
        """ Returns True if any of this source's runs have been
        successfully sampled (a complete sample run). This shows whether
        the source is ready to be imported into the database.
        """
        return True in [r.successful_sample for r in self.runs]

    @property
    def is_running(self):
        """ Returns True if any of this source's runs have the status
        'running'. This shows whether the loading has been started or
        not, to help avoid multiple loads of the same resource.
        """
        return True in [r.is_running for r in self.runs]

    @property
    def successfully_loaded(self):
        """ Returns True if any of this source's runs have been
        successfully loaded (not a sample and no errors). This shows
        whether the source has been loaded into the database.
        """
        return True in [r.successful_load for r in self.runs]

    def __repr__(self):
        try:
            return "<Source(%s,%s)>" % (self.dataset.name, self.url)
        except:
            return ''

    @classmethod
    def by_id(cls, id):
        return db.session.query(cls).filter_by(id=id).first()

    @classmethod
    def all(cls):
        return db.session.query(cls)

    def as_dict(self):
        return {
            "id": self.id,
            "url": self.url,
            "dataset": self.dataset.name,
            "created_at": self.created_at
        }
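# Usage sketch of the readiness checks above. The URL is illustrative;
# an existing dataset object, an authenticated current_user and an open
# db session are assumed.
source = Source(dataset, current_user, 'http://example.org/data.csv')
db.session.add(source)
db.session.commit()

# loadable stays False until the dataset has a mapping, the analysis is
# error-free and no run has already loaded this source successfully.
if source.loadable and not source.is_running:
    pass  # hand the source to the loader here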
class Dataview(db.Model):

    """ A dataview stores a specific configuration of a visualisation
    over one or more datasets.
    """
    __tablename__ = 'dataview'

    id = Column(Integer, primary_key=True)
    title = Column(Unicode(500))
    description = Column(Unicode())
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow,
                        onupdate=datetime.utcnow)
    urlhash = Column(Unicode(2000), default=make_uuid)

    datasets = relationship(Dataset, secondary=dataview_dataset_table,
                            backref=backref('dataviews', lazy='dynamic'))

    account_id = Column(Integer, ForeignKey('account.id'))
    account = relationship(Account, backref=backref("dataviews"))

    cloned_dataview_id = Column(Integer, ForeignKey('dataview.id'))
    settings = Column(MutableDict.as_mutable(JSONType), default=dict)

    def __init__(self, data=None):
        self.urlhash = make_uuid()
        if not data:
            return
        self.title = data.get("title")
        self.description = data.get("description")
        if current_user.is_authenticated():
            self.account = current_user
        self.settings = data.get("settings", {})
        self.cloned_dataview_id = data.get("cloned_dataview_id", None)

    def __repr__(self):
        return "<Dataview(%r,%r)>" % (self.id, self.title)

    def update(self, data):
        # The urlhash is not updated
        self.title = data.get("title")
        self.description = data.get("description")
        self.datasets = data.get("datasets")
        self.settings = data.get("settings", {})

    def as_dict(self):
        return {
            'title': self.title,
            'description': self.description,
            'settings': self.settings
        }

    @classmethod
    def clone_dataview(cls, theobj):
        fields = ['title', 'description', 'settings', 'datasets']
        classobj = cls()
        for field in fields:
            setattr(classobj, field, getattr(theobj, field))
        classobj.cloned_dataview_id = theobj.id
        db.session.add(classobj)
        db.session.commit()
        return classobj

    @classmethod
    def all_by_account(cls, account, order=True):
        """ Query dataviews owned by the given account. """
        return db.session.query(cls).filter_by(account_id=account.id).all()

    @classmethod
    def all(cls, order=True):
        """ Query all dataviews, optionally ordered by title. """
        q = db.session.query(cls)
        if order:
            q = q.order_by(cls.title.asc())
        return q

    @classmethod
    def by_urlhash(cls, urlhash):
        return db.session.query(cls).filter_by(urlhash=urlhash).first()

    @classmethod
    def by_user_settings(cls, settings, account_id):
        return db.session.query(cls).filter_by(settings=settings,
                                               account_id=account_id).first()

    @classmethod
    def by_id(cls, id):
        return db.session.query(cls).filter_by(id=id).first()
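# Sketch: cloning a dataview and addressing both copies independently.
# 'deadbeef' is a placeholder urlhash; a db session is assumed.
original = Dataview.by_urlhash('deadbeef')
copy = Dataview.clone_dataview(original)
assert copy.cloned_dataview_id == original.id
# The clone gets its own urlhash from make_uuid(), so each view keeps a
# distinct shareable URL while pointing back to its origin.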
def upgrade(migrate_engine):
    # Upgrade operations go here. Don't create your own engine; bind
    # migrate_engine to your metadata
    meta.bind = migrate_engine

    # ######################## Account
    account = Table(
        'account', meta,
        Column('id', Integer, primary_key=True),
        Column('fullname', Unicode(2000)),
        Column('email', Unicode(2000), unique=True),
        Column('password', Unicode(2000)),
        Column('api_key', Unicode(2000)),
        Column('usg_group', Unicode(2000)),
        Column('login_hash', Unicode(2000)),
        Column('admin', Boolean, default=False),
        Column('verified', Boolean, default=False))
    account.create()

    # ################# MetadataOrg
    metadataorg = Table(
        'metadataorg', meta,
        Column('id', Integer, primary_key=True),
        Column('label', Unicode(2000)),
        Column('description', Unicode()),
        Column('contactName', Unicode(2000)),
        Column('contactEmail', Unicode(2000)),
        Column('accessLevel', Unicode(2000)),
        Column('bureauCode', Unicode(2000)),
        Column('lastUpdated', DateTime))
    metadataorg.create()

    # ################# DataOrg
    dataorg = Table(
        'dataorg', meta,
        Column('id', Integer, primary_key=True),
        Column('label', Unicode(2000)),
        Column('description', Unicode()),
        Column('ORTemplate', MutableDict.as_mutable(JSONType), default=dict),
        Column('mappingTemplate', MutableDict.as_mutable(JSONType),
               default=dict),
        Column('prefuncs', MutableDict.as_mutable(JSONType), default=dict),
        Column('lastUpdated', DateTime),
        Column('metadataorg_id', Integer, ForeignKey('metadataorg.id')))
    dataorg.create()

    # ################# Source
    source = Table(
        'source', meta,
        Column('id', Integer, primary_key=True),
        Column('name', Unicode(255)),
        Column('url', Unicode),
        Column('created_at', DateTime, default=datetime.utcnow),
        Column('updated_at', DateTime, default=datetime.utcnow,
               onupdate=datetime.utcnow),
        Column('ORid', BigInteger))
    source.create()

    # ################# SourceFile
    sourcefile = Table(
        'sourcefile', meta,
        Column('id', Integer, primary_key=True),
        Column('rawfile', Unicode),
        Column('source_id', Integer, ForeignKey('source.id')),
        Column('created_at', DateTime, default=datetime.utcnow),
        Column('updated_at', DateTime, default=datetime.utcnow,
               onupdate=datetime.utcnow))
    sourcefile.create()

    # ################# Dataset
    dataset = Table(
        'dataset', meta,
        Column('id', Integer, primary_key=True),
        Column('name', Unicode(255), unique=True),
        Column('label', Unicode(2000)),
        Column('description', Unicode),
        Column('category', Unicode()),
        Column('private', Boolean),
        Column('created_at', DateTime, default=datetime.utcnow),
        Column('updated_at', DateTime, default=datetime.utcnow,
               onupdate=datetime.utcnow),
        Column('datalastupdated', DateTime, default=datetime.utcnow),
        Column('source_id', Integer, ForeignKey('source.id')),
        Column('mapping', MutableDict.as_mutable(JSONType), default=dict),
        Column('ORoperations', MutableDict.as_mutable(JSONType),
               default=dict),
        Column('prefuncs', MutableDict.as_mutable(JSONType), default=dict),
        Column('dataType', Unicode(2000)),
        Column('published', Boolean, default=False),
        Column('loaded', Boolean, default=False),
        Column('tested', Boolean, default=False),
        Column('dataorg_id', Integer, ForeignKey('dataorg.id')))
    dataset.create()

    # ################# Runs
    runs = Table(
        'run', meta,
        Column('id', Integer, primary_key=True),
        Column('operation', Unicode(2000)),
        Column('status', Unicode(2000)),
        Column('time_start', DateTime, default=datetime.utcnow),
        Column('time_end', DateTime),
        Column('dataset_id', Integer, ForeignKey('dataset.id'),
               nullable=True),
        Column('source_id', Integer, ForeignKey('source.id'),
               nullable=True))
    runs.create()

    # ################# Many-to-many: accounts to datasets
    account_dataset_table = Table(
        'account_dataset', meta,
        Column('dataset_id', Integer, ForeignKey('dataset.id'),
               primary_key=True),
        Column('account_id', Integer, ForeignKey('account.id'),
               primary_key=True))
    account_dataset_table.create()
class Country(db.Model):

    """ A country record, populated from the geometry tables and keyed
    to them via ``gid``.
    """
    __tablename__ = 'country'
    __searchable__ = ['label']

    id = Column(Integer, primary_key=True)
    gid = Column(Integer, unique=True)
    geounit = Column(Unicode(300), unique=True)
    label = Column(Unicode(300))
    pagesettings = Column(MutableDict.as_mutable(JSONType), default=dict)

    def __init__(self, gid):
        # gid is the reference to the geometry tables:
        # fetch and populate the data from the geometry tables
        self.gid = gid
        result = db.engine.execute("SELECT \
            country_level0.name as geounit, \
            country_level0.label as label \
            FROM public.geometry__country_level0 as country_level0 \
            WHERE country_level0.gid = %s;" % (self.gid,))
        tempobj = result.first()
        if not tempobj:
            # No geometry row for this gid; leave geounit/label unset
            return
        self.geounit = tempobj['geounit']
        self.label = tempobj['label']

    @property
    def sovereignty(self):
        result = db.engine.execute("SELECT \
            country_level0.sovereignt as sovereignty \
            FROM public.geometry__country_level0 as country_level0 \
            WHERE country_level0.gid = %s;" % (self.gid,))
        return result.first()['sovereignty']

    @property
    def regions(self):
        result = db.engine.execute("SELECT \
            country_level0.sovereignt as sovereignty, \
            country_level0.label as label, \
            country_level0.continent as continent, \
            country_level0.georegion as georegion, \
            country_level0.dos_region as dos_region, \
            country_level0.usaid_reg as usaid_reg, \
            country_level0.dod_cmd as dod_cmd, \
            country_level0.feed_the_f as feed_the_f, \
            country_level0.region_un as region_un, \
            country_level0.wb_inc_lvl as wb_inc_lvl \
            FROM public.geometry__country_level0 as country_level0 \
            WHERE country_level0.gid = %s" % (self.gid,))
        tempobj = result.first()
        return tempobj

    @classmethod
    # @cache.memoize(timeout=360)
    def get_all_json(cls):
        regions = ['continent', 'georegion', 'dos_region', 'usaid_reg',
                   'dod_cmd', 'feed_the_f', 'region_un', 'wb_inc_lvl']
        result = db.engine.execute("SELECT \
            country_level0.name as geounit, \
            country_level0.sovereignt as sovereignty, \
            country_level0.label as label, \
            country_level0.iso_a2 as iso_a2, \
            country_level0.continent as continent, \
            country_level0.georegion as georegion, \
            country_level0.dos_region as dos_region, \
            country_level0.usaid_reg as usaid_reg, \
            country_level0.dod_cmd as dod_cmd, \
            country_level0.feed_the_f as feed_the_f, \
            country_level0.region_un as region_un, \
            country_level0.wb_inc_lvl as wb_inc_lvl \
            FROM public.geometry__country_level0 as country_level0 \
            WHERE country_level0.label = country_level0.sovereignt \
            ORDER BY country_level0.name;")
        output = []
        for country in result:
            tempreg = {}
            # tempreg = [country[reg] for reg in regions]
            for reg in regions:
                tempreg[reg] = country[reg]
            output.append({
                'geounit': country["geounit"],
                'label': country['label'],
                'iso_a2': country['iso_a2'],
                'regions': tempreg,
                'selected': False,
                'filtered': False,
                'id': country['iso_a2']
            })
        return output

    @classmethod
    def all(cls):
        """ Query all countries. """
        q = db.session.query(cls)
        return q

    @classmethod
    def by_id(cls, id):
        return db.session.query(cls).filter_by(id=id).first()

    @classmethod
    def by_gid(cls, gid):
        return db.session.query(cls).filter_by(gid=gid).first()

    @classmethod
    def by_geounit(cls, geounit):
        return db.session.query(cls).filter_by(geounit=geounit).first()

    def __repr__(self):
        return "<Country(%r,%r)>" % (self.geounit, self.gid)
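# The queries above interpolate gid directly into the SQL string. A
# bound-parameter variant is sketched below (an alternative, not the
# original code): sqlalchemy.text supplies named parameters so the
# driver handles quoting. The helper name sovereignty_for_gid is
# hypothetical.
from sqlalchemy import text

def sovereignty_for_gid(gid):
    result = db.engine.execute(
        text("SELECT country_level0.sovereignt AS sovereignty "
             "FROM public.geometry__country_level0 AS country_level0 "
             "WHERE country_level0.gid = :gid"),
        gid=gid)
    row = result.first()
    return row['sovereignty'] if row else None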
class View(db.Model):

    """ A view stores a specific configuration of a visualisation
    widget.
    """
    __tablename__ = 'view'

    id = db.Column(db.Integer, primary_key=True)
    widget = db.Column(db.Unicode(2000))
    name = db.Column(db.Unicode(2000))
    label = db.Column(db.Unicode(2000))
    description = db.Column(db.Unicode())
    state = db.Column(MutableDict.as_mutable(JSONType), default=dict)
    public = db.Column(db.Boolean, default=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, onupdate=datetime.utcnow)

    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'))
    account_id = db.Column(db.Integer, db.ForeignKey('account.id'),
                           nullable=True)

    dataset = db.relationship(Dataset, backref=db.backref(
        'views', cascade='all,delete,delete-orphan', lazy='dynamic'))
    account = db.relationship(Account, backref=db.backref(
        'views', cascade='all,delete,delete-orphan', lazy='dynamic'))

    def __init__(self):
        pass

    @classmethod
    def by_id(cls, id):
        return db.session.query(cls).filter_by(id=id).first()

    @classmethod
    def by_name(cls, dataset, name):
        q = db.session.query(cls).filter_by(name=name)
        return q.filter_by(dataset=dataset).first()

    @classmethod
    def all_by_dataset(cls, dataset):
        return db.session.query(cls).filter_by(dataset=dataset)

    def as_dict(self):
        return {
            'id': self.id,
            'widget': self.widget,
            'name': self.name,
            'label': self.label,
            'description': self.description,
            'state': self.state,
            'public': self.public,
            'dataset': self.dataset.name,
            'account': self.account.name if self.account else None
        }

    def __repr__(self):
        return "<View(%s,%s)>" % (self.dataset.name, self.name)
class DataOrg(db.Model):

    """ A DataOrg represents a data-providing organisation and stores
    the shared templates (ORTemplate, mappingTemplate, prefuncs) that
    are applied to its datasets.
    """
    __tablename__ = 'dataorg'
    __searchable__ = ['label', 'description']

    id = Column(Integer, primary_key=True)
    label = Column(Unicode(2000))
    description = Column(Unicode())
    ORTemplate = Column(MutableDict.as_mutable(JSONType), default=dict)
    mappingTemplate = Column(MutableDict.as_mutable(JSONType), default=dict)
    prefuncs = Column(MutableDict.as_mutable(JSONType), default=dict)
    lastUpdated = Column(DateTime, onupdate=datetime.utcnow)

    # metadataorg_id = Column(Integer, ForeignKey('metadataorg.id'))
    # metadataorg = relationship(MetadataOrg,
    #                            backref=backref('dataorgs', lazy='dynamic'))

    def __init__(self, dataorg=None):
        if not dataorg:
            return
        self.label = dataorg.get('label')
        self.description = dataorg.get('description')
        self.ORTemplate = dataorg.get('ORTemplate', {})
        self.mappingTemplate = dataorg.get('mappingTemplate', {})
        self.prefuncs = dataorg.get('prefuncs', {})
        self.lastUpdated = datetime.utcnow()

    def touch(self):
        """ Update the organisation timestamp. This is used for cache
        invalidation.
        """
        self.lastUpdated = datetime.utcnow()
        db.session.add(self)

    def to_json_dump(self):
        """ Returns a JSON-serialisable representation of this
        SQLAlchemy-backed object.
        """
        json = {}
        json['fields'] = {}
        json['pk'] = getattr(self, 'id')
        json['model'] = "DataOrg"
        fields = [
            'label', 'description', 'ORTemplate', 'mappingTemplate',
            'prefuncs'
        ]
        for field in fields:
            json['fields'][field] = getattr(self, field)
        return json

    @classmethod
    def import_json_dump(cls, theobj):
        fields = [
            'label', 'description', 'ORTemplate', 'mappingTemplate',
            'prefuncs'
        ]
        classobj = cls()
        for field in fields:
            setattr(classobj, field, theobj['fields'][field])
            # classobj.set(field, theobj['fields'][field])
        db.session.add(classobj)
        db.session.commit()
        return classobj.id

    def __repr__(self):
        return "<DataOrg(%r,%r)>" % (self.id, self.label)

    def update(self, dataorg):
        self.label = dataorg.get('label')
        self.description = dataorg.get('description')
        self.ORTemplate = dataorg.get('ORTemplate', {})
        self.mappingTemplate = dataorg.get('mappingTemplate', {})
        self.prefuncs = dataorg.get('prefuncs', {})
        self.lastUpdated = datetime.utcnow()

    def as_dict(self):
        return {
            'id': self.id,
            'label': self.label,
            'description': self.description,
            'lastUpdated': self.lastUpdated
        }

    @classmethod
    def get_all_admin(cls, order=True):
        """ Query all data organisations, optionally ordered by label. """
        q = db.session.query(cls)
        if order:
            q = q.order_by(cls.label.asc())
        return q

    @classmethod
    def get_all(cls, order=True):
        """ Query all data organisations, optionally ordered by label. """
        q = db.session.query(cls)
        if order:
            q = q.order_by(cls.label.asc())
        return q

    @classmethod
    def all(cls, order=True):
        """ Query all data organisations, optionally ordered by label. """
        q = db.session.query(cls)
        if order:
            q = q.order_by(cls.label.asc())
        return q

    @classmethod
    def by_name(cls, label):
        return db.session.query(cls).filter_by(label=label).first()

    @classmethod
    def by_id(cls, id):
        return db.session.query(cls).filter_by(id=id).first()


# TODO
# class MetadataOrgSettings(colander.MappingSchema):
#     fullname = colander.SchemaNode(colander.String())
#     email = colander.SchemaNode(colander.String(),
#                                 validator=colander.Email())
#     public_email = colander.SchemaNode(colander.Boolean(), missing=False)
#     twitter = colander.SchemaNode(colander.String(), missing=None,
#                                   validator=colander.Length(max=140))
#     public_twitter = colander.SchemaNode(colander.Boolean(), missing=False)
#     password1 = colander.SchemaNode(colander.String(),
#                                     missing=None, default=None)
#     password2 = colander.SchemaNode(colander.String(),
#                                     missing=None, default=None)
#     script_root = colander.SchemaNode(colander.String(),
#                                       missing=None, default=None)