示例#1
0
    def load_into_db(self, normed_data):
        """Store normalised rows in the ``sqlite_ciafactbook.db`` database.

        Each row of ``normed_data`` is read positionally as
        ``(series name, country name, value, year)``. A ``db.Series`` and a
        ``db.Country`` are looked up by name (or created when no match
        exists) and a ``db.Value`` linking them is created for every row.
        The session is flushed once after all rows have been processed.
        """
        import db
        dburi = 'sqlite:///%s' % 'sqlite_ciafactbook.db'
        repository = db.Repository(dburi)
        # Per-call caches so each name is queried at most once.
        series_by_name = {}
        country_by_name = {}

        def get_or_create(cache, name, model):
            # Serve from the cache when possible; otherwise query by name
            # and fall back to constructing a new object.
            try:
                return cache[name]
            except KeyError:
                pass
            found = model.query.filter_by(name=name).first()
            cache[name] = found if found else model(name=name)
            return cache[name]

        for row in normed_data:
            db.Value(
                series=get_or_create(series_by_name, row[0], db.Series),
                country=get_or_create(country_by_name, row[1], db.Country),
                value=row[2],
                year=row[3],
            )
        db.Session.flush()
示例#2
0
 def _process_sheet(self, tabular_data):
     """Create a ``db.PesaTable`` and its ``db.Expenditure`` rows from a sheet.

     The table title is the first cell of the first row of
     ``tabular_data.data``. Footnotes are gathered by walking the rows from
     the bottom up until a row with more than two cells is reached; they
     are stored JSON-encoded, in that bottom-up order. For every row from
     index 6 onward whose second cell is truthy, cells 1-9 are paired with
     the years 2002-2010 and one expenditure is created per pair. The
     session is flushed at the end.
     """
     import db
     repo = db.Repository('sqlite:///%s' % dbpath)
     years = range(2002, 2011)
     cells = tabular_data.data
     table = db.PesaTable(title=cells[0][0])

     footnotes = []
     for trailing_row in reversed(cells):
         if len(trailing_row) <= 2:
             note = trailing_row[0].strip()
             if note:
                 footnotes.append(note)
         else:
             # Reached the data rows; everything above is not a footnote.
             break
     table.footnotes = simplejson.dumps(footnotes)

     for row in cells[6:]:
         if not row[1]:
             # Subheading or footnote row: nothing to record.
             continue
         series_name = row[0]
         for year, cell in zip(years, row[1:10]):
             db.Expenditure(
                 title=series_name,
                 date=unicode(year),
                 amount=swiss.floatify(cell),
                 pesatable=table,
             )
     db.Session.flush()
示例#3
0
class TestRepository:
    """Round-trip test of the ``db`` domain model."""

    # Built once, when the class body is executed.
    repo = db.Repository(dburi)

    def test_domain_model(self):
        """Create a Value tied to a Country and a Series, then read it back."""
        argentina = db.Country(code=1, name='Argentina')
        gdp = db.Series(code=694, name='GDP', is_goal=True)
        db.Value(country=argentina, series=gdp, year=1990, value=0.5)
        db.Session.flush()
        # Clear the session before querying.
        db.Session.clear()

        stored = db.Value.query.all()
        assert len(stored) == 1
        only_value = stored[0]
        assert only_value.value == 0.5
        assert only_value.country.name == 'Argentina'
示例#4
0
    subf = CRA_DATA["functions/" + slugify(function) + "/" +
                    slugify(subfunction)]
    g = Graph(identifier="%s" % subf)
    g.add((subf, RDF.type, CRA["SubFunction"]))
    g.add((subf, RDFS.label, Literal(subfunction)))
    g.add((subf, DC["title"], Literal(subfunction)))
    g.add((subf, DC["identifier"], Literal(slugify(subfunction))))
    g.add((subf, CRA["function"], CRA_DATA["functions/" + slugify(function)]))
    g.add((subf, SCV["dataset"], cra))
    return g


# Script entry point: set up the database, logging and the 4store connection,
# then replace the schema model in the store.
if __name__ == '__main__':
    # The sqlite database lives inside the local 'cache' directory.
    cache = swiss.Cache('cache')
    dburi = 'sqlite:///%s' % cache.cache_path('ukgov_finances_cra.db')
    # Return value is discarded; presumably constructing the Repository
    # configures the db module for this URI -- TODO confirm.
    db.Repository(dburi)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s:%(levelname)s - %(message)s")
    log = logging.getLogger("cra[rdf]")

    # Imported here rather than at module top, so py4s is only needed when
    # this file is run as a script.
    from py4s import FourStore
    store = FourStore("ckan")
    store.connect()
    cursor = store.cursor()

    # Start timestamp; its use is not visible in this part of the file.
    t0 = datetime.now()

    # Drop any existing schema model before adding the freshly built one.
    cursor.delete_model(CRA_SCHEMA_URI)
    cursor.add_model(schema())
示例#5
0
def load_normed_data_into_db():
    """Load ``data.csv`` through a ``db.Repository`` opened on ``dburi``."""
    import db
    repository = db.Repository(dburi)
    repository.load_normed_data('data.csv')
示例#6
0
    def load(self):
        '''Load the cached CSV into ``db.Area`` and ``db.Expenditure`` rows.

        The file for ``url`` is fetched through ``cache`` and its heading row
        is skipped. Every remaining row with a non-blank department becomes a
        ``db.Area``; each non-zero amount in columns 10 onward becomes a
        ``db.Expenditure`` for consecutive years starting at 2003. The
        session is committed and removed whenever the row index is a
        multiple of 5000 (blank-department rows are skipped before that
        check), and committed once more after the last row.

        Design notes:

        Looks like LA is very limited and is always associated with a given
        "department" -- so this is really a classifier for the account.

        Simplest normalization:
            * years
            * dept FK

        Expenditure
            * subfunc
            * year
            * caporcur
            * region: usual ones ... (ID or Non-ID not needed ...)
            * programme FK

        Open question: does the same programme ever occur within two
        different departments?

        Programme
            * department

        Department?

        What questions do I want to ask:
            * Basically we want to browse in by facets
            * Region, func, subfunc, ...
        '''
        import db
        fp = cache.retrieve(url)
        # theoretically we'd have distributions to dept from CG as well ...
        # acc = 'CG' acc = 'LA'

        # dept -> account

        # Tag accounts:
        # subfunc
        # Tags relate to other tags ...

        repo = db.Repository(dburi)

        def _clean(_str):
            # Strip surrounding whitespace and coerce to unicode.
            return unicode(_str.strip())

        # Use a context manager so the CSV file handle is always closed,
        # including when a row fails to load.
        with open(fp) as csvfile:
            reader = csv.reader(csvfile)
            # skip headings
            next(reader)
            for count, row in enumerate(reader):
                deptcode = _clean(row[0])
                dept = _clean(row[1])
                # have some blank rows at end
                if not dept:
                    continue
                function = _clean(row[2])
                subfunction = _clean(row[3])
                pog = _clean(row[4])
                poga = _clean(row[5])  # take verbose one
                caporcur = _clean(row[7])
                region = _clean(row[9])
                exps = row[10:]
                area = db.Area(title=poga,
                               deptcode=deptcode,
                               department=dept,
                               function=function,
                               subfunction=subfunction,
                               pog=pog,
                               cap_or_cur=caporcur,
                               region=region)
                for ii, exp in enumerate(exps):
                    amount = swiss.floatify(exp)
                    if amount:  # do not bother with null or zero amounts
                        area.expenditures.append(
                            db.Expenditure(amount=amount, year=2003 + ii))
                # Commit in batches so the session does not grow unbounded.
                if count % 5000 == 0:
                    print('Completed: %s' % count)
                    db.Session.commit()
                    db.Session.remove()
        db.Session.commit()
示例#7
0
 def getSurvivalHighScores(self):
     """Return ``getSurvivalHighscore()`` from a newly built ``db.Repository``."""
     repository = db.Repository()
     return repository.getSurvivalHighscore()
示例#8
0
 def getQuizHighScores(self):
     """Return ``getQuizHighscore()`` from a newly built ``db.Repository``."""
     repository = db.Repository()
     return repository.getQuizHighscore()
示例#9
0
 def addToGoodQuestions(self, data):
     """Pass ``data`` to ``addToGoodQuestions`` on a newly built ``db.Repository``."""
     repository = db.Repository()
     repository.addToGoodQuestions(data)
示例#10
0
 def addQuizHighScore(self, data):
     """Pass ``data`` to ``addQuizHighScore`` on a newly built ``db.Repository``."""
     repository = db.Repository()
     repository.addQuizHighScore(data)
示例#11
0
 def addSurvivalHichScore(self, data):
     """Pass ``data`` to ``addSurvivalHichScore`` on a newly built ``db.Repository``.

     NOTE(review): "Hich" looks like a typo for "High"; the name is kept
     because it matches the repository method this delegates to.
     """
     repository = db.Repository()
     repository.addSurvivalHichScore(data)
示例#12
0
 def deployGoodQuestion(self):
     """Return ``getAllGoodQuestionsInOrderOfBestQuestions()`` from a new ``db.Repository``."""
     repository = db.Repository()
     return repository.getAllGoodQuestionsInOrderOfBestQuestions()