Example No. 1
def import_csv(dataset, url, args):
    """
    Import the csv data into the dataset
    """

    csv_data_url, source_url = url
    source = Source(dataset, shell_account(),
                    csv_data_url)
    # Analyse the csv data and add it to the source
    # If we don't analyse it we'll be left with a weird message
    source.analysis = analyze_csv(csv_data_url)
    # Check to see if the dataset already has this source
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break
    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Check if imported from the file system (source and data url differ)
    if csv_data_url != source_url:
        # If we did, then we must update the source url based on the
        # sources in the dataset model (so we need to fetch the source again
        # or else we'll add a new one)
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
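
Usage note: `url` is unpacked as a `(csv_data_url, source_url)` pair, and `args` is expanded with `vars()`, so it is expected to be an argparse-style namespace. A minimal calling sketch, assuming an existing `dataset` model object and the openspending imports shown in the later examples (the paths and namespace fields here are hypothetical):

    from argparse import Namespace

    # dry_run/max_lines/max_errors mirror the keyword arguments that
    # importer.run() receives in the load_source examples below
    args = Namespace(dry_run=True, max_lines=1000, max_errors=1000)
    import_csv(dataset,
               ('file:///tmp/budget.csv', 'http://example.org/budget.csv'),
               args)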
Example No. 2
def _get_run(self, dataset, source, id):
    self._get_dataset(dataset)
    require.dataset.update(c.dataset)
    c.source = Source.by_id(source)
    if c.source is None or c.source.dataset != c.dataset:
        abort(404, _("There is no source '%s'") % source)
    c.run = Run.by_id(id)
    if c.run is None or c.run.source != c.source:
        abort(404, _("There is no run '%s'") % id)
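
A note on the idiom: this is Pylons-era controller code, where `c` is the request-scoped template context, `abort` raises the HTTP error response, and `_` is the gettext translation function. The usual imports would be something like this (an assumption from standard Pylons conventions, not shown in the snippet):

    from pylons import tmpl_context as c
    from pylons.controllers.util import abort
    from pylons.i18n import _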
Example No. 3
def get_run(dataset, source, id):
    dataset = get_dataset(dataset)
    source = obj_or_404(Source.by_id(source))
    if source.dataset != dataset:
        raise BadRequest("There was no source")
    run = obj_or_404(Run.by_id(id))
    if run.source != source:
        raise BadRequest("There is no run %s" % str(id))
    return dataset, source, run
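
`obj_or_404` itself is not shown in these snippets; it presumably converts a missing database object into an HTTP 404. A minimal sketch of such a helper, assuming Werkzeug-style exceptions (the `BadRequest` raised above belongs to the same family):

    from werkzeug.exceptions import NotFound

    def obj_or_404(obj):
        # translate "no such row" into an HTTP 404 response
        if obj is None:
            raise NotFound()
        return obj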
Example No. 4
def get_run(dataset, source, id):
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    source = obj_or_404(Source.by_id(source))
    if source.dataset != dataset:
        raise BadRequest("There was no source")
    run = obj_or_404(Run.by_id(id))
    if run.source != source:
        raise BadRequest("There is no run '" + str(id) + '")
    return dataset, source, run
Example No. 5
def check_column(source_id, columnkey, columnvalue):
    with flask_app.app_context():
        source = Source.by_id(source_id)
        sourcerefine = source.get_or_create_ORProject()
        #should cache this at some point
        sourcefile_export = sourcerefine.refineproj.export()
        #remove BOM from the source file
        s = sourcefile_export.read()
        u = s.decode("utf-8-sig")
        sourcefile = io.BytesIO()
        sourcefile.write(u.encode("utf-8"))
        sourcefile.seek(0)  # rewind so DictReader reads from the start
        sourcefile_csv = csv.DictReader(sourcefile, delimiter="\t")

        arrayset = []
        for row in sourcefile_csv:
            print row[columnvalue]
            arrayset.append(row[columnvalue])

        sourcefile.close()

        returnval = {"errors": [], "message": "There was an unexpected error"}

        if columnkey == "country_level0":
            temp_geom_countries = db.session.query("country").from_statement(
                text(
                    "SELECT geometry__country_level0.label as country FROM public.geometry__country_level0 "
                )).all()
            geom_countries = [y for x in temp_geom_countries for y in x]
            temp_geom_countries = None

            returnval['message'] = "The following countries were not found:"

            for country in arrayset:
                #there is probably a better method that takes advantage of a sorted list
                if country not in geom_countries:
                    #log as error
                    returnval['errors'].append(country)

        elif columnkey == "time":
            returnval['message'] = "Could not parse the following dates:"
            for date_col in arrayset:
                try:
                    parse(date_col)
                except Exception, e:
                    returnval['errors'].append(date_col)

        elif columnkey == "indicatorvalue":
            returnval['message'] = "Could not parse the following values: "
            for val_col in arrayset:
                try:
                    float(val_col)
                except:
                    returnval['errors'].append(val_col)
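
A hedged usage sketch (the source id and column names are hypothetical; the function returns the accumulated error report):

    result = check_column(42, "time", "Date")
    if result["errors"]:
        print result["message"], result["errors"]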
Example No. 6
def analyze_source(source_id):
    from openspending.model import meta as db
    from openspending.model.source import Source
    from openspending.importer.analysis import analyze_csv
    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        return
    log.info("Analyzing: %s", source.url)
    source.analysis = analyze_csv(source.url)
    if 'error' in source.analysis:
        log.error(source.analysis.get('error'))
    else:
        log.info("Columns: %r", source.analysis.get('columns'))
    db.session.commit()
Example No. 7
def check_column(source_id, columnkey, columnvalue):
    # with flask_app.app_context():
    source = Source.by_id(source_id)
    sourcerefine = source.get_or_create_ORProject()
    # should cache this at some point
    sourcefile_export = sourcerefine.refineproj.export()
    # remove BOM from the source file
    s = sourcefile_export.read()
    u = s.decode("utf-8-sig")
    sourcefile = io.BytesIO()
    sourcefile.write(u.encode("utf-8"))
    sourcefile.seek(0)  # rewind so DictReader reads from the start
    sourcefile_csv = csv.DictReader(sourcefile, delimiter="\t")

    arrayset = []
    for row in sourcefile_csv:
        print row[columnvalue]
        arrayset.append(row[columnvalue])

    sourcefile.close()

    returnval = {"errors": [], "message": "There was an unexpected error"}

    if columnkey == "country_level0":
        temp_geom_countries = (
            db.session.query("country")
            .from_statement(
                text("SELECT geometry__country_level0.label as country FROM public.geometry__country_level0 ")
            )
            .all()
        )
        geom_countries = [y for x in temp_geom_countries for y in x]
        temp_geom_countries = None

        returnval["message"] = "The following countries were not found:"

        for country in arrayset:
            # there is probably a better method that takes advantage of a sorted list
            if country not in geom_countries:
                # log as error
                returnval["errors"].append(country)

    elif columnkey == "time":
        returnval["message"] = "Could not parse the following dates:"
        for date_col in arrayset:
            try:
                parse(date_col)
            except Exception, e:
                returnval["errors"].append(date_col)
Example No. 8
def load_source(source_id, sample=False):
    from openspending.model.source import Source
    from openspending.importer import CSVImporter
    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        return

    if not source.loadable:
        log.error("Dataset has no mapping.")
        return

    source.dataset.generate()
    importer = CSVImporter(source)
    if sample:
        importer.run(dry_run=True, max_lines=1000, max_errors=1000)
    else:
        importer.run()
        index_dataset.delay(source.dataset.name)
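
A short usage sketch (the `.delay` call suggests these functions run as background jobs, e.g. under Celery; the source id is hypothetical):

    load_source(7, sample=True)   # dry run, capped at 1000 lines / 1000 errors
    load_source(7)                # full import, then queue search indexing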
Example No. 9
def load_source(source_id, sample=False):
    with flask_app.app_context():
        source = Source.by_id(source_id)
        if not source:
            return log.error("No such source: %s", source_id)

        if not source.dataset.mapping:
            return log.error("Dataset has no mapping.")

        # we should drop this first to make sure everything loads correctly
        source.model.drop()

        source.model.generate()

        importer = ORImporter(source)
        if sample:
            importer.run(dry_run=True, max_lines=1000, max_errors=1000)
        else:
            importer.run()
Example No. 10
def load_source(source_id, sample=False):
    # with flask_app.app_context():
    source = Source.by_id(source_id)
    if not source:
        return log.error("No such source: %s", source_id)

    if not source.dataset.mapping:
        return log.error("Dataset has no mapping.")

    # we should drop this first to make sure everything loads correctly
    source.model.drop()

    source.model.generate()

    importer = ORImporter(source)
    if sample:
        importer.run(dry_run=True, max_lines=1000, max_errors=1000)
    else:
        importer.run()
Example No. 11
def load_budgetdatapackage(source_id, sample=False):
    """
    Same as the CSV importer except that it uses the BudgetDataPackage
    importer instead of the CSVImporter
    """
    from openspending.model.source import Source
    from openspending.importer import BudgetDataPackageImporter

    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        return

    if not source.loadable:
        log.error("Dataset has no mapping.")
        return

    source.dataset.generate()
    importer = BudgetDataPackageImporter(source)
    if sample:
        importer.run(dry_run=True, max_lines=1000, max_errors=1000)
    else:
        importer.run()
        index_dataset.delay(source.dataset.name)
Example No. 12
def _get_source(self, dataset, id):
    self._get_dataset(dataset)
    c.source = Source.by_id(id)
    if c.source is None or c.source.dataset != c.dataset:
        abort(404, _("There is no source '%s'") % id)