Example No. 1
    def process_mapping(self, form):
        records = []
        mapping = self.__get_mapping(form)
        Model = get_model(self.app_label, self.model_name)
        with open_csv(self.temp_file_name) as csv:
            if form.cleaned_data['header']:
                csv.next()
            for i, row in enumerate(csv):
                if i > 20 and not form.cleaned_data['preview_all']:
                    break
                try:
                    sample = Model()
                    record, key = self._process_row(row, mapping)
                    if key and Model.objects.filter(**key).exists():
                        sample = Model.objects.get(**key)
                    sample = update_model(self.request, sample, record, mapping)

                    records.append([sample, None, row])
                except (ValidationError, AttributeError), e:
                    records.append([sample, str(e), row])
                except (ValueError, ObjectDoesNotExist, ValidationError), e:
                    messages.error(self.request, '%s' % e)
                    records.append([sample, str(e)])
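The exists()/get() pair above hits the database twice for every existing key; in current Django versions QuerySet.first() can do the same lookup in one query. A small sketch of that variant (get_or_new is a hypothetical helper, not part of the snippet):

def get_or_new(model_cls, key):
    """Return the existing instance matching key, or a fresh unsaved one."""
    if key:
        existing = model_cls.objects.filter(**key).first()  # one query instead of exists() + get()
        if existing is not None:
            return existing
    return model_cls()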
Example No. 2
def build_map(csvfile_name):
    with open(csvfile_name, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        header = reader.next()
    map = ColumnMap()
    map.build_from(header)

    return map
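build_map, like most snippets on this page, uses the Python 2 reader.next() method, which Python 3 removed in favour of the built-in next(). A minimal Python 3 sketch of just the header read (read_header is a hypothetical helper):

import csv

def read_header(csvfile_name):
    # newline='' is the mode the Python 3 csv module expects
    with open(csvfile_name, newline='') as csvfile:
        reader = csv.reader(csvfile)
        return next(reader)  # replaces reader.next()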
Example No. 3
def addDataFromCSV(featureClassPath, csvPath):
    """
    Consumes a CSV to insert data into a Feature Class already available in a GDB.
    The function checks whether the CSV schema matches the Feature Class schema, ignoring
    fields such as OBJECTID or SHAPE, before inserting the data.
    :param featureClassPath: Path of the Feature Class that will be updated
    :param csvPath: Path of the CSV with the data to insert
    :return: Returns nothing
    """
    csvColumnNames = []
    featureClasssAttrNames = []
    with open(csvPath, "r") as csv:
        names = csv.next()[:-1].split(",")
        for name in names:
            csvColumnNames.append(name)
    fcFields = arcpy.ListFields(featureClassPath)
    for fieldName in fcFields:
        if fieldName.name != "OBJECTID" and fieldName.name != "SHAPE":
            featureClasssAttrNames.append(fieldName.name.encode("utf-8"))

    if set(featureClasssAttrNames).issubset(csvColumnNames):
        insertCursorList = featureClasssAttrNames
        insertCursorList.append("SHAPE@X")
        insertCursorList.append("SHAPE@Y")
        with open(csvPath, "r") as _csv:
            _csv.readline()
            with arcpy.da.InsertCursor(featureClassPath,
                                       insertCursorList) as insertCursor:
                for row in _csv:
                    insertCursor.insertRow(row[:-1].split(","))

    else:
        print "The CSV schema does not match the Feature Class schema"
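The schema check in the function above reduces to a set comparison between the CSV header and the feature class field names. The same test in isolation, with literal sample field names standing in for the real schemas:

# Stand-alone illustration of the schema test; the field names are samples only.
fc_fields = ["NAME", "HEIGHT", "USE"]
csv_columns = ["NAME", "HEIGHT", "USE", "SHAPE@X", "SHAPE@Y"]
if set(fc_fields).issubset(csv_columns):
    print("CSV covers every feature class attribute")
else:
    print("The CSV schema does not match the Feature Class schema")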
Example No. 4
def getKeys(csv):
    keys = {}
    idx = -1

    for i in csv.next():
        idx += 1
        i = i.lower()
        if "file" in i:
            keys['file'] = idx
        elif "sample" in i:
            keys['name'] = idx
        elif "locus" in i or 'marker' in i:
            keys['locus'] = idx
        elif "call" == i:
            keys.setdefault('call', []).append(idx)
        elif "allele" in i:
            keys.setdefault('call', []).append(idx)
        elif "rfu" == i:
            keys.setdefault('rfu', []).append(idx)

    if not keys:
        return getKeys(csv)

    return keys
Example No. 5
def parse_types(csv, opt=None):
    types = []
    if opt == 0:
        # data types in dedicated line below the header line
        for t in csv.next():
            items = re.findall(r'\w+', t)
            types.append(tuple(element for element in items[opt:]))
    if opt == 1:
        csv.rewind(0)
        # data types beside column names,
        # values are delimited by a non-alphanumeric character, like:
        # id:integer, name-varchar-30, income/float/5
        for t in csv.next():
            items = re.findall(r'\w+', t)
            types.append(tuple(element for element in items[opt:]))
    if opt == 'default':
        csv.rewind(0)
        for item in csv.next():
            types.append(('text', ))
        csv.rewind(0)
    return types
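The opt == 1 branch relies on re.findall(r'\w+', ...) to split mixed name/type/size tokens regardless of the delimiter used. A quick stand-alone check of that behaviour, using the sample tokens from the comment above:

import re

for token in ("id:integer", "name-varchar-30", "income/float/5"):
    print(re.findall(r'\w+', token))
# ['id', 'integer'], ['name', 'varchar', '30'], ['income', 'float', '5']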
Example No. 6
def parse_types(csv, opt=None):
    types = []
    if opt == 0:
        # data types in dedicated line below the header line
        for t in csv.next():
            items = re.findall(r'\w+', t)
            types.append(tuple(element for element in items[opt:]))
    if opt == 1:
        csv.rewind(0)
        # data types beside column names,
        # values are delimited by a non-alphanumeric character, like:
        # id:integer, name-varchar-30, income/float/5
        for t in csv.next():
            items = re.findall(r'\w+', t)
            types.append(tuple(element for element in items[opt:]))
    if opt == 'default':
        csv.rewind(0)
        for item in csv.next():
            types.append(('text',))
        csv.rewind(0)
    return types
Example No. 7
	def __init__(self, csv):

		self.name = ""
		self.partner_chart_list = []
		self.redemptions = []
		csv.next()
		for row in csv:
			
			if self.name == "":
				self.name = row[0]
				print self.name

			if row[1] not in self.partner_chart_list:
				self.partner_chart_list.append(row[1])

			redemption = Redemption(row)
			
			#print redemption.origin
			self.redemptions.append(redemption)
			
			reverse_redemption = Redemption.fromReverseRedemption(redemption)
			self.redemptions.append(reverse_redemption)
Example No. 8
    def execute(self, inputs=None):
        if not hasattr(self, 'csv_stream'):
            self.open_stream()
        batch_size = self.batch_size
        csv = self.csv_stream
        if batch_size == -1:
            batch = [row for row in csv]
            self.open_stream()
            return batch
        batch = []
        for _ in range(batch_size):
            try:
                batch.append(csv.next())
            except StopIteration:
                self.open_stream()
                return batch
        return batch
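The manual range() / StopIteration loop above is one way to pull a fixed-size batch from an iterator; itertools.islice expresses the same idea more compactly. A sketch that is independent of the stream class used above and leaves out the stream reopening (take_batch is a hypothetical helper):

from itertools import islice

def take_batch(rows, batch_size):
    """Return up to batch_size items from the rows iterator (fewer at the end)."""
    if batch_size == -1:
        return list(rows)  # -1 means "take everything", as above
    return list(islice(rows, batch_size))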
Example No. 9
def getKeys(csv):
    keys = {}
    idx = -1
    for i in csv.next():
        idx += 1
        i = i.lower()
        if "file" in i:
            keys['file'] = idx
        elif "sample" in i:
            keys['name'] = idx
        elif "locus" in i or 'marker' in i:
            keys['locus'] = idx
        elif "call" == i:
            keys.setdefault('call', []).append(idx)
        elif "rfu" == i:
            keys.setdefault('rfu', []).append(idx)
        elif keys.has_key('name') and "allele" in i:
            keys.setdefault('call', []).append(idx)
    if not keys:
        return getKeys(csv)
    return keys
Example No. 10
import sys
import pprint
import string
import csv
import json
import geopy
import time

filename = sys.argv[1]
csv = csv.reader(open(filename, 'r'), delimiter=',', quotechar='"')

trade_data = { 'countries': {}, 'trade': {} }
could_not_geocode = []

header = csv.next()
g = geopy.geocoders.Google()

# geocoder is failing on the following countries. doing them by hand.
manual_geocodes = {
'Mexico': [22.593726,-101.777344],
'Jamaica': [18.156291,-77.294312],
'Grenada': [12.118551,-61.680679],
'Sint Maarten': [18.083854,-63.052597],
'Svalbard, Jan Mayen Island': [71.008023,-8.421021],
'Georgia': [42.098222,43.395996],
'Gibraltar': [36.13427,-5.347767],
'San Marino': [43.938945,12.463303],
'Yugoslavia (fomer)': [43.850374,19.6875],
'Serbia and Montenegro': [43.084937,19.907227],
'Greece': [39.690281,21.75293],
Example No. 11
import csv
import sqlite3 as sqlite
import numpy as np
import matplotlib.pyplot as plt

import getfile

csvfile = open(getfile.get_from_strawlab("week1/CTS.csv"),'rb')
con = sqlite.connect(':memory:')

with con, csvfile:
    csv = csv.reader(csvfile)
    cur = con.cursor()
    cur.execute("CREATE TABLE CTS(date INTEGER PRIMARY KEY, co2 FLOAT, temp FLOAT);")

    header = csv.next() #save the csv header row
    idx_date = header.index("yr_mn")
    idx_co2 = header.index("CO2")
    idx_temp = header.index("GISS")
    for row in csv:
        cur.execute("INSERT INTO CTS VALUES (?,?,?)", (row[idx_date],row[idx_co2],row[idx_temp]) )

    cur.execute("SELECT date, co2 FROM cts WHERE co2 != 'NA'")
    data = np.array(cur.fetchall())

    plt.plot(data[:,0],data[:,1])
    plt.xlabel("date"); plt.ylabel("CO2 (ppm)")
    plt.show()
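The row-by-row INSERT above can also be issued as a single executemany() call, which sqlite3 accepts with any iterable of parameter tuples. A sketch assuming the same cursor and column indexes (bulk_insert is a hypothetical helper):

def bulk_insert(cur, rows, idx_date, idx_co2, idx_temp):
    # rows is assumed to be the remaining csv rows after the header
    cur.executemany(
        "INSERT INTO CTS VALUES (?,?,?)",
        ((row[idx_date], row[idx_co2], row[idx_temp]) for row in rows),
    )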

Example No. 12
    def _head(self, rows=10):
        with open_csv(self._filename) as csv:
            output = []
            for i in range(rows):
                output.append(csv.next())
        return output
Example No. 13
# Created by: Tim Bramlett
# For questions: 
# [email protected] OR
# [email protected]

import csv
import requests
import ast

f = open("test.csv", "r")
csv = csv.reader(f, dialect='excel')
csv.next() # skips the header

# I simply used this in order to not have to hard-code the token in this script.
#   I was writing this on a Windows VM and couldn't remember how to script Env Variables
#   on Windows :)
with open(".secret.txt") as file:
    somewhatSecret = file.read()
    somewhatSecret = ast.literal_eval(somewhatSecret)
    # Note: With statements are a context manager.
    # As such, they do not affect scope.
    # Hence, somewhatSecret is available outside the
    # With statement

headers = {
    'authorization': "Basic YWRtaW46YWRtaW4=",
    'x-cloupia-request-key': somewhatSecret['apikey'],
    'cache-control': "no-cache",
    'postman-token': "0d544a7c-8a76-195f-eb69-404857ad217e"
    }
Example No. 14
'''
Organize raw data on .dat format to run first SOM and have something to play
with
'''

import csv

raw_csv = '/home/dani/AAA/LargeData/WDIandGDF_csv/WDI_GDF_Data.csv'

data = {} # data[cty][year] = {'var_names': [], 'var_values': []}

csv = csv.reader(open(raw_csv, 'r'), delimiter = ',', quotechar='"')
years = csv.next()[4:]

variables = []
countries = []
cty2name = {}

for line in csv:
    var, var_name, cty, cty_name, values = line[0], line[1], line[2], line[3], \
            line[4:]
    variables.append(var)
    countries.append(cty)
    if cty not in cty2name:
        cty2name[cty] = cty_name
    if cty not in data:
        data[cty] = {}
    for year in years:
        if year not in data[cty]:
            data[cty][year] = {}
        data[cty][year][var] = values[years.index(year)]
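Looking values up with years.index(year) rescans the header for every cell; zip() pairs years and values positionally in a single pass. A sketch of the inner loop as a small helper, using the same names as above (add_line is hypothetical):

def add_line(data, cty, var, years, values):
    """Store one CSV line into the nested data[cty][year][var] mapping."""
    country = data.setdefault(cty, {})
    for year, value in zip(years, values):  # no index() rescans
        country.setdefault(year, {})[var] = value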
Example No. 15
'''
Organize raw data on .dat format to run first SOM and have something to play
with
'''

import csv

raw_csv = '/home/dani/AAA/LargeData/WDIandGDF_csv/WDI_GDF_Data.csv'

data = {}  # data[cty][year] = {'var_names': [], 'var_values': []}

csv = csv.reader(open(raw_csv, 'r'), delimiter=',', quotechar='"')
years = csv.next()[4:]

variables = []
countries = []
cty2name = {}

for line in csv:
    var, var_name, cty, cty_name, values = line[0], line[1], line[2], line[3], \
            line[4:]
    variables.append(var)
    countries.append(cty)
    if cty not in cty2name:
        cty2name[cty] = cty_name
    if cty not in data:
        data[cty] = {}
    for year in years:
        if year not in data[cty]:
            data[cty][year] = {}
        data[cty][year][var] = values[years.index(year)]
Example No. 16
	"delivery": "url",
	"mailsubject": "",
	"mailfrom":"",
	"mailto":"",
	"mailmessage":""
}

r = s.post(url, data=payload)
soup = BeautifulSoup(r.content, 'lxml')
download = soup.find('a', text='Download')

r = s.get('https://spinitron.com' + download.get('href'))

# Set up CSV reader and process the header
csv = csv.reader(r.content.splitlines())
header = csv.next()
artist_ind = header.index("Artist")
album_ind = header.index("Disk")
song_ind = header.index("Song")
 
# Make an empty song list and per-category count dicts
song_list = []
artist_counts = {}
album_counts = {}
song_counts = {}
 
# Loop through the rows and pull out the artist, album, and song fields
for row in csv:
	artist = row[artist_ind]
	album = row[album_ind]
	song = row[song_ind]
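The three *_counts dictionaries prepared above are presumably filled in the part of the loop the listing cuts off; collections.Counter does the same tallying directly. A sketch under that assumption (count_plays is a hypothetical helper that expects (artist, album, song) tuples):

from collections import Counter

def count_plays(rows):
    artist_counts, album_counts, song_counts = Counter(), Counter(), Counter()
    for artist, album, song in rows:
        artist_counts[artist] += 1
        album_counts[album] += 1
        song_counts[song] += 1
    return artist_counts, album_counts, song_counts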
Example No. 17
import csv
import sys
import re

if len(sys.argv) != 2:
    print "Usage: %s <file.csv>" % (sys.argv[0])
    sys.exit()

fd = open(sys.argv[1], "rb")
csv = csv.reader(fd)
paramigrar = []
total = 0
regexp = re.compile("http://guifi.net/node/(\d+)$")

for i in range(6):
    csv.next()

# CSV lines whose 4th field != 'no' are kept
try:
    while (True):
        l = csv.next()
        total = total + 1
        try:
            if l[3].lower() != 'no':
                l.pop(3)  # This field is no longer useful
                l[0] = regexp.search(l[0]).group(
                    1)  # Replace the URL with the nid directly
                paramigrar.append(l)
        except IndexError:
            print "-- Index Error --", l
except StopIteration:
Example No. 18
    def handle(self, *args: None, **options: Dict[Any, Any]) -> None:
        """Command handle."""
        csv: FileIterator = FileIterator(options["csv_path"][0])
        datas = Data.objects.all().order_by("location__pk")
        counter: Dict[str, int] = {
            "match": 0,
            "csv_only": 0,
            "models_only": 0,
            "hash_mismatch": 0,
        }
        subpath_map: SubpathMap = map_subpath_locations(csv)

        for data in datas.iterator():
            subpath = data.location.subpath
            urls = data.location.files
            urls = urls.exclude(Q(path__endswith="/"))  # exclude directories
            urls = ModelIterator(urls.order_by("path"))

            if subpath not in subpath_map:
                filecount = urls.count
                files = f"({filecount} files)"
                self.stdout.write(f"MODEL-ONLY {subpath}/* {files}")
                counter["models_only"] += filecount
                continue
            subpath_map[subpath]["visited"] = True

            csv.restrict(
                start=subpath_map[subpath]["start"],
                end=subpath_map[subpath]["end"],
            )
            csv.seek_relative(0)

            next_in_models, model_hash = urls.next()
            next_in_csv, csv_hash = csv.next()
            while next_in_csv and next_in_models:
                if next_in_models == next_in_csv:
                    # entries match, verify checksum
                    if model_hash == csv_hash:
                        counter["match"] += 1
                    else:
                        fullpath = f"{subpath}/{next_in_models}"
                        hashes = f"{model_hash} != {csv_hash}"
                        self.stdout.write(f"HASH {fullpath} {hashes}")
                        counter["hash_mismatch"] += 1
                    # advance both
                    next_in_models, model_hash = urls.next()
                    next_in_csv, csv_hash = csv.next()
                elif next_in_models < next_in_csv or not csv.has_next():
                    # entries are missing in CSV
                    # (models are alphabetically *behind*)
                    fullpath = subpath + "/" + next_in_models
                    self.stdout.write(f"MODEL-ONLY {fullpath}")
                    counter["models_only"] += 1
                    next_in_models, model_hash = urls.next()  # advance models
                elif next_in_models > next_in_csv or not urls.has_next():
                    # entries are missing in models
                    # (models are alphabetically *ahead*)
                    fullpath = subpath + "/" + next_in_csv
                    self.stdout.write(f"CSV-ONLY {fullpath}")
                    counter["csv_only"] += 1
                    next_in_csv, csv_hash = csv.next()  # advance CSV

            # either (or both) of the iterators is finished,
            # now we need to exhaust the other
            while next_in_csv:
                self.stdout.write(f"CSV-ONLY {subpath}/{next_in_csv}")
                counter["csv_only"] += 1
                next_in_csv, csv_hash = csv.next()
            while next_in_models:
                self.stdout.write(f"MODEL-ONLY {subpath}/{next_in_models}")
                counter["models_only"] += 1
                next_in_models, model_hash = urls.next()

        # list all subpaths from CSV that we haven't visited
        # while traversing models' data
        for subpath in subpath_map:
            if "visited" not in subpath_map[subpath]:
                filecount = subpath_map[subpath]["linecount"]
                self.stdout.write(f"CSV-ONLY {subpath}/* ({filecount} files)")
                counter["csv_only"] += filecount

        # print an overview/summary
        out = ""
        out += f"{counter['match']} files OK"
        if counter["csv_only"] != 0:
            out += f", {counter['csv_only']} files in CSV only"
        if counter["models_only"] != 0:
            out += f", {counter['models_only']} files in models only"
        if counter["hash_mismatch"] != 0:
            out += f", {counter['hash_mismatch']} files do not match the hash"
        self.stdout.write(out)

        # double check the numbers just in case
        ReferencedPath_count = ReferencedPath.objects.exclude(
            Q(path__endswith="/")
        ).count()
        self.stdout.write(f"CSV length = {csv.length}")
        self.stdout.write(f"ReferencedPath count = {ReferencedPath_count}")

        matches = counter["hash_mismatch"] + counter["match"]
        csv_records = matches + counter["csv_only"]
        models_records = matches + counter["models_only"]
        # this should never happen, but it's better to check,
        # just because it's so easy to do
        if csv_records != csv.length:
            self.stdout.write(
                "Numbers don't add up."
                " OK + csv_only + hash_mismatch != CSV.line_count."
            )
        if models_records != ReferencedPath_count:
            self.stdout.write(
                "Numbers don't add up."
                " OK + models_only + hash_mismatch != ReferencedPath_count."
                " There might be orphaned ReferencedPaths."
            )
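The while loop in handle() is a merge of two alphabetically sorted streams, model paths on one side and CSV paths on the other. The same comparison logic in a stripped-down, stand-alone form, with plain sorted lists in place of the ModelIterator and FileIterator wrappers (diff_sorted is a hypothetical helper):

def diff_sorted(model_paths, csv_paths):
    """Yield ('both' | 'model-only' | 'csv-only', path) for two sorted path lists."""
    m, c = iter(model_paths), iter(csv_paths)
    left, right = next(m, None), next(c, None)
    while left is not None and right is not None:
        if left == right:
            yield ('both', left)
            left, right = next(m, None), next(c, None)
        elif left < right:
            yield ('model-only', left)
            left = next(m, None)
        else:
            yield ('csv-only', right)
            right = next(c, None)
    while left is not None:
        yield ('model-only', left)
        left = next(m, None)
    while right is not None:
        yield ('csv-only', right)
        right = next(c, None)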
Example No. 19
import csv
import sys
import re

if len(sys.argv) != 2:
	print "Usage: %s <file.csv>" %(sys.argv[0])
	sys.exit()

fd = open(sys.argv[1], "rb")
csv = csv.reader(fd)
paramigrar = []
total = 0
regexp = re.compile("http://guifi.net/node/(\d+)$")

for i in range(6):
	csv.next()

# CSV lines whose 4th field != 'no' are kept
try:
	while(True):
		l = csv.next()
		total = total + 1
		try:
			if l[3].lower() != 'no':
				l.pop(3) # This field is no longer useful
				l[0] = regexp.search(l[0]).group(1) # Replace the URL with the nid directly
				paramigrar.append(l)
		except IndexError:
			print "-- Index Error --", l
except StopIteration:
	pass
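The six reader.next() calls plus the while True / StopIteration loop can be collapsed into a single for loop over itertools.islice, which skips the preamble rows and stops cleanly at the end of the file. A sketch that keeps the Python 2 style file mode used above (rows_to_migrate is a hypothetical helper):

import csv
import itertools

def rows_to_migrate(path):
    """Yield rows whose 4th field is not 'no', skipping the 6-line preamble."""
    with open(path, "rb") as fd:
        reader = csv.reader(fd)
        for row in itertools.islice(reader, 6, None):
            if len(row) > 3 and row[3].lower() != 'no':  # short rows are simply skipped
                yield row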
Example No. 20
    start = timeit.default_timer()
    #     pool = Pool()

    fileName = sys.argv[1]

    MN = int(sys.argv[2])
    UN = int(sys.argv[3])
    F = int(sys.argv[4])
    ITERATIONS = int(sys.argv[5])

    rawR = []
    onlyUid = []
    onlyMovieID = []

    csv = open(fileName, 'r')
    csv.next()
    for row in csv:
        eachR = row.split(',')

        eachR.pop(3)
        onlyUid.append(int(eachR[0]))
        onlyMovieID.append(int(eachR[1]))

        rawR.append(map(float, eachR))

    onlyUid.sort()
    onlyMovieID.sort()

    onlyUid = list(set(onlyUid))
    onlyUid.sort()
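The sort, set(), sort-again sequence applied to onlyUid above reduces to a single sorted(set(...)) call:

def unique_sorted(values):
    """Deduplicate and sort in one step, replacing the sort/set/sort sequence above."""
    return sorted(set(values))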
Example No. 21
import argparse
import csv

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('-c','--csv', help='Load events from csv.', action='store_false')
parser.add_argument('-r','--random', help='Send random events. (will use a list otherwise)', action='store_true')
parser.add_argument('-l','--loop', help='Loop events sequence.', action='store_true')

args = parser.parse_args()

csv_data = []
# columns that must be converted to float
float_indexes = [0,2,8,9,10,13,14,15,16,19,25,26,27,30,31,32,33,36,42,43,44,47,48,49,50]
with open('data_files/smalley.csv', 'rb') as csvfile:
    csv = csv.reader(csvfile, delimiter=',', quotechar='"')
    csv.next() # first row
    csv.next() # and second row are headers
    for row in csv:
        for i, val in enumerate(row):
            if i in float_indexes:
                try:
                    row[i] = float(row[i].replace(",", "."))
                except:
                    row[i] = 0

        csv_data.append(row)


osc_port = 57120 #default SuperCollider port (must be open before executing this program)
# a simple helper for printing in ANSI colors: 'o' for ok, 'w' for warning, 'e' for error.
def printc(t, c='o'):
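The nested try/except that converts comma-decimal strings to float (falling back to 0) can be pulled into a small helper, and catching ValueError specifically avoids hiding unrelated errors that the bare except above would swallow. A sketch (to_float is a hypothetical helper):

def to_float(value, default=0.0):
    """Parse a comma-decimal string such as '1,5'; return default when it is not numeric."""
    try:
        return float(value.replace(",", "."))
    except ValueError:
        return default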
Example No. 22
    def _head(self, rows=10):
        with open_csv(self._filename) as csv:
            output = []
            for i in range(rows):
                output.append(csv.next())
        return output
Example No. 23
    def command(cls, config_ini, options, submissions_csv_filepath):

        # Incentive CSV. Columns:
        # applicationnumber, applicationdate, jobrole, laname, officerauthorised, theme, responsedate, acceptancestatus, odicertificateurl, dguurl, inventoryurl, localcodes, dataseturl, schemaurl, guidanceurl, frequencyofpublishing, foinumberest, submissioncomplete, lastlaupdate, techreviewstatus, lasttechupdate, adminreviewstatus, paymentamount, closed, lastadminupdate, applicantnotes, administrationnotes, technicalnotes, lastupdated
        with open(submissions_csv_filepath, 'rb') as f:
            csv = UnicodeCsvReader(f, encoding='iso-8859-1')
            header = csv.next()
            header = [col_name.strip().lower().replace(' ', '_') for col_name in header]
            Submission = namedtuple('Submission', header)
            submissions = [Submission(*row) for row in csv]

        if config_ini:
            # this is only for when running from the command-line
            #print 'Loading CKAN config...'
            common.load_config(config_ini)
            common.register_translator()
            #print '...done'

        from ckan import model
        from ckan.plugins import toolkit
        from ckanext.dgu.lib import helpers as dgu_helpers
        from ckanext.dgu.model.schema_codelist import Schema

        log = __import__('logging').getLogger(__name__)

        # Match the organizations in the submissions
        lga_orgs_by_dgu_org_name = {}
        accepted_submission_dgu_orgs = set()
        for submission in submissions:
            la_title = la_map.get(submission.laname, submission.laname)
            org = model.Session.query(model.Group) \
                       .filter_by(title=la_title) \
                       .first()
            assert org, 'Submission org title not found: %r' % la_title
            lga_orgs_by_dgu_org_name[org.name] = submission.laname
            if submission.acceptancestatus == 'Accepted':
                accepted_submission_dgu_orgs.add(org.name)

        stats = Stats()
        stats_incentive = Stats()
        results = []

        if options.write:
            rev = model.repo.new_revision()
            rev.author = 'script-%s.py' % __file__

        # Iterate over organizations
        if options.dataset:
            dataset = toolkit.get_action('package_show')(data_dict={'id': options.dataset})
            org_names = [dataset['organization']['name']]
        elif options.organization:
            org_names = [options.organization]
        elif options.incentive_only:
            org_names = sorted(accepted_submission_dgu_orgs)
        else:
            org_names = dgu_helpers.all_la_org_names()
        #print '%s organizations' % len(org_names)
        for org_name in org_names:
            org_title = model.Group.by_name(org_name).title
            lga_org = lga_orgs_by_dgu_org_name.get(org_name)

            # Iterate over the schemas
            if options.schema:
                schema = all_schemas_by_dgu_name[options.schema]
                if options.incentive_only and not schema.lga_name:
                    # not an incentive schema, so no results
                    schemas = []
                elif options.incentive_only:
                    schemas = [all_schemas_by_lga_name[submission.theme]
                               for submission in submissions
                               if submission.laname == lga_org
                               and submission.theme == schema.lga_name
                               and submission.acceptancestatus == 'Accepted']
                else:
                    schemas = [all_schemas_by_lga_name.get(
                               options.schema,
                               schema)]
            elif options.incentive_only:
                schemas = [all_schemas_by_lga_name[submission.theme]
                           for submission in submissions
                           if submission.laname == lga_org
                           and submission.acceptancestatus == 'Accepted']
            else:
                schemas = all_schemas
            #print '%s schemas' % len(schemas)
            for schema in schemas:

                # Find the relevant incentive submission
                if lga_org:
                    for submission in submissions:
                        if submission.laname == lga_org and \
                                submission.theme == schema.lga_name:
                            break
                    else:
                        submission = None
                else:
                    submission = None

                result = dict(
                    org_name=org_name,
                    org_title=org_title,
                    org_name_lga=submission.laname if submission else '',
                    schema_dgu_title=schema.dgu_schema_name,
                    schema_lga=schema.lga_name,
                    lga_application_number=submission.applicationnumber if submission else '',
                    lga_application_acceptance_status=submission.acceptancestatus if submission else '',
                    dataset_names=[],
                    dataset_titles=[],
                    dataset_schema_applied=[],
                    )

                stat_id = '%s %s' % (org_name, schema.lga_name)
                if submission:
                    stat_id += ' %s' % submission.applicationnumber

                def add_datasets_to_results(datasets, result):
                    for dataset in datasets:
                        if dataset['name'] not in result['dataset_names']:
                            result['dataset_names'].append(dataset['name'])
                            result['dataset_titles'].append(dataset['title'])
                            schema_applied = True if schema.dgu_schema_name in \
                                [s['title'] for s in dataset.get('schema', [])] \
                                else False
                            result['dataset_schema_applied'].append(schema_applied)
                            if not schema_applied and options.write:
                                pkg = model.Package.get(dataset['name'])
                                schema_obj = Schema.by_title(schema.dgu_schema_name)
                                assert schema_obj, schema.dgu_schema_name
                                try:
                                    schema_ids = json.loads(pkg.extras.get('schema') or '[]')
                                except ValueError:
                                    log.error('Not valid JSON in schema field: %s %r',
                                              dataset['name'], pkg.extras.get('schema'))
                                    schema_ids = []
                                schema_ids.append(schema_obj.id)
                                pkg.extras['schema'] = json.dumps(schema_ids)

                # Already a schema?
                data_dict = {'fq': 'publisher:%s ' % org_name +
                                   'schema_multi:"%s"' % schema.dgu_schema_name}
                datasets = toolkit.get_action('package_search')(data_dict=data_dict)
                if datasets['count'] > 0:
                    add_datasets_to_results(datasets['results'], result)
                    stats.add('OK - Dataset with schema',
                              stat_id + ' %s' % ';'.join(result['dataset_names']))
                    found_schema = True
                else:
                    found_schema = False

                # Submission specifies DGU dataset
                if submission and submission.dguurl:
                    match = re.match('http://data.gov.uk/dataset/(.*)', submission.dguurl)
                    if match:
                        dataset_name = dataset_name_original = match.groups()[0]
                        # some have trailing /
                        dataset_name = dataset_name.strip('/')
                        # hampshire have a hash appended
                        if '#' in dataset_name:
                            dataset_name = dataset_name.split('#')[0]
                        # poole have a resource name appended
                        if '/resource' in dataset_name:
                            dataset_name = dataset_name.split('/resource')[0]
                        # manual corrections
                        if dataset_name in dataset_name_corrections:
                            dataset_name = dataset_name_corrections[dataset_name]
                        dataset = model.Package.by_name(dataset_name)
                        # salford ones added a '1'
                        if not dataset:
                            dataset = model.Package.by_name(dataset_name + '1')
                            if dataset:
                                dataset_name += '1'

                        if dataset and dataset.state == 'active':
                            dataset_dict = toolkit.get_action('package_show')(data_dict={'id': dataset.id})
                            add_datasets_to_results([dataset_dict], result)
                            if dataset_name != dataset_name_original:
                                stats_incentive.add('OK - DGU Dataset listed and with corrections it checks out',
                                          stat_id + ' %s' % dataset_name)
                            else:
                                stats_incentive.add('OK - DGU Dataset listed and it checks out',
                                          stat_id + ' %s' % dataset_name)
                        elif dataset:
                            stats_incentive.add('ERROR - DGU Dataset listed BUT it is deleted!',
                                            '%s %s' % (stat_id, submission.dguurl))
                        else:
                            stats_incentive.add('ERROR - DGU Dataset listed BUT it is not found',
                                            '%s %s' % (stat_id, submission.dguurl))
                    else:
                        stats_incentive.add('ERROR - DGU Dataset listed BUT the URL is not the correct format',
                                        '%s %s' % (stat_id, submission.dguurl))

                # Submission mentions dataset on LA site - maybe it is in DGU already?
                elif submission and submission.dataseturl:
                    datasets = model.Session.query(model.Package) \
                                    .join(model.ResourceGroup) \
                                    .join(model.Resource) \
                                    .filter(model.Resource.url==submission.dataseturl) \
                                    .filter(model.Package.state=='active') \
                                    .filter(model.Resource.state=='active') \
                                    .all()
                    dataset_dicts = [
                        toolkit.get_action('package_show')(data_dict={'id': dataset.id})
                        for dataset in datasets]
                    add_datasets_to_results(dataset_dicts, result)
                    if len(datasets) > 1:
                        stats_incentive.add('No DGU Dataset, but Dataset URL matches multiple DGU datasets',
                                            '%s %s' % (stat_id, datasets[0].name))
                    elif len(datasets) == 0:
                        stats_incentive.add('No DGU Dataset and Dataset URL not found on DGU',
                                            stat_id)
                    else:
                        stats_incentive.add('No DGU Dataset, but Dataset URL matches DGU dataset',
                                            '%s %s' % (stat_id, datasets[0].name))

                # Search for datasets in the catalogue
                datasets = cls.find_dataset_for_schema(schema=schema, org_name=org_name)
                if datasets is None:
                    if not found_schema:
                        stats.add('Search revealed none', stat_id)
                elif len(datasets) > 1:
                    add_datasets_to_results(datasets, result)
                    if not found_schema:
                        stats.add('Found datasets (multiple) in search', '%s %r' % (stat_id, [d['name'] for d in datasets]))
                elif datasets:
                    add_datasets_to_results(datasets, result)
                    if not found_schema:
                        stats.add('Found dataset in search', '%s %s' % (stat_id, datasets[0]['name']))
                else:
                    if not found_schema:
                        stats.add('No dataset for submission', stat_id)

                results.append(result)

        rows_with_datasets_count = \
            len([result for result in results
                 if any(result['dataset_schema_applied'])])
        rows_with_datasets_or_candidate_datasets_count = \
            len([result for result in results
                 if result['dataset_schema_applied']])

        if options.print_:
            print '\n Incentive stats\n' + stats_incentive.report()
            print '\n Overall stats\n' + stats.report()

        if options.write:
            print 'Writing'
            model.Session.commit()

        return {'table': results,
                'rows_with_datasets_count': rows_with_datasets_count,
                'rows_with_datasets_or_candidate_datasets_count': rows_with_datasets_or_candidate_datasets_count}
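The header normalisation at the top of the command, lower-casing column names and swapping spaces for underscores before building a namedtuple, is handy on its own. A minimal sketch of just that step, using the standard csv module instead of the UnicodeCsvReader used above (read_submissions is a hypothetical helper):

import csv
from collections import namedtuple

def read_submissions(path):
    """Read a CSV into namedtuples keyed by normalised column names."""
    with open(path) as f:
        reader = csv.reader(f)
        header = [name.strip().lower().replace(' ', '_') for name in next(reader)]
        Submission = namedtuple('Submission', header)
        return [Submission(*row) for row in reader]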