def process_mapping(self, form):
    records = []
    mapping = self.__get_mapping(form)
    Model = get_model(self.app_label, self.model_name)
    with open_csv(self.temp_file_name) as csv:
        if form.cleaned_data['header']:
            csv.next()  # skip the header row
        for i, row in enumerate(csv):
            if i > 20 and not form.cleaned_data['preview_all']:
                break
            try:
                sample = Model()
                record, key = self._process_row(row, mapping)
                # re-use an existing instance when the key matches one
                if key and Model.objects.filter(**key).exists():
                    sample = Model.objects.get(**key)
                else:
                    sample = Model()
                sample = update_model(self.request, sample, record, mapping)
                records.append([sample, None, row])
            except (ValidationError, AttributeError) as e:
                records.append([sample, str(e), row])
            except (ValueError, ObjectDoesNotExist) as e:
                messages.error(self.request, '%s' % e)
                records.append([sample, str(e)])
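# The csv.next() calls in these snippets are Python 2 reader API; in Python 3
# reader objects have no .next() method, so the builtin next() is used instead.
# A minimal sketch of the same header-skip pattern (file name illustrative):
import csv

with open("sample.csv", newline="") as handle:
    reader = csv.reader(handle)
    header = next(reader, None)  # None instead of StopIteration on an empty file
    for row in reader:
        print(row)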
def build_map(csvfile_name):
    # read only the header row to build the column map
    with open(csvfile_name, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        header = reader.next()
        column_map = ColumnMap()
        column_map.build_from(header)
        return column_map
def addDataFromCSV(featureClassPath, csvPath):
    """
    Consumes a CSV to insert data into a Feature Class already available in a GDB.
    Checks that the CSV schema matches the Feature Class schema (ignoring fields
    such as OBJECTID or SHAPE) before inserting any data.
    :param featureClassPath: path of the Feature Class to be updated
    :param csvPath: path of the CSV with the data to insert
    :return: nothing
    """
    csvColumnNames = []
    featureClassAttrNames = []
    with open(csvPath, "r") as csvfile:
        names = csvfile.next()[:-1].split(",")
        for name in names:
            csvColumnNames.append(name)
    fcFields = arcpy.ListFields(featureClassPath)
    for field in fcFields:
        if field.name != "OBJECTID" and field.name != "SHAPE":
            featureClassAttrNames.append(field.name.encode("utf-8"))
    if set(featureClassAttrNames).issubset(csvColumnNames):
        insertCursorList = featureClassAttrNames
        insertCursorList.append("SHAPE@X")
        insertCursorList.append("SHAPE@Y")
        with open(csvPath, "r") as _csv:
            _csv.readline()  # skip the header line
            with arcpy.da.InsertCursor(featureClassPath, insertCursorList) as insertCursor:
                for row in _csv:
                    insertCursor.insertRow(row[:-1].split(","))
    else:
        print "The CSV schema does not match the Feature Class schema"
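# A plain-Python sketch (not arcpy-specific) of reading the rows with the csv
# module instead of line.split(","), which mis-parses quoted fields that
# contain commas; the file name is illustrative.
import csv

with open("features.csv", "r") as handle:
    reader = csv.reader(handle)
    header = next(reader)
    for row in reader:
        print(row)  # a quoted field like "a, b" arrives as one value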
def getKeys(csv):
    keys = {}
    for idx, i in enumerate(csv.next()):
        i = i.lower()
        if "file" in i:
            keys['file'] = idx
        elif "sample" in i:
            keys['name'] = idx
        elif "locus" in i or 'marker' in i:
            keys['locus'] = idx
        elif "call" == i:
            keys.setdefault('call', []).append(idx)
        elif "allele" in i:
            keys.setdefault('call', []).append(idx)
        elif "rfu" == i:
            keys.setdefault('rfu', []).append(idx)
    if not keys:
        # not a header row; keep consuming rows until one is found
        return getKeys(csv)
    return keys
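# Hypothetical Python 2 usage (getKeys relies on the iterator's .next() method);
# the header row below is made up to show how columns map to indexes.
rows = iter([
    ["File Name", "Sample Name", "Marker", "Allele 1", "Allele 2", "RFU"],
])
keys = getKeys(rows)
# keys == {'file': 0, 'name': 1, 'locus': 2, 'call': [3, 4], 'rfu': 5}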
def parse_types(csv, opt=None):
    types = []
    if opt == 0:
        # data types in a dedicated line below the header line
        for t in csv.next():
            items = re.findall(r'\w+', t)
            types.append(tuple(element for element in items[opt:]))
    if opt == 1:
        csv.rewind(0)
        # data types beside column names,
        # values are delimited by a non-alphanumeric character, like:
        # id:integer, name-varchar-30, income/float/5
        for t in csv.next():
            items = re.findall(r'\w+', t)
            types.append(tuple(element for element in items[opt:]))
    if opt == 'default':
        csv.rewind(0)
        for item in csv.next():
            types.append(('text',))
    csv.rewind(0)
    return types
def __init__(self, csv):
    self.name = ""
    self.partner_chart_list = []
    self.redemptions = []
    csv.next()  # skip the header row
    for row in csv:
        if self.name == "":
            self.name = row[0]
            print self.name
        if row[1] not in self.partner_chart_list:
            self.partner_chart_list.append(row[1])
        redemption = Redemption(row)
        #print redemption.origin
        self.redemptions.append(redemption)
        reverse_redemption = Redemption.fromReverseRedemption(redemption)
        self.redemptions.append(reverse_redemption)
def execute(self, inputs=None):
    if not hasattr(self, 'csv_stream'):
        self.open_stream()
    batch_size = self.batch_size
    csv = self.csv_stream
    if batch_size == -1:
        # -1 means "read the whole stream", then reset it for the next call
        batch = [row for row in csv]
        self.open_stream()
        return batch
    batch = []
    for _ in range(batch_size):
        try:
            batch.append(csv.next())
        except StopIteration:
            self.open_stream()
            return batch
    return batch
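# A Python 3 sketch of the same batching idea using itertools.islice, which
# collects up to batch_size rows without catching StopIteration by hand;
# names and file path are illustrative.
import csv
from itertools import islice

def read_batches(path, batch_size):
    with open(path, newline="") as handle:
        reader = csv.reader(handle)
        while True:
            batch = list(islice(reader, batch_size))
            if not batch:
                break
            yield batch

# usage: for batch in read_batches("data.csv", 100): ...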
import sys
import pprint
import string
import csv
import json
import geopy
import time

filename = sys.argv[1]
csv = csv.reader(open(filename, 'r'), delimiter=',', quotechar='"')
trade_data = {'countries': {}, 'trade': {}}
could_not_geocode = []
header = csv.next()
g = geopy.geocoders.Google()

# the geocoder is failing on the following countries. doing them by hand.
manual_geocodes = {
    'Mexico': [22.593726, -101.777344],
    'Jamaica': [18.156291, -77.294312],
    'Grenada': [12.118551, -61.680679],
    'Sint Maarten': [18.083854, -63.052597],
    'Svalbard, Jan Mayen Island': [71.008023, -8.421021],
    'Georgia': [42.098222, 43.395996],
    'Gibraltar': [36.13427, -5.347767],
    'San Marino': [43.938945, 12.463303],
    'Yugoslavia (fomer)': [43.850374, 19.6875],
    'Serbia and Montenegro': [43.084937, 19.907227],
    'Greece': [39.690281, 21.75293],
import csv
import sqlite3 as sqlite
import numpy as np
import matplotlib.pyplot as plt
import getfile

csvfile = open(getfile.get_from_strawlab("week1/CTS.csv"), 'rb')
con = sqlite.connect(':memory:')
# enter both context managers: the connection commits on success,
# the file is closed on exit
with con, csvfile:
    csv = csv.reader(csvfile)
    cur = con.cursor()
    cur.execute("CREATE TABLE CTS(date INTEGER PRIMARY KEY, co2 FLOAT, temp FLOAT);")
    header = csv.next()  # save the csv header row
    idx_date = header.index("yr_mn")
    idx_co2 = header.index("CO2")
    idx_temp = header.index("GISS")
    for row in csv:
        cur.execute("INSERT INTO CTS VALUES (?,?,?)",
                    (row[idx_date], row[idx_co2], row[idx_temp]))
    cur.execute("SELECT date, co2 FROM cts WHERE co2 != 'NA'")
    data = np.array(cur.fetchall())
    plt.plot(data[:, 0], data[:, 1])
    plt.xlabel("date")
    plt.ylabel("CO2 (ppm)")
    plt.show()
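# The INSERT loop above can also be expressed with executemany, which sqlite3
# accepts with any iterable of parameter tuples; a self-contained Python 3
# sketch (the local file name is illustrative).
import csv
import sqlite3

con = sqlite3.connect(':memory:')
cur = con.cursor()
cur.execute("CREATE TABLE CTS(date INTEGER PRIMARY KEY, co2 FLOAT, temp FLOAT);")
with open("CTS.csv", newline="") as handle:
    reader = csv.reader(handle)
    header = next(reader)
    cols = [header.index(name) for name in ("yr_mn", "CO2", "GISS")]
    cur.executemany("INSERT INTO CTS VALUES (?,?,?)",
                    (tuple(row[i] for i in cols) for row in reader))
con.commit()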
def _head(self, rows=10):
    with open_csv(self._filename) as csv:
        output = []
        for i in range(rows):
            output.append(csv.next())
        return output
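# A sketch of the same head-of-file idea with itertools.islice, which stops
# early instead of raising StopIteration when the file has fewer than `rows`
# lines; plain csv.reader stands in for the open_csv helper used above.
import csv
from itertools import islice

def head(filename, rows=10):
    with open(filename, newline="") as handle:
        return list(islice(csv.reader(handle), rows))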
# Created by: Tim Bramlett
# For questions:
#   [email protected] OR
#   [email protected]
import csv
import requests
import ast

f = open("test.csv", "r")
csv = csv.reader(f, dialect='excel')
csv.next()  # skips the header

# I simply used this in order to not have to hard-code the token in this script.
# I was writing this on a Windows VM and couldn't remember how to script Env
# Variables on Windows :)
with open(".secret.txt") as file:
    somewhatSecret = file.read()
somewhatSecret = ast.literal_eval(somewhatSecret)
# Note: a `with` statement is a context-manager block, not a new scope, so
# somewhatSecret is still available outside the `with` statement.

headers = {
    'authorization': "Basic YWRtaW46YWRtaW4=",
    'x-cloupia-request-key': somewhatSecret['apikey'],
    'cache-control': "no-cache",
    'postman-token': "0d544a7c-8a76-195f-eb69-404857ad217e"
}
'''
Organize raw data in .dat format to run a first SOM and have something to play with
'''
import csv

raw_csv = '/home/dani/AAA/LargeData/WDIandGDF_csv/WDI_GDF_Data.csv'
data = {}  # data[cty][year] = {'var_names': [], 'var_values': []}
csv = csv.reader(open(raw_csv, 'r'), delimiter=',', quotechar='"')
years = csv.next()[4:]
variables = []
countries = []
cty2name = {}
for line in csv:
    var, var_name, cty, cty_name, values = line[0], line[1], line[2], line[3], \
        line[4:]
    variables.append(var)
    countries.append(cty)
    if cty not in cty2name:
        cty2name[cty] = cty_name
    if cty not in data:
        data[cty] = {}
    for year in years:
        if year not in data[cty]:
            data[cty][year] = {}
        data[cty][year][var] = values[years.index(year)]
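# The inner loop above re-computes years.index(year) for every cell, which is
# quadratic in the number of years; zipping years with the row's values pairs
# them directly. A self-contained sketch with illustrative values:
years = ['2000', '2001', '2002']
values = ['1.0', '2.0', '3.0']
cell = {}
for year, value in zip(years, values):
    cell[year] = value
# cell == {'2000': '1.0', '2001': '2.0', '2002': '3.0'}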
"delivery": "url", "mailsubject": "", "mailfrom":"", "mailto":"", "mailmessage":"" } r = s.post(url, data=payload) soup = BeautifulSoup(r.content, 'lxml') download = soup.find('a', text='Download') r = s.get('https://spinitron.com' + download.get('href')) # Set up CSV reader and process the header csv = csv.reader(r.content.splitlines()) header = csv.next() artist_ind = header.index("Artist") album_ind = header.index("Disk") song_ind = header.index("Song") # Make an empty list song_list = [] artist_counts = {} album_counts = {} song_counts = {} # Loop through the lines in the file and get each coordinate for row in csv: artist = row[artist_ind] album = row[album_ind] song = row[song_ind]
import csv
import sys
import re

if len(sys.argv) != 2:
    print "Usage: %s <file.csv>" % (sys.argv[0])
    sys.exit()

fd = open(sys.argv[1], "rb")
csv = csv.reader(fd)
paramigrar = []
total = 0
regexp = re.compile(r"http://guifi.net/node/(\d+)$")
for i in range(6):
    csv.next()  # skip the six preamble lines
# CSV lines whose 4th field is not 'no' are kept
try:
    while True:
        l = csv.next()
        total = total + 1
        try:
            if l[3].lower() != 'no':
                l.pop(3)  # this field is no longer useful
                l[0] = regexp.search(l[0]).group(1)  # replace the URL with the nid directly
                paramigrar.append(l)
        except IndexError:
            print "-- Index Error --", l
except StopIteration:
    pass
def handle(self, *args: None, **options: Dict[Any, Any]) -> None:
    """Command handle."""
    csv: FileIterator = FileIterator(options["csv_path"][0])
    datas = Data.objects.all().order_by("location__pk")
    counter: Dict[str, int] = {
        "match": 0,
        "csv_only": 0,
        "models_only": 0,
        "hash_mismatch": 0,
    }
    subpath_map: SubpathMap = map_subpath_locations(csv)

    for data in datas.iterator():
        subpath = data.location.subpath
        urls = data.location.files
        urls = urls.exclude(Q(path__endswith="/"))  # exclude directories
        urls = ModelIterator(urls.order_by("path"))

        if subpath not in subpath_map:
            filecount = urls.count
            files = f"({filecount} files)"
            self.stdout.write(f"MODEL-ONLY {subpath}/* {files}")
            counter["models_only"] += filecount
            continue

        subpath_map[subpath]["visited"] = True
        csv.restrict(
            start=subpath_map[subpath]["start"],
            end=subpath_map[subpath]["end"],
        )
        csv.seek_relative(0)

        next_in_models, model_hash = urls.next()
        next_in_csv, csv_hash = csv.next()
        while next_in_csv and next_in_models:
            if next_in_models == next_in_csv:
                # entries match, verify checksum
                if model_hash == csv_hash:
                    counter["match"] += 1
                else:
                    fullpath = f"{subpath}/{next_in_models}"
                    hashes = f"{model_hash} != {csv_hash}"
                    self.stdout.write(f"HASH {fullpath} {hashes}")
                    counter["hash_mismatch"] += 1
                # advance both
                next_in_models, model_hash = urls.next()
                next_in_csv, csv_hash = csv.next()
            elif next_in_models < next_in_csv or not csv.has_next():
                # entries are missing in CSV
                # (models are alphabetically *behind*)
                fullpath = subpath + "/" + next_in_models
                self.stdout.write(f"MODEL-ONLY {fullpath}")
                counter["models_only"] += 1
                next_in_models, model_hash = urls.next()  # advance models
            elif next_in_models > next_in_csv or not urls.has_next():
                # entries are missing in models
                # (models are alphabetically *ahead*)
                fullpath = subpath + "/" + next_in_csv
                self.stdout.write(f"CSV-ONLY {fullpath}")
                counter["csv_only"] += 1
                next_in_csv, csv_hash = csv.next()  # advance CSV

        # either (or both) of the iterators is finished,
        # now we need to exhaust the other
        while next_in_csv:
            self.stdout.write(f"CSV-ONLY {subpath}/{next_in_csv}")
            counter["csv_only"] += 1
            next_in_csv, csv_hash = csv.next()
        while next_in_models:
            self.stdout.write(f"MODEL-ONLY {subpath}/{next_in_models}")
            counter["models_only"] += 1
            next_in_models, model_hash = urls.next()

    # list all subpaths from CSV that we haven't visited
    # while traversing models' data
    for subpath in subpath_map:
        if "visited" not in subpath_map[subpath]:
            filecount = subpath_map[subpath]["linecount"]
            self.stdout.write(f"CSV-ONLY {subpath}/* ({filecount} files)")
            counter["csv_only"] += filecount

    # print an overview/summary
    out = ""
    out += f"{counter['match']} files OK"
    if counter["csv_only"] != 0:
        out += f", {counter['csv_only']} files in CSV only"
    if counter["models_only"] != 0:
        out += f", {counter['models_only']} files in models only"
    if counter["hash_mismatch"] != 0:
        out += f", {counter['hash_mismatch']} files do not match the hash"
    self.stdout.write(out)

    # double check the numbers just in case
    ReferencedPath_count = ReferencedPath.objects.exclude(
        Q(path__endswith="/")
    ).count()
    self.stdout.write(f"CSV length = {csv.length}")
    self.stdout.write(f"ReferencedPath count = {ReferencedPath_count}")
    matches = counter["hash_mismatch"] + counter["match"]
    csv_records = matches + counter["csv_only"]
    models_records = matches + counter["models_only"]
    # this should never happen, but it's better to check,
    # just because it's so easy to do
    if csv_records != csv.length:
        self.stdout.write(
            "Numbers don't add up."
            " OK + csv_only + hash_mismatch != CSV.line_count."
        )
    if models_records != ReferencedPath_count:
        self.stdout.write(
            "Numbers don't add up."
            " OK + models_only + hash_mismatch != ReferencedPath_count."
            " There might be orphaned ReferencedPaths."
        )
import sys
import timeit

start = timeit.default_timer()
# pool = Pool()
fileName = sys.argv[1]
MN = int(sys.argv[2])
UN = int(sys.argv[3])
F = int(sys.argv[4])
ITERATIONS = int(sys.argv[5])

rawR = []
onlyUid = []
onlyMovieID = []

csv = open(fileName, 'r')
csv.next()  # skip the header line
for row in csv:
    eachR = row.split(',')
    eachR.pop(3)
    onlyUid.append(int(eachR[0]))
    onlyMovieID.append(int(eachR[1]))
    rawR.append(map(float, eachR))

onlyUid.sort()
onlyMovieID.sort()
onlyUid = list(set(onlyUid))
onlyUid.sort()
import argparse
import csv

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('-c', '--csv', help='Load events from csv.', action='store_false')
parser.add_argument('-r', '--random', help='Send random events. (will use a list otherwise)', action='store_true')
parser.add_argument('-l', '--loop', help='Loop events sequence.', action='store_true')
args = parser.parse_args()

csv_data = []
# columns that must be converted to float
float_indexes = [0, 2, 8, 9, 10, 13, 14, 15, 16, 19, 25, 26, 27, 30, 31, 32, 33,
                 36, 42, 43, 44, 47, 48, 49, 50]

with open('data_files/smalley.csv', 'rb') as csvfile:
    csv = csv.reader(csvfile, delimiter=',', quotechar='"')
    csv.next()  # first row
    csv.next()  # and second row are headers
    for row in csv:
        for i, val in enumerate(row):
            if i in float_indexes:
                try:
                    row[i] = float(row[i].replace(",", "."))
                except ValueError:
                    row[i] = 0
        csv_data.append(row)

osc_port = 57120  # default SuperCollider port (must be open before executing this program)

# just some easy ansi colors printing: 'o' for ok, 'w' for warning, 'e' for error.
def printc(t, c='o'):
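# A small helper for the float conversion above: comma decimal separators are
# normalized and unparseable values fall back to a default, so only ValueError
# is swallowed rather than every exception. Names are illustrative.
def to_float(text, default=0.0):
    try:
        return float(text.replace(",", "."))
    except ValueError:
        return default

assert to_float("3,14") == 3.14
assert to_float("n/a") == 0.0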
def command(cls, config_ini, options, submissions_csv_filepath):
    # Incentive CSV. Columns:
    # applicationnumber, applicationdate, jobrole, laname, officerauthorised,
    # theme, responsedate, acceptancestatus, odicertificateurl, dguurl,
    # inventoryurl, localcodes, dataseturl, schemaurl, guidanceurl,
    # frequencyofpublishing, foinumberest, submissioncomplete, lastlaupdate,
    # techreviewstatus, lasttechupdate, adminreviewstatus, paymentamount,
    # closed, lastadminupdate, applicantnotes, administrationnotes,
    # technicalnotes, lastupdated
    with open(submissions_csv_filepath, 'rb') as f:
        csv = UnicodeCsvReader(f, encoding='iso-8859-1')
        header = csv.next()
        header = [col_name.strip().lower().replace(' ', '_') for col_name in header]
        Submission = namedtuple('Submission', header)
        submissions = [Submission(*row) for row in csv]

    if config_ini:
        # this is only for when running from the command-line
        #print 'Loading CKAN config...'
        common.load_config(config_ini)
        common.register_translator()
        #print '...done'

    from ckan import model
    from ckan.plugins import toolkit
    from ckanext.dgu.lib import helpers as dgu_helpers
    from ckanext.dgu.model.schema_codelist import Schema

    log = __import__('logging').getLogger(__name__)

    # Match the organizations in the submissions
    lga_orgs_by_dgu_org_name = {}
    accepted_submission_dgu_orgs = set()
    for submission in submissions:
        la_title = la_map.get(submission.laname, submission.laname)
        org = model.Session.query(model.Group) \
                   .filter_by(title=la_title) \
                   .first()
        assert org, 'Submission org title not found: %r' % la_title
        lga_orgs_by_dgu_org_name[org.name] = submission.laname
        if submission.acceptancestatus == 'Accepted':
            accepted_submission_dgu_orgs.add(org.name)

    stats = Stats()
    stats_incentive = Stats()
    results = []

    if options.write:
        rev = model.repo.new_revision()
        rev.author = 'script-%s.py' % __file__

    # Iterate over organizations
    if options.dataset:
        dataset = toolkit.get_action('package_show')(data_dict={'id': options.dataset})
        org_names = [dataset['organization']['name']]
    elif options.organization:
        org_names = [options.organization]
    elif options.incentive_only:
        org_names = sorted(accepted_submission_dgu_orgs)
    else:
        org_names = dgu_helpers.all_la_org_names()
    #print '%s organizations' % len(org_names)
    for org_name in org_names:
        org_title = model.Group.by_name(org_name).title
        lga_org = lga_orgs_by_dgu_org_name.get(org_name)

        # Iterate over the schemas
        if options.schema:
            schema = all_schemas_by_dgu_name[options.schema]
            if options.incentive_only and not schema.lga_name:
                # not an incentive schema, so no results
                schemas = []
            elif options.incentive_only:
                schemas = [all_schemas_by_lga_name[submission.theme]
                           for submission in submissions
                           if submission.laname == lga_org
                           and submission.theme == schema.lga_name
                           and submission.acceptancestatus == 'Accepted']
            else:
                schemas = [all_schemas_by_lga_name.get(options.schema, schema)]
        elif options.incentive_only:
            schemas = [all_schemas_by_lga_name[submission.theme]
                       for submission in submissions
                       if submission.laname == lga_org
                       and submission.acceptancestatus == 'Accepted']
        else:
            schemas = all_schemas
        #print '%s schemas' % len(schemas)
        for schema in schemas:
            # Find the relevant incentive submission
            if lga_org:
                for submission in submissions:
                    if submission.laname == lga_org and \
                            submission.theme == schema.lga_name:
                        break
                else:
                    submission = None
            else:
                submission = None

            result = dict(
                org_name=org_name,
                org_title=org_title,
                org_name_lga=submission.laname if submission else '',
                schema_dgu_title=schema.dgu_schema_name,
                schema_lga=schema.lga_name,
                lga_application_number=submission.applicationnumber if submission else '',
                lga_application_acceptance_status=submission.acceptancestatus if submission else '',
                dataset_names=[],
                dataset_titles=[],
                dataset_schema_applied=[],
            )
            stat_id = '%s %s' % (org_name, schema.lga_name)
            if submission:
                stat_id += ' %s' % submission.applicationnumber

            def add_datasets_to_results(datasets, result):
                for dataset in datasets:
                    if dataset['name'] not in result['dataset_names']:
                        result['dataset_names'].append(dataset['name'])
                        result['dataset_titles'].append(dataset['title'])
                        schema_applied = True if schema.dgu_schema_name in \
                            [s['title'] for s in dataset.get('schema', [])] \
                            else False
                        result['dataset_schema_applied'].append(schema_applied)
                        if not schema_applied and options.write:
                            pkg = model.Package.get(dataset['name'])
                            schema_obj = Schema.by_title(schema.dgu_schema_name)
                            assert schema_obj, schema.dgu_schema_name
                            try:
                                schema_ids = json.loads(pkg.extras.get('schema') or '[]')
                            except ValueError:
                                log.error('Not valid JSON in schema field: %s %r',
                                          dataset['name'], pkg.extras.get('schema'))
                                schema_ids = []
                            schema_ids.append(schema_obj.id)
                            pkg.extras['schema'] = json.dumps(schema_ids)

            # Already a schema?
            data_dict = {'fq': 'publisher:%s ' % org_name +
                         'schema_multi:"%s"' % schema.dgu_schema_name}
            datasets = toolkit.get_action('package_search')(data_dict=data_dict)
            if datasets['count'] > 0:
                add_datasets_to_results(datasets['results'], result)
                stats.add('OK - Dataset with schema',
                          stat_id + ' %s' % ';'.join(result['dataset_names']))
                found_schema = True
            else:
                found_schema = False

            # Submission specifies DGU dataset
            if submission and submission.dguurl:
                match = re.match('http://data.gov.uk/dataset/(.*)', submission.dguurl)
                if match:
                    dataset_name = dataset_name_original = match.groups()[0]
                    # some have trailing /
                    dataset_name = dataset_name.strip('/')
                    # hampshire have a hash appended
                    if '#' in dataset_name:
                        dataset_name = dataset_name.split('#')[0]
                    # poole have a resource name appended
                    if '/resource' in dataset_name:
                        dataset_name = dataset_name.split('/resource')[0]
                    # manual corrections
                    if dataset_name in dataset_name_corrections:
                        dataset_name = dataset_name_corrections[dataset_name]
                    dataset = model.Package.by_name(dataset_name)
                    # salford ones added a '1'
                    if not dataset:
                        dataset = model.Package.by_name(dataset_name + '1')
                        if dataset:
                            dataset_name += '1'
                    if dataset and dataset.state == 'active':
                        dataset_dict = toolkit.get_action('package_show')(data_dict={'id': dataset.id})
                        add_datasets_to_results([dataset_dict], result)
                        if dataset_name != dataset_name_original:
                            stats_incentive.add('OK - DGU Dataset listed and with corrections it checks out',
                                                stat_id + ' %s' % dataset_name)
                        else:
                            stats_incentive.add('OK - DGU Dataset listed and it checks out',
                                                stat_id + ' %s' % dataset_name)
                    elif dataset:
                        stats_incentive.add('ERROR - DGU Dataset listed BUT it is deleted!',
                                            '%s %s' % (stat_id, submission.dguurl))
                    else:
                        stats_incentive.add('ERROR - DGU Dataset listed BUT it is not found',
                                            '%s %s' % (stat_id, submission.dguurl))
                else:
                    stats_incentive.add('ERROR - DGU Dataset listed BUT the URL is not the correct format',
                                        '%s %s' % (stat_id, submission.dguurl))

            # Submission mentions dataset on LA site - maybe it is in DGU already?
            elif submission and submission.dataseturl:
                datasets = model.Session.query(model.Package) \
                                .join(model.ResourceGroup) \
                                .join(model.Resource) \
                                .filter(model.Resource.url == submission.dataseturl) \
                                .filter(model.Package.state == 'active') \
                                .filter(model.Resource.state == 'active') \
                                .all()
                dataset_dicts = [
                    toolkit.get_action('package_show')(data_dict={'id': dataset.id})
                    for dataset in datasets]
                add_datasets_to_results(dataset_dicts, result)
                if len(datasets) > 1:
                    stats_incentive.add('No DGU Dataset, but Dataset URL matches multiple DGU datasets',
                                        '%s %s' % (stat_id, datasets[0].name))
                elif len(datasets) == 0:
                    stats_incentive.add('No DGU Dataset and Dataset URL not found on DGU',
                                        stat_id)
                else:
                    stats_incentive.add('No DGU Dataset, but Dataset URL matches DGU dataset',
                                        '%s %s' % (stat_id, datasets[0].name))

            # Search for datasets in the catalogue
            datasets = cls.find_dataset_for_schema(schema=schema, org_name=org_name)
            if datasets is None:
                if not found_schema:
                    stats.add('Search revealed none', stat_id)
            elif len(datasets) > 1:
                add_datasets_to_results(datasets, result)
                if not found_schema:
                    stats.add('Found datasets (multiple) in search',
                              '%s %r' % (stat_id, [d['name'] for d in datasets]))
            elif datasets:
                add_datasets_to_results(datasets, result)
                if not found_schema:
                    stats.add('Found dataset in search',
                              '%s %s' % (stat_id, datasets[0]['name']))
            else:
                if not found_schema:
                    stats.add('No dataset for submission', stat_id)

            results.append(result)

    rows_with_datasets_count = \
        len([result for result in results
             if any(result['dataset_schema_applied'])])
    rows_with_datasets_or_candidate_datasets_count = \
        len([result for result in results
             if result['dataset_schema_applied']])

    if options.print_:
        print '\n Incentive stats\n' + stats_incentive.report()
        print '\n Overall stats\n' + stats.report()

    if options.write:
        print 'Writing'
        model.Session.commit()

    return {'table': results,
            'rows_with_datasets_count': rows_with_datasets_count,
            'rows_with_datasets_or_candidate_datasets_count': rows_with_datasets_or_candidate_datasets_count}
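# The namedtuple-per-row pattern at the top of command() in a self-contained
# Python 3 sketch; UnicodeCsvReader above is a project helper, plain csv.reader
# over an in-memory file stands in here, and the sample columns are made up.
import csv
from collections import namedtuple
from io import StringIO

f = StringIO("Application Number,LA Name\n123,Anytown\n")
reader = csv.reader(f)
header = [col.strip().lower().replace(' ', '_') for col in next(reader)]
Submission = namedtuple('Submission', header)
submissions = [Submission(*row) for row in reader]
# submissions[0].application_number == '123'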