def schema_json(self, dataset_type):
    """
    Serve a JSON description of the recombinant dataset type
    dataset_type: its resources, their publicly visible fields (with
    choices), primary keys and example records.

    Aborts with a 404 when get_geno raises RecombinantException.
    Returns the UTF-8 encoded JSON document and sets the response
    Content-Type / Content-Disposition headers.
    """
    try:
        geno = get_geno(dataset_type)
    except RecombinantException:
        abort(404, _('Recombinant dataset_type not found'))

    # OrderedDict everywhere so the JSON keys come out in a stable order
    schema = OrderedDict(
        (name, geno[name])
        for name in ('dataset_type', 'title', 'notes')
        if name in geno)
    schema['resources'] = []

    for chromo in geno['resources']:
        resource = OrderedDict()
        schema['resources'].append(resource)

        choice_fields = dict(
            (cf['datastore_id'], cf['choices'])
            for cf in recombinant_choice_fields(
                chromo['resource_name'],
                all_languages=True))

        for name in ('title', 'resource_name'):
            if name in chromo:
                resource[name] = chromo[name]

        public_fields = []
        resource['fields'] = public_fields
        for field in chromo['fields']:
            # fields flagged as not visible to the public are left out
            if not field.get('visible_to_public', True):
                continue
            field_id = field['datastore_id']
            fld = OrderedDict()
            public_fields.append(fld)
            fld['id'] = field_id
            for name in ('label', 'description', 'obligation', 'format_type'):
                if name in field:
                    fld[name] = field[name]
            if field_id in choice_fields:
                fld['choices'] = OrderedDict(
                    (ck, cv) for ck, cv in choice_fields[field_id])

        resource['primary_key'] = chromo['datastore_primary_key']

        if 'examples' in chromo:
            ex_record = chromo['examples']['record']
            # keep the example values in field definition order
            resource['example_record'] = OrderedDict(
                (field['datastore_id'], ex_record[field['datastore_id']])
                for field in chromo['fields']
                if field['datastore_id'] in ex_record)

    headers = response.headers
    headers['Content-Type'] = 'application/json'
    headers['Content-Disposition'] = (
        'inline; filename="{0}.json"'.format(dataset_type))
    body = json.dumps(schema, indent=2, ensure_ascii=False)
    return body.encode('utf-8')
def excel_data_dictionary(geno):
    """
    return an openpyxl.Workbook object containing the field reference
    from geno, one sheet per language

    geno - dataset type definition; the fields of every entry in
        geno['resources'] are listed on each sheet

    One sheet is created for each locale listed in the
    'ckan.locales_offered' config setting. The request language
    (CKAN_LANG) is switched while each sheet is built and is not
    restored afterwards, so it is left at the last locale on return.
    """
    book = openpyxl.Workbook()
    # the first language reuses the workbook's active sheet; every
    # following language gets a newly created sheet
    sheet = book.active

    from pylons import config
    from ckan.lib.i18n import handle_request, get_lang
    from ckan.common import c, request

    for lang in config['ckan.locales_offered'].split():
        if sheet is None:
            sheet = book.create_sheet()

        sheet.title = lang.upper()
        # switch language (FIXME: this is harder than it should be)
        # must happen before the choices are looked up below
        request.environ['CKAN_LANG'] = lang
        handle_request(request, c)

        choice_fields = dict(
            (f['datastore_id'], f['choices'])
            for chromo in geno['resources']
            for f in recombinant_choice_fields(chromo['resource_name']))

        refs = []
        for chromo in geno['resources']:
            for field in chromo['fields']:
                _append_field_ref_rows(refs, field, link=None)

                if field['datastore_id'] in choice_fields:
                    _append_field_choices_rows(
                        refs,
                        choice_fields[field['datastore_id']],
                        full_text_choices=False)

        _populate_reference_sheet(sheet, geno, refs)
        # force a new sheet for the next language
        sheet = None

    return book
def _populate_excel_sheet(sheet, chromo, org, refs):
    """
    Lay out an openpyxl sheet as the data entry template for the
    resource definition chromo and organization org.

    refs - list of rows to add to reference sheet; the rows for every
        choice field are appended in place by this function

    Nothing is returned: sheet and refs are modified in place.
    """
    DataValidation = openpyxl.worksheet.datavalidation.DataValidation

    # one shared TRUE/FALSE list validation; boolean columns join it below
    bool_validation = DataValidation(
        type="list",
        formula1='"FALSE,TRUE"',
        allow_blank=True)
    sheet.add_data_validation(bool_validation)

    sheet.title = chromo['resource_name']

    def put(row, column, value, styles):
        "set the value of one cell and style it"
        cell = sheet.cell(row=row, column=column)
        cell.value = value
        apply_styles(styles, cell)

    # row 1: the organization this template belongs to
    org_style = chromo['excel_organization_style']
    put(1, 1, org['name'], org_style)
    put(1, 2, org['title'], org_style)
    apply_styles(org_style, sheet.row_dimensions[1])

    header_style = chromo['excel_header_style']

    choices_by_id = dict(
        (cf['datastore_id'], cf['choices'])
        for cf in recombinant_choice_fields(chromo['resource_name']))

    for n, field in enumerate(chromo['fields'], 1):
        # row 2: translated label, row 3: datastore column id
        put(2, n, _(field['label']), header_style)
        put(3, n, field['datastore_id'], header_style)

        # jumping through openpyxl hoops:
        col_letter = openpyxl.cell.get_column_letter(n)
        dimension = sheet.column_dimensions[col_letter]
        dimension.width = field['excel_column_width']
        # FIXME: format only below header
        dimension.number_format = (
            datastore_type[field['datastore_type']].xl_format)

        data_range = '{0}4:{0}1004'.format(col_letter)
        if field['datastore_type'] == 'boolean':
            bool_validation.ranges.append(data_range)

        if field['datastore_id'] not in choices_by_id:
            continue

        # reference rows: a heading row, one row per choice, a blank row
        refs.append([_(field['label'])])
        first_ref = len(refs) + 2
        refs.extend(
            [None, key, value]
            for key, value in choices_by_id[field['datastore_id']])
        last_ref = len(refs) + 1
        refs.append([])

        choice_range = 'reference!$B${0}:$B${1}'.format(first_ref, last_ref)
        validation = DataValidation(
            type="list",
            formula1=choice_range,
            allow_blank=True)
        validation.errorTitle = u'Invalid choice'
        validation.error = (
            u'Please enter one of the valid keys shown on '
            'sheet "reference" rows {0}-{1}'.format(first_ref, last_ref))
        sheet.add_data_validation(validation)
        validation.ranges.append(data_range)

        # hilight header if bad values pasted below
        bad_values = (
            'COUNTIF({0},"<>"&"")'  # all non-blank cells
            '-SUMPRODUCT(COUNTIF({0},{1}))'
        ).format(data_range, choice_range)
        sheet.conditional_formatting.add(
            "{0}2".format(col_letter),
            openpyxl.formatting.FormulaRule(
                [bad_values], stopIfTrue=True, fill=red_fill))

    for header_row in (2, 3):
        apply_styles(header_style, sheet.row_dimensions[header_row])
    # the datastore id row is kept in the sheet but hidden
    sheet.row_dimensions[3].hidden = True

    sheet.freeze_panes = sheet['A4']
def _update_records(records, org_detail, conn, resource_name):
    """
    Update records on solr core

    :param records: record dicts
    :ptype records: sequence of record dicts

    :param org_detail: org structure as returned via local CKAN
    :ptype org_detail: dict with local CKAN org structure

    :param conn: solr connection
    :ptype conn: obj

    :param resource_name: type being updated

    Every record is turned into one solr document holding the
    organization names, each field value, range facets for fields with
    "solr_float_range_facet", year/month extracted from date fields and
    the English/French text of choice fields. All documents are sent
    with a single conn.add_many call.
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get("datastore_primary_key", [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail["name"]
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # NOTE(review): recombinant_type is not defined inside this
            # function; presumably a module-level name - confirm,
            # otherwise this branch raises NameError
            s = hashlib.md5(s + recombinant_type + "-%d" % r["_id"]).hexdigest()
            f += u"|" + unicode(r["_id"])
        for k in pk:
            # chain the hash over each primary key value
            s = hashlib.md5(s + r[k].encode("utf-8")).hexdigest()
            f += u"|" + unicode(r[k])
        return s, f

    out = []

    # {field id: {choice key: choice text}}
    choice_fields = dict(
        (f["datastore_id"], dict(f["choices"]))
        for f in recombinant_choice_fields(resource_name, all_languages=True)
    )

    for r in records:
        unique, friendly = unique_id(r)

        # title holds "English | French"; without the separator both
        # names are the whole title
        org_title = org_detail["title"].split(" | ", 1)
        solrrec = {
            "id": unique,
            "unique_id": friendly,
            "org_name_code": org_detail["name"],
            "org_name_en": org_title[0],
            "org_name_fr": org_title[-1],
        }

        for f in chromo["fields"]:
            key = f["datastore_id"]
            value = r[key]

            facet_range = f.get("solr_float_range_facet")
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    # non-numeric value: no range facet for this field
                    pass
                else:
                    # first facet whose upper bound is above the value;
                    # a facet without "less_than" matches everything
                    for i, fac in enumerate(facet_range):
                        if "less_than" not in fac or float_value < fac["less_than"]:
                            label = fac["label"].split(" | ")
                            solrrec[key + "_range"] = str(i)
                            solrrec[key + "_range_en"] = label[0]
                            solrrec[key + "_range_fr"] = label[-1]
                            break

            if f.get("datastore_type") == "date":
                try:
                    value = date2zulu(value)
                except ValueError:
                    # invalid date: index the raw value, extract nothing
                    pass
                else:
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get("extract_date_year"):
                        solrrec["date_year"] = value.split("-", 1)[0]
                    if f.get("extract_date_month"):
                        solrrec["date_month"] = value.split("-")[1]

            solrrec[key] = value

            choices = choice_fields.get(f["datastore_id"])
            if not choices:
                continue

            # "<name>_code" fields get their text in "<name>_en/_fr"
            if key.endswith("_code"):
                key = key[:-5]
            choice_text = choices.get(value, "")
            solrrec[key + "_en"] = recombinant_language_text(choice_text, "en")
            solrrec[key + "_fr"] = recombinant_language_text(choice_text, "fr")

        # catch-all search field built from every value collected so far
        solrrec["text"] = u" ".join(unicode(v) for v in solrrec.values())
        out.append(solrrec)

    conn.add_many(out, _commit=True)
def _populate_excel_sheet(sheet, geno, chromo, org, refs, resource_num):
    """
    Format openpyxl sheet for the resource definition chromo and org.
    (Version 3)

    sheet - openpyxl worksheet to fill in; its title is set to the
        chromo resource_name
    geno - dataset type definition; optional 'excel_*_style' keys
        override the DEFAULT_* styles
    chromo - resource definition (fields, title, examples, optional
        excel_* layout settings)
    org - organization dict; 'name' is written to the code row and
        'title' to the header
    refs - list of rows to add to reference sheet, modified
        in place from this function
    resource_num - 1-based index of resource, used to build the
        e{n} / r{n} sheet names referenced by the status formulas

    returns cranges dict of {datastore_id: reference_key_range}
    """
    sheet.title = chromo['resource_name']

    # each style starts from a module default and is overridden by geno
    edge_style = dict(DEFAULT_EDGE_STYLE, **geno.get('excel_edge_style', {}))
    # required style is layered on top of the edge style
    required_style = dict(edge_style, **geno.get('excel_required_style', {}))
    header_style = dict(DEFAULT_HEADER_STYLE, **geno.get('excel_header_style', {}))
    cheadings_style = dict(DEFAULT_CHEADING_STYLE, **geno.get('excel_column_heading_style', {}))
    example_style = dict(DEFAULT_EXAMPLE_STYLE, **geno.get('excel_example_style', {}))
    error_style = dict(DEFAULT_ERROR_STYLE, **geno.get('excel_error_style', {}))

    cranges = {}
    data_num_rows = chromo.get('excel_data_num_rows', DEFAULT_DATA_NUM_ROWS)

    # create rows so we can set all heights
    for i in xrange(1, DATA_FIRST_ROW + data_num_rows):
        sheet.cell(row=i, column=1).value = None

    sheet.merge_cells(EXAMPLE_MERGE)
    fill_cell(sheet, EXAMPLE_ROW, 1, _('e.g.'), example_style)

    # marker beside the first data row, blanked by the formula when the
    # referenced cell on the r{n} sheet is true
    fill_cell(
        sheet,
        DATA_FIRST_ROW,
        RPAD_COL_NUM,
        u'=IF(r{rnum}!{col}{row},"","▶")'.format(
            rnum=resource_num,
            col=RPAD_COL,
            row=DATA_FIRST_ROW),
        TYPE_HERE_STYLE)

    # sheet header: "<resource title> — <organization title>"
    fill_cell(
        sheet,
        HEADER_ROW,
        DATA_FIRST_COL_NUM,
        recombinant_language_text(chromo['title'])
            + u' \N{em dash} ' + org_title_lang_hack(org['title']),
        header_style)

    sheet.cell(row=CODE_ROW, column=1).value = 'v3'  # template version
    # allow only upload to this org
    sheet.cell(row=CODE_ROW, column=2).value = org['name']

    cheadings_dimensions = sheet.row_dimensions[CHEADINGS_ROW]

    choice_fields = dict(
        (f['datastore_id'], f['choices'])
        for f in recombinant_choice_fields(chromo['resource_name']))

    for col_num, field in template_cols_fields(chromo):
        field_heading = recombinant_language_text(
            field.get('excel_heading', field['label'])).strip()
        # grow the heading row to fit the heading with the most lines
        cheadings_dimensions.height = max(
            cheadings_dimensions.height,
            field_heading.count('\n') * LINE_HEIGHT + CHEADINGS_HEIGHT)

        col_heading_style = cheadings_style
        if 'excel_column_heading_style' in field:
            # use geno column heading style as base, just override keys
            col_heading_style = dict(
                cheadings_style,
                **field['excel_column_heading_style'])
            apply_styles(col_heading_style, sheet.cell(
                row=HEADER_ROW, column=col_num))
            apply_styles(col_heading_style, sheet.cell(
                row=CSTATUS_ROW, column=col_num))

        fill_cell(
            sheet,
            CHEADINGS_ROW,
            col_num,
            field_heading,
            col_heading_style)

        # first reference sheet row for this field; must be taken before
        # any rows for the field are appended to refs
        reference_row1 = len(refs) + REF_FIRST_ROW

        # match against db columns
        sheet.cell(row=CODE_ROW, column=col_num).value = field['datastore_id']

        example = chromo['examples']['record'].get(field['datastore_id'], '')
        # list examples are shown as a comma separated string
        fill_cell(
            sheet,
            EXAMPLE_ROW,
            col_num,
            u','.join(example) if isinstance(example, list) else example,
            example_style)

        col_letter = openpyxl.cell.get_column_letter(col_num)

        # jump to first error/required cell in column
        fill_cell(
            sheet,
            CSTATUS_ROW,
            col_num,
            '=IF(e{rnum}!{col}{row}>0,HYPERLINK("#{col}"&e{rnum}!{col}{row},"")'
                ',IF(r{rnum}!{col}{row}>0,HYPERLINK("#{col}"&r{rnum}!{col}{row},""),""))'
                .format(rnum=resource_num, col=col_letter, row=CSTATUS_ROW),
            col_heading_style)

        col = sheet.column_dimensions[col_letter]
        if 'excel_column_width' in field:
            col.width = field['excel_column_width']
        else:
            col.width = max(estimate_width(field_heading), CHEADINGS_MIN_WIDTH)

        # all data entry cells of this column
        validation_range = '{col}{row1}:{col}{rowN}'.format(
            col=col_letter,
            row1=DATA_FIRST_ROW,
            rowN=DATA_FIRST_ROW + data_num_rows - 1)

        xl_format = datastore_type[field['datastore_type']].xl_format
        alignment = openpyxl.styles.Alignment(wrap_text=True)
        protection = openpyxl.styles.Protection(locked=False)
        # data cells: typed format, wrapped text and unlocked
        for (c,) in sheet[validation_range]:
            c.number_format = xl_format
            c.alignment = alignment
            c.protection = protection
        # the example cell gets the same format but is not unlocked
        ex_cell = sheet.cell(row=EXAMPLE_ROW, column=col_num)
        ex_cell.number_format = xl_format
        ex_cell.alignment = alignment

        # reference rows for the field, linked back to its column heading
        _append_field_ref_rows(refs, field, "#'{sheet}'!{col}{row}".format(
            sheet=sheet.title, col=col_letter, row=CHEADINGS_ROW))

        if field['datastore_id'] in choice_fields:
            # full text choices are never used for '_text' fields
            full_text_choices = (
                field['datastore_type'] != '_text'
                and field.get('excel_full_text_choices', False))
            # ref1/refN: reference sheet row numbers used for the choice
            # range of this field
            ref1 = len(refs) + REF_FIRST_ROW
            max_choice_width = _append_field_choices_rows(
                refs,
                choice_fields[field['datastore_id']],
                full_text_choices)
            refN = len(refs) + REF_FIRST_ROW - 2

            if full_text_choices:
                if 'excel_column_width' not in field:
                    col.width = max(col.width, max_choice_width)
                # expand example
                for ck, cv in choice_fields[field['datastore_id']]:
                    if ck == example:
                        ex_cell.value = u"{0}: {1}".format(ck, cv)
                        break

            choice_range = 'reference!${col}${ref1}:${col}${refN}'.format(
                col=REF_KEY_COL, ref1=ref1, refN=refN)
            user_choice_range = field.get('excel_choice_range_formula')
            if user_choice_range:
                # Formatter().parse yields (literal, field_name, spec,
                # conversion); collect the placeholder names other than
                # the built-in 'range' and 'range_top'
                choice_keys = set(
                    key for (_i, key, _i, _i)
                    in string.Formatter().parse(user_choice_range)
                    if key != 'range' and key != 'range_top')
                choice_values = {}
                if choice_keys:
                    # placeholders naming other fields become the first
                    # data cell of that field's column
                    choice_values = {
                        f['datastore_id']: "{col}{num}".format(
                            col=openpyxl.cell.get_column_letter(cn),
                            num=DATA_FIRST_ROW)
                        for cn, f in template_cols_fields(chromo)
                        if f['datastore_id'] in choice_keys}
                user_choice_range = user_choice_range.format(
                    range=choice_range,
                    range_top=choice_range.split(':')[0],
                    **choice_values)
            # the returned range is always the plain reference range,
            # not the user supplied formula
            cranges[field['datastore_id']] = choice_range

            choices = [c[0] for c in choice_fields[field['datastore_id']]]
            # no list validation is added for '_text' fields
            if field['datastore_type'] != '_text':
                v = openpyxl.worksheet.datavalidation.DataValidation(
                    type="list",
                    formula1=user_choice_range or choice_range,
                    allow_blank=True)
                v.errorTitle = u'Invalid choice'
                valid_keys = u', '.join(unicode(c) for c in choices)
                # short key lists are spelled out in the error message,
                # longer ones point at the reference sheet rows
                if len(valid_keys) < 40:
                    v.error = (u'Please enter one of the valid keys: '
                        + valid_keys)
                else:
                    v.error = (u'Please enter one of the valid keys shown on '
                        'sheet "reference" rows {0}-{1}'.format(ref1, refN))
                sheet.add_data_validation(v)
                v.ranges.append(validation_range)

        # link the column heading to this field's rows on the reference
        # sheet (everything appended to refs since reference_row1)
        sheet.cell(row=CHEADINGS_ROW, column=col_num).hyperlink = (
            '#reference!{colA}{row1}:{colZ}{rowN}'.format(
                colA=REF_FIELD_NUM_COL,
                row1=reference_row1,
                colZ=REF_VALUE_COL,
                rowN=len(refs) + REF_FIRST_ROW - 2))

        _add_conditional_formatting(
            sheet,
            col_letter,
            resource_num,
            error_style,
            required_style,
            data_num_rows)

    sheet.row_dimensions[HEADER_ROW].height = HEADER_HEIGHT
    # the code row (template version, org name, datastore ids) is hidden
    sheet.row_dimensions[CODE_ROW].hidden = True
    sheet.row_dimensions[CSTATUS_ROW].height = CSTATUS_HEIGHT
    sheet.row_dimensions[EXAMPLE_ROW].height = chromo.get(
        'excel_example_height', DEFAULT_EXAMPLE_HEIGHT)
    for i in xrange(DATA_FIRST_ROW, DATA_FIRST_ROW + data_num_rows):
        sheet.row_dimensions[i].height = chromo.get(
            'excel_data_height', DEFAULT_DATA_HEIGHT)

        # jump to first error/required cell in row
        sheet.cell(row=i, column=RSTATUS_COL_NUM).value = (
            '=IF(e{rnum}!{col}{row}>0,'
                'HYPERLINK("#"&ADDRESS({row},e{rnum}!{col}{row}),""),'
                'IF(r{rnum}!{col}{row}>0,'
                    'HYPERLINK("#"&ADDRESS({row},r{rnum}!{col}{row}),""),""))'
            .format(rnum=resource_num, col=RSTATUS_COL, row=i))

    sheet.column_dimensions[RSTATUS_COL].width = RSTATUS_WIDTH
    sheet.column_dimensions[RPAD_COL].width = RPAD_WIDTH

    sheet.freeze_panes = sheet[FREEZE_PANES]

    apply_styles(header_style, sheet.row_dimensions[HEADER_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CHEADINGS_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CSTATUS_ROW])
    apply_styles(example_style, sheet.row_dimensions[EXAMPLE_ROW])
    for (c,) in sheet[EDGE_RANGE]:
        apply_styles(edge_style, c)

    # trying to set the active cell (not working yet)
    select = "{col}{row}".format(col=DATA_FIRST_COL, row=DATA_FIRST_ROW)
    sheet.sheet_view.selection[0].activeCell = select
    sheet.sheet_view.selection[0].sqref = select

    return cranges
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
        (None when no field of the resource defines
        'solr_compare_previous_year')

    Documents are sent with conn.add(..., commit=False); a
    pysolr.SolrError is retried up to 10 attempts with a growing delay
    before it is re-raised.
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    # a single primary key may be given as a bare value
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id, partial id"
        s = orghash
        f = org
        p = org
        if not pk:
            # NOTE(review): recombinant_type is not defined inside this
            # function; presumably a module-level name - confirm,
            # otherwise this branch raises NameError
            s = hashlib.md5(s + recombinant_type + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
            p += u'|' + unicode(r['_id'])
        for k in pk:
            # chain the hash over each primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
            # partial id: org plus the first key value only
            if u'|' not in p:
                p += u'|' + unicode(r[k])
        return s, f, p

    out = []

    # {field id: {choice key: choice value}}
    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    # year-over-year comparison is only active when some field asks for it
    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly, partial = unique_id(r)

        # title holds "English | French"; without the separator both
        # names are the whole title
        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'partial_id': partial,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
            }

        # copy the requested organization extras into the document
        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for e in org_detail['extras']:
                if e['key'] in org_fields:
                    solrrec[e['key']] = e['value']

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    # non-numeric value: no range facet for this field
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key,
                        facet_range,
                        float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if f.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    # a ValueError here leaves the raw value to be indexed
                    pass
            elif f.get('extract_date_year'):
                # non-date fields: 'year' values are used as-is, anything
                # else is expected to start with "<year>-"
                if f.get('datastore_type') == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except ValueError:
                        pass
            if f.get('extract_double_sortable'):
                try:
                    solrrec['doubl_' + key] = float(value)
                except ValueError:
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields pass "<name>" as the key
                if key.endswith('_code'):
                    key = key[:-5]
                choice = choices.get(value, {})
                _add_choice(solrrec, key, r, choice, f)

        # catch-all search field; built before the static fields are
        # merged, so those are not part of it
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    import pysolr
    # a counts down 9..0; the error from the last attempt (a == 0) is
    # re-raised, earlier ones wait 5s, 10s, ... 45s before retrying
    for a in reversed(range(10)):
        try:
            if out:
                conn.add(out, commit=False)
            break
        except pysolr.SolrError:
            if not a:
                raise
            print "waiting..."
            import time
            time.sleep((10-a) * 5)
            print "retrying..."
    return unmatched
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN; the
        'title_translated' dict supplies the 'en' and 'fr' names
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
        (None when no field of the resource defines
        'solr_compare_previous_year')
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    # a single primary key may be given as a bare value
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # NOTE(review): recombinant_type is not defined inside this
            # function; presumably a module-level name - confirm,
            # otherwise this branch raises NameError
            s = hashlib.md5(s + recombinant_type + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # chain the hash over each primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    # {field id: {choice key: choice text}}
    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    # year-over-year comparison is only active when some field asks for it
    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        # both names come from the same 'title_translated' dict (the
        # previous two misspelled keys could not both exist)
        org_titles = org_detail['title_translated']
        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            'org_name_en': org_titles['en'],
            'org_name_fr': org_titles['fr'],
            }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    # non-numeric value: no range facet for this field
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                except ValueError:
                    # invalid date: index the raw value, extract nothing
                    pass
                else:
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
            elif f.get('datastore_type') == 'year':
                if f.get('extract_date_year'):
                    solrrec['date_year'] = value

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields get their text in "<name>_en/_fr"
                if key.endswith('_code'):
                    key = key[:-5]
                choice_text = choices.get(value, '')
                solrrec[key + '_en'] = recombinant_language_text(
                    choice_text, 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choice_text, 'fr')

        # catch-all search field; built before the static fields are
        # merged, so those are not part of it
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN; 'title'
        holds the English and French names separated by ' | '
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
        (None when no field of the resource defines
        'solr_compare_previous_year')
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    # a single primary key may be given as a bare value
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # NOTE(review): recombinant_type is not defined inside this
            # function; presumably a module-level name - confirm,
            # otherwise this branch raises NameError
            s = hashlib.md5(s + recombinant_type + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # chain the hash over each primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    # {field id: {choice key: choice text}}
    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    # year-over-year comparison is only active when some field asks for it
    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        # without the ' | ' separator both names are the whole title
        org_title = org_detail['title'].split(' | ', 1)
        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            'org_name_en': org_title[0],
            'org_name_fr': org_title[-1],
            }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except ValueError:
                    # non-numeric value: no range facet for this field
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key,
                        facet_range,
                        float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                except ValueError:
                    # invalid date: index the raw value, extract nothing
                    pass
                else:
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields get their text in "<name>_en/_fr"
                if key.endswith('_code'):
                    key = key[:-5]
                choice_text = choices.get(value, '')
                solrrec[key + '_en'] = recombinant_language_text(
                    choice_text, 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choice_text, 'fr')

        # catch-all search field built from every value collected so far
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
        (None when no field of the resource defines
        'solr_compare_previous_year')

    Documents are sent with conn.add(..., commit=False); a
    pysolr.SolrError is retried up to 10 attempts with a growing delay
    before it is re-raised.
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    # a single primary key may be given as a bare value
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id, partial id"
        s = orghash
        f = org
        p = org
        for k in pk:
            # chain the hash over each primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
            # partial id: org plus the first key value only
            if u'|' not in p:
                p += u'|' + unicode(r[k])
        return s, f, p

    out = []

    # {field id: {choice key: choice value}}
    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    # year-over-year comparison is only active when some field asks for it
    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly, partial = unique_id(r)

        # title holds "English | French"; without the separator both
        # names are the whole title
        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'partial_id': partial,
            'org_name_code': org_detail['name'],
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
            }

        # copy the requested organization extras into the document
        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for e in org_detail['extras']:
                if e['key'] in org_fields:
                    solrrec[e['key']] = e['value']

        for f in chromo['fields']:
            key = f['datastore_id']
            # fields missing from the record are treated as empty strings
            value = r.get(key, '')

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    # strip currency formatting before converting
                    # NOTE(review): assumes value is a string here -
                    # confirm, .replace would fail on other types
                    float_value = float(
                        value.replace('$', '').replace(',', ''))
                except ValueError:
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if f.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    # a ValueError here leaves the raw value to be indexed
                    pass
            elif f.get('extract_date_year'):
                # non-date fields: 'year' values are used as-is, anything
                # else is expected to start with "<year>-"
                if f.get('datastore_type') == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except ValueError:
                        pass
            if f.get('extract_double_sortable'):
                try:
                    solrrec['doubl_' + key] = float(value)
                except ValueError:
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields use "<name>" for the text keys
                if key.endswith('_code'):
                    key = key[:-5]
                if f.get('datastore_type') == '_text':
                    # multiple choice: value is a comma separated list of
                    # keys; unknown keys are skipped
                    solrrec[key + '_en'] = '; '.join(
                        recombinant_language_text(choices[v], 'en')
                        for v in value.split(',')
                        if v in choices)
                    solrrec[key + '_fr'] = '; '.join(
                        recombinant_language_text(choices[v], 'fr')
                        for v in value.split(',')
                        if v in choices)
                else:
                    choice = choices.get(value, {})
                    _add_choice(solrrec, key, r, choice, f)

        # catch-all search field; built before the static fields and the
        # sum range facet are added, so those are not part of it
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        # range facet on a summed field
        # NOTE(review): presumably ssrf['sum_field'] is filled by
        # sum_to_field above - confirm; a missing key raises KeyError
        ssrf = chromo.get('solr_sum_range_facet')
        if ssrf:
            key = ssrf['sum_field']
            float_value = float(solrrec[key])
            solrrec.update(
                numeric_range_facet(key, ssrf['facet_values'], float_value))

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    # also send the values of the second ("next years") unmatched mapping
    if unmatched:
        out.extend(unmatched[1].values())

    import pysolr
    # a counts down 9..0; the error from the last attempt (a == 0) is
    # re-raised, earlier ones wait 5s, 10s, ... 45s before retrying
    for a in reversed(range(10)):
        try:
            if out:
                conn.add(out, commit=False)
            break
        except pysolr.SolrError:
            if not a:
                raise
            print "waiting..."
            import time
            time.sleep((10 - a) * 5)
            print "retrying..."
    return unmatched
def _populate_excel_sheet(sheet, chromo, org, refs):
    """
    Turn an openpyxl sheet into the data entry template for the
    resource definition chromo and organization org.

    refs - list of rows to add to reference sheet; a heading row, one
        row per choice and a blank row are appended in place for every
        choice field

    Nothing is returned: sheet and refs are modified in place.
    """
    validation_cls = openpyxl.worksheet.datavalidation.DataValidation

    # shared TRUE/FALSE list validation, extended per boolean column
    true_false = validation_cls(
        type="list",
        formula1='"FALSE,TRUE"',
        allow_blank=True)
    sheet.add_data_validation(true_false)

    sheet.title = chromo['resource_name']

    def write(row, column, value, styles):
        "store value in the cell at row/column and style it"
        target = sheet.cell(row=row, column=column)
        target.value = value
        apply_styles(styles, target)

    def add_choice_column(field, col_letter, cells):
        "append reference rows and validation for one choice field"
        refs.append([_(field['label'])])
        top = len(refs) + 2
        for key, value in choice_fields[field['datastore_id']]:
            refs.append([None, key, value])
        bottom = len(refs) + 1
        refs.append([])

        keys_range = 'reference!$B${0}:$B${1}'.format(top, bottom)
        rule = validation_cls(
            type="list",
            formula1=keys_range,
            allow_blank=True)
        rule.errorTitle = u'Invalid choice'
        rule.error = (
            u'Please enter one of the valid keys shown on '
            'sheet "reference" rows {0}-{1}'.format(top, bottom))
        sheet.add_data_validation(rule)
        rule.ranges.append(cells)

        # hilight header if bad values pasted below
        mismatch_count = (
            'COUNTIF({0},"<>"&"")'  # all non-blank cells
            '-SUMPRODUCT(COUNTIF({0},{1}))'.format(cells, keys_range))
        sheet.conditional_formatting.add(
            "{0}2".format(col_letter),
            openpyxl.formatting.FormulaRule(
                [mismatch_count],
                stopIfTrue=True,
                fill=red_fill))

    # first row identifies the organization
    org_style = chromo['excel_organization_style']
    write(1, 1, org['name'], org_style)
    write(1, 2, org['title'], org_style)
    apply_styles(org_style, sheet.row_dimensions[1])

    header_style = chromo['excel_header_style']

    choice_fields = dict(
        (entry['datastore_id'], entry['choices'])
        for entry in recombinant_choice_fields(chromo['resource_name']))

    for n, field in enumerate(chromo['fields'], 1):
        # second row: translated label, third row: datastore column id
        write(2, n, _(field['label']), header_style)
        write(3, n, field['datastore_id'], header_style)

        # jumping through openpyxl hoops:
        col_letter = openpyxl.cell.get_column_letter(n)
        column = sheet.column_dimensions[col_letter]
        column.width = field['excel_column_width']
        # FIXME: format only below header
        column.number_format = (
            datastore_type[field['datastore_type']].xl_format)

        cells = '{0}4:{0}1004'.format(col_letter)
        if field['datastore_type'] == 'boolean':
            true_false.ranges.append(cells)
        if field['datastore_id'] in choice_fields:
            add_choice_column(field, col_letter, cells)

    apply_styles(header_style, sheet.row_dimensions[2])
    apply_styles(header_style, sheet.row_dimensions[3])
    # the datastore id row stays in the sheet but is hidden
    sheet.row_dimensions[3].hidden = True

    sheet.freeze_panes = sheet['A4']