def _add_choice(solrrec, key, record, choice, field):
    """
    add the english+french values for choice to solrrec

    Always sets ``<key>_en`` and ``<key>_fr`` from the choice text. When the
    choice expands to multiple values, also sets ``multi_<key>_en`` and
    ``multi_<key>_fr`` to lists built from ``field['choices_lookup']``.

    :param solrrec: solr document being built, modified in place
    :param key: prefix used for the solr keys written here
    :param record: source record, read for ``conditional_lookup`` columns
    :param choice: choice value for the record; checked for the ``lookup``
        and ``conditional_lookup`` keys
    :param field: field definition; ``choices_lookup`` is read when the
        choice expands to multiple values
    """
    solrrec[key + '_en'] = recombinant_language_text(choice, 'en')
    solrrec[key + '_fr'] = recombinant_language_text(choice, 'fr')

    # lookups used for choices that expand to multiple values
    if 'lookup' in choice:
        lookup = choice['lookup']
    elif 'conditional_lookup' in choice:
        for conditional in choice['conditional_lookup']:
            if 'column' in conditional:
                column = record[conditional['column']]
                if not column < conditional['less_than']:
                    continue
            # a conditional without 'column' always matches
            lookup = conditional['lookup']
            break
        else:
            # no conditional matched, so there is nothing to expand;
            # without this guard `lookup` would be unbound below
            return
    else:
        return

    solrrec['multi_' + key + '_en'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'en')
        for cl in lookup]
    solrrec['multi_' + key + '_fr'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'fr')
        for cl in lookup]
def _add_choice(solrrec, key, record, choice, field):
    """
    add the english+french values for choice to solrrec

    ``<key>_en`` / ``<key>_fr`` are always written. If the choice carries a
    ``lookup`` list, or a ``conditional_lookup`` entry that matches the
    record, ``multi_<key>_en`` / ``multi_<key>_fr`` are written as well,
    one list item per looked-up choice in ``field['choices_lookup']``.

    :param solrrec: solr document being built, modified in place
    :param key: prefix used for the solr keys written here
    :param record: source record, read for ``conditional_lookup`` columns
    :param choice: choice value for the record
    :param field: field definition providing ``choices_lookup``
    """
    solrrec[key + '_en'] = recombinant_language_text(choice, 'en')
    solrrec[key + '_fr'] = recombinant_language_text(choice, 'fr')

    # lookups used for choices that expand to multiple values
    if 'lookup' in choice:
        lookup = choice['lookup']
    elif 'conditional_lookup' in choice:
        lookup = None
        for conditional in choice['conditional_lookup']:
            if 'column' in conditional:
                column = record[conditional['column']]
                if not column < conditional['less_than']:
                    continue
            # first matching conditional wins
            lookup = conditional['lookup']
            break
        else:
            # every conditional was skipped: no multi-value expansion
            # (this case used to fail on the unassigned `lookup`)
            return
    else:
        return

    solrrec['multi_' + key + '_en'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'en')
        for cl in lookup
    ]
    solrrec['multi_' + key + '_fr'] = [
        recombinant_language_text(field['choices_lookup'][cl], 'fr')
        for cl in lookup
    ]
def _append_field_ref_rows(refs, field, link):
    """
    Append the reference rows describing one field to refs (in place).

    Rows are (style, cells) tuples: an empty spacer row, a 'title' row
    holding the field label (as a (link, label) pair when link is truthy),
    an 'attr' row for the ID, then one 'attr' row for each of
    description, obligation and format_type present in field.
    """
    def attr_row(heading, text):
        return ('attr', [heading, text])

    refs.append((None, []))

    title = recombinant_language_text(field['label'])
    title_cell = (link, title) if link else title
    refs.append(('title', [title_cell]))

    refs.append(attr_row(_('ID'), field['datastore_id']))

    if 'description' in field:
        refs.append(attr_row(
            _('Description'),
            recombinant_language_text(field['description'])))

    if 'obligation' in field:
        refs.append(attr_row(
            _('Obligation'),
            recombinant_language_text(field['obligation'])))

    if 'format_type' in field:
        refs.append(attr_row(
            _('Format'),
            recombinant_language_text(field['format_type'])))
def _update_records(records, org_detail, conn, resource_name):
    """
    Update records on solr core

    Builds one solr document per record and sends them all with a single
    ``conn.add_many(out, _commit=True)`` call.

    :param records: record dicts
    :ptype records: sequence of record dicts

    :param org_detail: org structure as returned via local CKAN
    :ptype org_detail: dict with local CKAN org structure

    :param conn: solr connection
    :ptype conn: obj

    :param resource_name: type being updated
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get("datastore_primary_key", [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail["name"]
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # NOTE(review): recombinant_type is not defined in this function;
            # confirm it exists at module level (resource_name may have been
            # intended)
            s = hashlib.md5(s + recombinant_type + "-%d" % r["_id"]).hexdigest()
            f += u"|" + unicode(r["_id"])
        for k in pk:
            # chain the hash over every primary key value
            s = hashlib.md5(s + r[k].encode("utf-8")).hexdigest()
            f += u"|" + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f["datastore_id"], dict(f["choices"]))
        for f in recombinant_choice_fields(resource_name, all_languages=True)
    )

    for r in records:
        unique, friendly = unique_id(r)

        solrrec = {
            "id": unique,
            "unique_id": friendly,
            "org_name_code": org_detail["name"],
            # title holds both languages as "english | french"
            "org_name_en": org_detail["title"].split(" | ", 1)[0],
            "org_name_fr": org_detail["title"].split(" | ", 1)[-1],
        }

        for f in chromo["fields"]:
            key = f["datastore_id"]
            value = r[key]

            facet_range = f.get("solr_float_range_facet")
            if facet_range:
                try:
                    float_value = float(value)
                except (TypeError, ValueError):
                    # non-numeric values (including None) get no range facet
                    pass
                else:
                    # first facet whose upper bound exceeds the value wins;
                    # a facet without "less_than" matches everything
                    for i, fac in enumerate(facet_range):
                        if "less_than" not in fac or float_value < fac["less_than"]:
                            solrrec[key + "_range"] = str(i)
                            solrrec[key + "_range_en"] = fac["label"].split(" | ")[0]
                            solrrec[key + "_range_fr"] = fac["label"].split(" | ")[-1]
                            break

            if f.get("datastore_type") == "date":
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get("extract_date_year"):
                        solrrec["date_year"] = value.split("-", 1)[0]
                    if f.get("extract_date_month"):
                        solrrec["date_month"] = value.split("-")[1]
                except ValueError:
                    # keep the original value when the date cannot be converted
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f["datastore_id"])
            if not choices:
                continue

            # "<name>_code" fields get their text stored as "<name>_en/_fr"
            if key.endswith("_code"):
                key = key[:-5]
            solrrec[key + "_en"] = recombinant_language_text(choices.get(value, ""), "en")
            solrrec[key + "_fr"] = recombinant_language_text(choices.get(value, ""), "fr")

        # catch-all field: every value collected so far, joined by spaces
        solrrec["text"] = u" ".join(unicode(v) for v in solrrec.values())
        out.append(solrrec)

    conn.add_many(out, _commit=True)
def _populate_reference_sheet(sheet, geno, refs):
    """
    Write the collected reference rows to sheet and style them.

    sheet - openpyxl worksheet to fill in
    geno - dataset type definition; its title and optional
        excel_*_style overrides are read
    refs - list of (style, cells) rows, written from row
        REF_FIRST_ROW - 1 onwards
    """
    title_number = 1

    edge_style = dict(DEFAULT_EDGE_STYLE, **geno.get('excel_edge_style', {}))
    header1_style = dict(
        DEFAULT_HEADER_STYLE, **geno.get('excel_header_style', {}))
    header2_style = dict(
        DEFAULT_REF_HEADER2_STYLE, **geno.get('excel_header_style', {}))
    choice_style = dict(
        DEFAULT_EXAMPLE_STYLE, **geno.get('excel_example_style', {}))

    # two header rows: dataset title, then the "Reference" caption
    fill_cell(
        sheet,
        REF_HEADER1_ROW,
        REF_KEY_COL_NUM,
        recombinant_language_text(geno['title']),
        header1_style)
    apply_styles(header1_style, sheet.row_dimensions[REF_HEADER1_ROW])
    fill_cell(
        sheet,
        REF_HEADER2_ROW,
        REF_KEY_COL_NUM,
        _('Reference'),
        header2_style)
    apply_styles(header2_style, sheet.row_dimensions[REF_HEADER2_ROW])

    for (edge_cell,) in sheet[REF_EDGE_RANGE]:
        apply_styles(edge_style, edge_cell)

    sheet.row_dimensions[REF_HEADER1_ROW].height = REF_HEADER1_HEIGHT
    sheet.row_dimensions[REF_HEADER2_ROW].height = REF_HEADER2_HEIGHT

    for row, (style, cells) in enumerate(refs, REF_FIRST_ROW - 1):
        link = None
        if len(cells) == 2:
            # key/value row: wrap the value to the column width
            value = wrap_text_to_width(cells[1], REF_VALUE_WIDTH).strip()
            cells = [cells[0], value]
        elif len(cells) == 1 and isinstance(cells[0], tuple):
            # single (link, text) cell
            link, value = cells[0]
            value = value.strip()
            cells = [value]

        for column, text in enumerate(cells, REF_KEY_COL_NUM):
            sheet.cell(row=row, column=column).value = (
                text.strip().replace('\n', '\r\n'))

        row_dims = sheet.row_dimensions[row]
        if len(cells) == 2:
            # one LINE_HEIGHT per line of wrapped text
            row_dims.height = LINE_HEIGHT + (value.count('\n') * LINE_HEIGHT)

        key_cell = sheet.cell(row=row, column=REF_KEY_COL_NUM)
        value_cell = sheet.cell(row=row, column=REF_VALUE_COL_NUM)

        if style == 'title':
            sheet.merge_cells(REF_FIELD_NUM_MERGE.format(row=row))
            sheet.merge_cells(REF_FIELD_TITLE_MERGE.format(row=row))
            fill_cell(
                sheet,
                row,
                REF_FIELD_NUM_COL_NUM,
                title_number,
                REF_NUMBER_STYLE)
            title_cell = sheet.cell(row=row, column=REF_KEY_COL_NUM)
            if link:
                title_cell.hyperlink = link
            apply_styles(REF_TITLE_STYLE, title_cell)
            row_dims.height = REF_FIELD_TITLE_HEIGHT
            title_number += 1
        elif style == 'choice':
            pad_cell = sheet.cell(row=row, column=REF_KEY_COL_NUM - 1)
            for styled in (pad_cell, key_cell, value_cell):
                apply_styles(choice_style, styled)
        elif style in ('attr', 'choice heading'):
            apply_styles(REF_ATTR_STYLE, key_cell)
            apply_styles(REF_VALUE_STYLE, value_cell)
            if style == 'choice heading':
                row_dims.height = REF_CHOICE_HEADING_HEIGHT

        apply_styles(REF_PAPER_STYLE, row_dims)

    column_dims = sheet.column_dimensions
    column_dims[RSTATUS_COL].width = RSTATUS_WIDTH
    sheet.cell(row=1, column=RPAD_COL_NUM).value = None  # make sure rpad col exists
    column_dims[RPAD_COL].width = RPAD_WIDTH
    column_dims[REF_KEY_COL].width = REF_KEY_WIDTH
    column_dims[REF_VALUE_COL].width = REF_VALUE_WIDTH
def _populate_excel_sheet(sheet, geno, chromo, org, refs, resource_num):
    """
    Format openpyxl sheet for the resource definition chromo and org.
    (Version 3)

    sheet - openpyxl worksheet to fill in; its title is set to the
        resource name
    geno - dataset type definition, read for excel_*_style overrides
    chromo - resource definition (title, fields, examples and optional
        excel_* settings are read)
    org - organization dict; 'title' and 'name' are read
    refs - list of rows to add to reference sheet, modified
        in place from this function
    resource_num - 1-based index of resource

    returns cranges dict of {datastore_id: reference_key_range}
    """
    sheet.title = chromo['resource_name']

    # each style starts from a module default and is overridden by geno
    edge_style = dict(DEFAULT_EDGE_STYLE, **geno.get('excel_edge_style', {}))
    required_style = dict(edge_style, **geno.get('excel_required_style', {}))
    header_style = dict(DEFAULT_HEADER_STYLE, **geno.get('excel_header_style', {}))
    cheadings_style = dict(DEFAULT_CHEADING_STYLE, **geno.get('excel_column_heading_style', {}))
    example_style = dict(DEFAULT_EXAMPLE_STYLE, **geno.get('excel_example_style', {}))
    error_style = dict(DEFAULT_ERROR_STYLE, **geno.get('excel_error_style', {}))

    cranges = {}
    data_num_rows = chromo.get('excel_data_num_rows', DEFAULT_DATA_NUM_ROWS)

    # create rows so we can set all heights
    for i in xrange(1, DATA_FIRST_ROW + data_num_rows):
        sheet.cell(row=i, column=1).value = None

    sheet.merge_cells(EXAMPLE_MERGE)
    fill_cell(sheet, EXAMPLE_ROW, 1, _('e.g.'), example_style)

    # formula referencing the matching cell on sheet "r<resource_num>"
    fill_cell(
        sheet,
        DATA_FIRST_ROW,
        RPAD_COL_NUM,
        u'=IF(r{rnum}!{col}{row},"","▶")'.format(
            rnum=resource_num,
            col=RPAD_COL,
            row=DATA_FIRST_ROW),
        TYPE_HERE_STYLE)

    # header text: resource title, an em dash, then the org title
    fill_cell(
        sheet,
        HEADER_ROW,
        DATA_FIRST_COL_NUM,
        recombinant_language_text(chromo['title'])
            + u' \N{em dash} ' + org_title_lang_hack(org['title']),
        header_style)

    sheet.cell(row=CODE_ROW, column=1).value = 'v3'  # template version
    # allow only upload to this org
    sheet.cell(row=CODE_ROW, column=2).value = org['name']

    cheadings_dimensions = sheet.row_dimensions[CHEADINGS_ROW]

    # datastore_id -> choices, for fields that have choices
    choice_fields = dict(
        (f['datastore_id'], f['choices'])
        for f in recombinant_choice_fields(chromo['resource_name']))

    for col_num, field in template_cols_fields(chromo):
        field_heading = recombinant_language_text(
            field.get('excel_heading', field['label'])).strip()
        # grow the column heading row to fit the tallest heading so far
        cheadings_dimensions.height = max(
            cheadings_dimensions.height,
            field_heading.count('\n') * LINE_HEIGHT + CHEADINGS_HEIGHT)

        col_heading_style = cheadings_style
        if 'excel_column_heading_style' in field:
            # use geno column heading style as base, just override keys
            col_heading_style = dict(
                cheadings_style,
                **field['excel_column_heading_style'])
            apply_styles(col_heading_style, sheet.cell(
                row=HEADER_ROW, column=col_num))
            apply_styles(col_heading_style, sheet.cell(
                row=CSTATUS_ROW, column=col_num))

        fill_cell(
            sheet,
            CHEADINGS_ROW,
            col_num,
            field_heading,
            col_heading_style)

        # reference sheet row where this field's rows will start
        reference_row1 = len(refs) + REF_FIRST_ROW

        # match against db columns
        sheet.cell(row=CODE_ROW, column=col_num).value = field['datastore_id']

        # list examples are written as one comma-separated string
        example = chromo['examples']['record'].get(field['datastore_id'], '')
        fill_cell(
            sheet,
            EXAMPLE_ROW,
            col_num,
            u','.join(example) if isinstance(example, list) else example,
            example_style)

        col_letter = openpyxl.cell.get_column_letter(col_num)

        # jump to first error/required cell in column
        fill_cell(
            sheet,
            CSTATUS_ROW,
            col_num,
            '=IF(e{rnum}!{col}{row}>0,HYPERLINK("#{col}"&e{rnum}!{col}{row},"")'
                ',IF(r{rnum}!{col}{row}>0,HYPERLINK("#{col}"&r{rnum}!{col}{row},""),""))'
                .format(rnum=resource_num, col=col_letter, row=CSTATUS_ROW),
            col_heading_style)

        # explicit width from the field wins over the heading estimate
        col = sheet.column_dimensions[col_letter]
        if 'excel_column_width' in field:
            col.width = field['excel_column_width']
        else:
            col.width = max(estimate_width(field_heading), CHEADINGS_MIN_WIDTH)

        # all data cells of this column
        validation_range = '{col}{row1}:{col}{rowN}'.format(
            col=col_letter,
            row1=DATA_FIRST_ROW,
            rowN=DATA_FIRST_ROW + data_num_rows - 1)

        xl_format = datastore_type[field['datastore_type']].xl_format
        alignment = openpyxl.styles.Alignment(wrap_text=True)
        protection = openpyxl.styles.Protection(locked=False)
        # data cells are unlocked; the example cell gets the same number
        # format and alignment but no protection change
        for (c,) in sheet[validation_range]:
            c.number_format = xl_format
            c.alignment = alignment
            c.protection = protection
        ex_cell = sheet.cell(row=EXAMPLE_ROW, column=col_num)
        ex_cell.number_format = xl_format
        ex_cell.alignment = alignment

        # the reference title row links back to this column heading
        _append_field_ref_rows(refs, field, "#'{sheet}'!{col}{row}".format(
            sheet=sheet.title, col=col_letter, row=CHEADINGS_ROW))

        if field['datastore_id'] in choice_fields:
            full_text_choices = (
                field['datastore_type'] != '_text' and field.get(
                'excel_full_text_choices', False))
            # ref1..refN: reference sheet rows used for this field's choices
            ref1 = len(refs) + REF_FIRST_ROW
            max_choice_width = _append_field_choices_rows(
                refs,
                choice_fields[field['datastore_id']],
                full_text_choices)
            refN = len(refs) + REF_FIRST_ROW - 2

            if full_text_choices:
                if 'excel_column_width' not in field:
                    col.width = max(col.width, max_choice_width)
                # expand example
                for ck, cv in choice_fields[field['datastore_id']]:
                    if ck == example:
                        ex_cell.value = u"{0}: {1}".format(ck, cv)
                        break

            choice_range = 'reference!${col}${ref1}:${col}${refN}'.format(
                col=REF_KEY_COL, ref1=ref1, refN=refN)
            user_choice_range = field.get('excel_choice_range_formula')
            if user_choice_range:
                # replacement field names used in the formula, other than
                # the built-in 'range' and 'range_top'
                choice_keys = set(
                    key for (_i, key, _i, _i)
                    in string.Formatter().parse(user_choice_range)
                    if key != 'range' and key != 'range_top')
                choice_values = {}
                if choice_keys:
                    # each named field maps to its first data cell
                    choice_values = {
                        f['datastore_id']: "{col}{num}".format(
                            col=openpyxl.cell.get_column_letter(cn),
                            num=DATA_FIRST_ROW)
                        for cn, f in template_cols_fields(chromo)
                        if f['datastore_id'] in choice_keys}
                user_choice_range = user_choice_range.format(
                    range=choice_range,
                    range_top=choice_range.split(':')[0],
                    **choice_values)
            cranges[field['datastore_id']] = choice_range

            choices = [c[0] for c in choice_fields[field['datastore_id']]]
            # '_text' fields get no list validation
            if field['datastore_type'] != '_text':
                v = openpyxl.worksheet.datavalidation.DataValidation(
                    type="list",
                    formula1=user_choice_range or choice_range,
                    allow_blank=True)
                v.errorTitle = u'Invalid choice'
                valid_keys = u', '.join(unicode(c) for c in choices)
                # short key lists are shown inline, long ones point at
                # the reference sheet rows
                if len(valid_keys) < 40:
                    v.error = (u'Please enter one of the valid keys: '
                        + valid_keys)
                else:
                    v.error = (u'Please enter one of the valid keys shown on '
                        'sheet "reference" rows {0}-{1}'.format(ref1, refN))
                sheet.add_data_validation(v)
                v.ranges.append(validation_range)

        # column heading links to this field's rows on the reference sheet
        sheet.cell(row=CHEADINGS_ROW, column=col_num).hyperlink = (
            '#reference!{colA}{row1}:{colZ}{rowN}'.format(
                colA=REF_FIELD_NUM_COL,
                row1=reference_row1,
                colZ=REF_VALUE_COL,
                rowN=len(refs) + REF_FIRST_ROW - 2))

    # col_letter here is the letter of the last column visited above
    _add_conditional_formatting(
        sheet,
        col_letter,
        resource_num,
        error_style,
        required_style,
        data_num_rows)

    sheet.row_dimensions[HEADER_ROW].height = HEADER_HEIGHT
    sheet.row_dimensions[CODE_ROW].hidden = True
    sheet.row_dimensions[CSTATUS_ROW].height = CSTATUS_HEIGHT
    sheet.row_dimensions[EXAMPLE_ROW].height = chromo.get(
        'excel_example_height', DEFAULT_EXAMPLE_HEIGHT)
    for i in xrange(DATA_FIRST_ROW, DATA_FIRST_ROW + data_num_rows):
        sheet.row_dimensions[i].height = chromo.get(
            'excel_data_height', DEFAULT_DATA_HEIGHT)

        # jump to first error/required cell in row
        sheet.cell(row=i, column=RSTATUS_COL_NUM).value = (
            '=IF(e{rnum}!{col}{row}>0,'
                'HYPERLINK("#"&ADDRESS({row},e{rnum}!{col}{row}),""),'
                'IF(r{rnum}!{col}{row}>0,'
                    'HYPERLINK("#"&ADDRESS({row},r{rnum}!{col}{row}),""),""))'
            .format(rnum=resource_num, col=RSTATUS_COL, row=i))

    sheet.column_dimensions[RSTATUS_COL].width = RSTATUS_WIDTH
    sheet.column_dimensions[RPAD_COL].width = RPAD_WIDTH

    sheet.freeze_panes = sheet[FREEZE_PANES]

    apply_styles(header_style, sheet.row_dimensions[HEADER_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CHEADINGS_ROW])
    apply_styles(cheadings_style, sheet.row_dimensions[CSTATUS_ROW])
    apply_styles(example_style, sheet.row_dimensions[EXAMPLE_ROW])
    for (c,) in sheet[EDGE_RANGE]:
        apply_styles(edge_style, c)

    # trying to set the active cell (not working yet)
    select = "{col}{row}".format(col=DATA_FIRST_COL, row=DATA_FIRST_ROW)
    sheet.sheet_view.selection[0].activeCell = select
    sheet.sheet_view.selection[0].sqref = select

    return cranges
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Builds one solr document per record. Documents are either appended to
    the batch directly or, when previous-year comparison is enabled for the
    resource, handed to ``match_compare_output``. A non-empty batch is sent
    with ``conn.add_many(out, _commit=True)``.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # NOTE(review): recombinant_type is not defined in this function;
            # confirm it exists at module level (resource_name may have been
            # intended)
            s = hashlib.md5(s + recombinant_type + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # chain the hash over every primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            # both languages come from the same 'title_translated' dict
            # (the keys were previously misspelled two different ways)
            'org_name_en': org_detail['title_translated']['en'],
            'org_name_fr': org_detail['title_translated']['fr'],
        }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except (TypeError, ValueError):
                    # non-numeric values (including None) get no range facet
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    # keep the original value when the date cannot be converted
                    pass
            elif f.get('datastore_type') == 'year':
                if f.get('extract_date_year'):
                    solrrec['date_year'] = value

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields get their text stored as "<name>_en/_fr"
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        # catch-all field: every value collected so far, joined by spaces
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    Builds one solr document per record. Documents are either appended to
    the batch directly or, when previous-year comparison is enabled for the
    resource, handed to ``match_compare_output``. A non-empty batch is sent
    with ``conn.add_many(out, _commit=True)``.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(r):
        "return hash, friendly id"
        s = orghash
        f = org
        if not pk:
            # NOTE(review): recombinant_type is not defined in this function;
            # confirm it exists at module level (resource_name may have been
            # intended)
            s = hashlib.md5(s + recombinant_type + "-%d" % r['_id']).hexdigest()
            f += u'|' + unicode(r['_id'])
        for k in pk:
            # chain the hash over every primary key value
            s = hashlib.md5(s + r[k].encode('utf-8')).hexdigest()
            f += u'|' + unicode(r[k])
        return s, f

    out = []

    choice_fields = dict(
        (f['datastore_id'], dict(f['choices']))
        for f in recombinant_choice_fields(resource_name, all_languages=True))

    if any('solr_compare_previous_year' in f for f in chromo['fields']):
        if not unmatched:
            # previous years, next years
            unmatched = ({}, {})
    else:
        unmatched = None

    for r in records:
        unique, friendly = unique_id(r)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'org_name_code': org_detail['name'],
            # title holds both languages as "english | french"
            'org_name_en': org_detail['title'].split(' | ', 1)[0],
            'org_name_fr': org_detail['title'].split(' | ', 1)[-1],
        }

        for f in chromo['fields']:
            key = f['datastore_id']
            value = r[key]

            facet_range = f.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    float_value = float(value)
                except (TypeError, ValueError):
                    # non-numeric values (including None) get no range facet
                    pass
                else:
                    solrrec.update(dollar_range_facet(
                        key, facet_range, float_value))

            sum_to = list_or_none(f.get('solr_sum_to_field'))
            if sum_to:
                for fname in sum_to:
                    sum_to_field(solrrec, fname, value)

            if f.get('datastore_type') == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if f.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if f.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                except ValueError:
                    # keep the original value when the date cannot be converted
                    pass

            solrrec[key] = value

            choices = choice_fields.get(f['datastore_id'])
            if choices:
                # "<name>_code" fields get their text stored as "<name>_en/_fr"
                if key.endswith('_code'):
                    key = key[:-5]
                solrrec[key + '_en'] = recombinant_language_text(
                    choices.get(value, ''), 'en')
                solrrec[key + '_fr'] = recombinant_language_text(
                    choices.get(value, ''), 'fr')

        # catch-all field: every value collected so far, joined by spaces
        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if out:
        conn.add_many(out, _commit=True)
    return unmatched
def _update_records(records, org_detail, conn, resource_name, unmatched):
    """
    Update records on solr core

    One solr document is built per record and queued (directly, or through
    match_compare_output when previous-year comparison is enabled). The
    queued documents are sent with conn.add(out, commit=False), retrying on
    pysolr.SolrError with a growing delay before giving up.

    :param records: record dicts
    :param org_detail: org structure as returned via local CKAN
    :param conn: solr connection
    :param resource_name: type being updated
    :param unmatched: yet-unmatched values for comparing prev/next year

    :returns: new unmatched for next call for same org+resource_name
    """
    chromo = get_chromo(resource_name)
    pk = chromo.get('datastore_primary_key', [])
    if not isinstance(pk, list):
        pk = [pk]

    org = org_detail['name']
    orghash = hashlib.md5(org).hexdigest()

    def unique_id(record):
        "return hash, friendly id, partial id"
        digest = orghash
        friendly_id = org
        partial_id = org
        for pk_name in pk:
            pk_value = record[pk_name]
            digest = hashlib.md5(
                digest + pk_value.encode('utf-8')).hexdigest()
            friendly_id += u'|' + unicode(pk_value)
            # partial id stops growing once it contains a separator
            if u'|' not in partial_id:
                partial_id += u'|' + unicode(pk_value)
        return digest, friendly_id, partial_id

    out = []

    choice_fields = dict(
        (cf['datastore_id'], dict(cf['choices']))
        for cf in recombinant_choice_fields(resource_name, all_languages=True))

    if not any(
            'solr_compare_previous_year' in fld for fld in chromo['fields']):
        unmatched = None
    elif not unmatched:
        # previous years, next years
        unmatched = ({}, {})

    for record in records:
        unique, friendly, partial = unique_id(record)
        # title holds both languages as "english | french"
        title_parts = org_detail['title'].split(' | ', 1)

        solrrec = {
            'id': unique,
            'unique_id': friendly,
            'partial_id': partial,
            'org_name_code': org_detail['name'],
            'org_name_en': title_parts[0],
            'org_name_fr': title_parts[-1],
        }

        org_fields = chromo.get('solr_org_fields')
        if org_fields:
            for extra in org_detail['extras']:
                if extra['key'] in org_fields:
                    solrrec[extra['key']] = extra['value']

        for field in chromo['fields']:
            key = field['datastore_id']
            value = record.get(key, '')
            ftype = field.get('datastore_type')

            facet_range = field.get('solr_dollar_range_facet')
            if facet_range:
                try:
                    amount = float(value.replace('$', '').replace(',', ''))
                except ValueError:
                    pass
                else:
                    solrrec.update(
                        dollar_range_facet(key, facet_range, amount))

            for fname in list_or_none(field.get('solr_sum_to_field')) or ():
                sum_to_field(solrrec, fname, value)

            if ftype == 'date':
                try:
                    value = date2zulu(value)
                    # CM: If this only applies to PD types this should be accurate
                    # CM: This should only apply if valid (as per date2zulu) else NULL
                    if field.get('extract_date_year'):
                        solrrec['date_year'] = value.split('-', 1)[0]
                    if field.get('extract_date_month'):
                        solrrec['date_month'] = value.split('-')[1]
                    if field.get('extract_date_clean'):
                        solrrec['date_clean'] = value
                except ValueError:
                    pass
            elif field.get('extract_date_year'):
                if ftype == 'year':
                    solrrec['date_year'] = value
                else:
                    try:
                        solrrec['date_year'] = int(value.split('-', 1)[0])
                    except ValueError:
                        pass

            if field.get('extract_double_sortable'):
                try:
                    solrrec['doubl_' + key] = float(value)
                except ValueError:
                    pass

            solrrec[key] = value

            choices = choice_fields.get(field['datastore_id'])
            if not choices:
                continue

            # "<name>_code" fields get their text stored under "<name>"
            if key.endswith('_code'):
                key = key[:-5]

            if ftype == '_text':
                # comma-separated codes: keep only the known ones
                selected = [
                    code for code in value.split(',') if code in choices]
                for lang in ('en', 'fr'):
                    solrrec[key + '_' + lang] = '; '.join(
                        recombinant_language_text(choices[code], lang)
                        for code in selected)
            else:
                _add_choice(
                    solrrec, key, record, choices.get(value, {}), field)

        solrrec['text'] = u' '.join(unicode(v) for v in solrrec.values())

        if 'solr_static_fields' in chromo:
            solrrec.update(chromo['solr_static_fields'])

        ssrf = chromo.get('solr_sum_range_facet')
        if ssrf:
            key = ssrf['sum_field']
            float_value = float(solrrec[key])
            solrrec.update(
                numeric_range_facet(key, ssrf['facet_values'], float_value))

        if unmatched:
            match_compare_output(solrrec, out, unmatched, chromo)
        else:
            out.append(solrrec)

    if unmatched:
        out.extend(unmatched[1].values())

    import pysolr
    if out:
        # up to 10 attempts; the wait grows by 5 seconds after each failure
        for remaining in range(9, -1, -1):
            try:
                conn.add(out, commit=False)
            except pysolr.SolrError:
                if remaining == 0:
                    raise
                print("waiting...")
                import time
                time.sleep((10 - remaining) * 5)
                print("retrying...")
            else:
                break
    return unmatched