def parse_dsp_solution(solution):
    """
    Parses the co2mpas model results.

    The dotted keys of `solution` are expanded into nested dictionaries.
    The `co2_emission_value` selected from the `output.calibration` data of
    the `wltp_l`/`wltp_h` cycles is additionally combined with whatever is
    already stored under `target.prediction` for the same cycle.

    :param solution:
        Co2mpas model after dispatching.
    :type solution: co2mpas.dispatcher.Solution

    :return:
        Mapped outputs, plus the solution pipe under the key `pipe`.
    :rtype: dict[dict]
    """
    parsed = {}
    for dotted_key, value in solution.items():
        path = dotted_key.split('.')
        dsp_utl.get_nested_dicts(parsed, *path, default=co2_utl.ret_v(value))

    wltp_cycles = ('wltp_l', 'wltp_h')
    # Iterate over a snapshot: `parsed` is modified inside the loop.
    for key, value in list(dsp_utl.stack_nested_keys(parsed, depth=3)):
        parent, cycle = key[:-1], key[-1]
        if parent != ('output', 'calibration') or cycle not in wltp_cycles:
            continue
        co2 = dsp_utl.selector(
            ('co2_emission_value', ), value, allow_miss=True
        )
        if not co2:
            continue
        targets = dsp_utl.get_nested_dicts(parsed, 'target', 'prediction')
        targets[cycle] = dsp_utl.combine_dicts(co2, targets.get(cycle, {}))

    parsed['pipe'] = solution.pipe
    return parsed
def compare_outputs_vs_targets(data):
    """
    Compares model outputs vs targets.

    Only the targets that have a matching entry under `output` are compared
    and only non-empty comparisons are stored.

    :param data:
        Model data.
    :type data: dict

    :return:
        Comparison results.
    :rtype: dict
    """
    comparison = {}
    metrics = _get_metrics()
    targets = data.get('target', {})
    for key, target in dsp_utl.stack_nested_keys(targets, depth=3):
        if dsp_utl.are_in_nested_dicts(data, 'output', *key):
            output = dsp_utl.get_nested_dicts(data, 'output', *key)
            result = _compare(target, output, metrics=metrics)
            if result:
                dsp_utl.get_nested_dicts(
                    comparison, *key, default=co2_utl.ret_v(result)
                )
    return comparison
def get_chart_reference(report):
    """
    Builds the chart references for the time series of the report outputs.

    For every `ts` output holding a `times` entry, each parameter that has an
    entry in `_map_cycle_report_graphs()` contributes one series (`x`, `y`
    and `label`) to the chart identified by `(key[2], param_id)`.

    :param report:
        Report data.
    :type report: dict

    :return:
        Chart references.
    :rtype: dict
    """
    charts, graphs = {}, _map_cycle_report_graphs()
    outputs = report.get('output', {})
    entries = dsp_utl.stack_nested_keys(outputs, key=('output',), depth=3)
    for key, values in sorted(entries):
        if key[-1] != 'ts' or 'times' not in values:
            continue
        label = '{}/%s'.format(co2_exl._sheet_name(key))
        for name, _ in sorted(values.items()):
            param_id = co2_exl._re_params_name.match(name)['param']
            if not graphs.get(param_id, None):
                continue
            series = {
                'x': key + ('times',),
                'y': key + (name,),
                'label': label % name
            }
            path = key[2], param_id, 'series'
            dsp_utl.get_nested_dicts(
                charts, *path, default=list
            ).append(series)

    # Add the graph options to each chart (the `label` option is dropped).
    for key, chart in dsp_utl.stack_nested_keys(charts, depth=2):
        options = graphs[key[1]]
        options.pop('label', None)
        chart.update(options)

    return charts
def _format_selection(score_by_model, depth=-1, index='model_id'):
    """
    Re-groups the nested selection data moving the first key into the rows.

    :param score_by_model:
        Nested selection data.
    :type score_by_model: dict

    :param depth:
        Depth used to stack the nested keys.
    :type depth: int, optional

    :param index:
        Name of the field that receives the first key of each entry.
    :type index: str, optional

    :return:
        Nested dict of lists; each row is a copy of the original value with
        the extra `index` field.
    :rtype: dict
    """
    formatted = {}
    entries = sorted(dsp_utl.stack_nested_keys(score_by_model, depth=depth))
    for key, value in entries:
        row = value.copy()
        row[index] = key[0]
        rows = dsp_utl.get_nested_dicts(formatted, *key[1:], default=list)
        rows.append(row)
    return formatted
def _add_special_data2report(data, report, to_keys, *from_keys):
    """
    Copies a value of `data` into the report under a `<first>.<last>` name.

    Nothing is copied when the last key is `times` or when `from_keys` are
    not in `data`.

    :param data:
        Source nested data.
    :type data: dict

    :param report:
        Report to be updated in place.
    :type report: dict

    :param to_keys:
        Keys of the report section that receives the value.
    :type to_keys: tuple

    :param from_keys:
        Keys of the value inside `data`.
    :type from_keys: str

    :return:
        `(True, value)` if the value has been copied, `(False, None)`
        otherwise.
    :rtype: tuple
    """
    if from_keys[-1] == 'times':
        return False, None
    if not dsp_utl.are_in_nested_dicts(data, *from_keys):
        return False, None

    value = dsp_utl.get_nested_dicts(data, *from_keys)
    path = to_keys + ('{}.{}'.format(from_keys[0], from_keys[-1]),)
    section = dsp_utl.get_nested_dicts(
        report, *path[:-1], default=collections.OrderedDict
    )
    section[path[-1]] = value
    return True, value
def _extract_summary_from_summary(report, extracted):
    """
    Updates `extracted` with the emission/consumption results of the report.

    Only the `declared_co2_emission`, `co2_emission` and `fuel_consumption`
    entries of `summary.results` are considered; empty values are skipped.

    :param report:
        Report data.
    :type report: dict

    :param extracted:
        Extracted summary, updated in place.
    :type extracted: dict
    """
    path = ('summary', 'results')
    if not dsp_utl.are_in_nested_dicts(report, *path):
        return

    wanted = ('declared_co2_emission', 'co2_emission', 'fuel_consumption')
    for name, results in dsp_utl.get_nested_dicts(report, *path).items():
        if name not in wanted:
            continue
        for key, values in dsp_utl.stack_nested_keys(results, depth=3):
            if values:
                dsp_utl.get_nested_dicts(extracted, *key).update(values)
def filter_summary(changes, new_outputs, summary):
    """
    Collects the plan variations and the summary entries related to them.

    :param changes:
        Plan changes, keyed by tuples of keys.
    :type changes: dict

    :param new_outputs:
        Dotted ids of the new outputs.
    :type new_outputs: iterable[str]

    :param summary:
        Summary data.
    :type summary: dict

    :return:
        Variations: the changes (stored under a `plan.<...>` name) plus the
        summary entries whose parent keys have been selected.
    :rtype: dict
    """
    selected = {tuple(out.split('.')[:0:-1]) for out in new_outputs}
    variations = {}
    for key, value in changes.items():
        group = key[-2:1:-1]
        selected.add(group)
        # NOTE(review): the filter below tests the whole `key` (not the item
        # `i`) against `group`, so it looks always true and every item of
        # `key[:-1]` is kept - confirm whether `i not in group` was meant.
        plan_id = 'plan.%s' % '.'.join(
            i for i in key[:-1] if key not in group
        )
        path = group + (plan_id, key[-1])
        dsp_utl.get_nested_dicts(
            variations, *path, default=co2_utl.ret_v(value)
        )

    for key, value in dsp_utl.stack_nested_keys(summary, depth=3):
        if key[:-1] in selected:
            dsp_utl.get_nested_dicts(
                variations, *key, default=co2_utl.ret_v(value)
            )
    return variations
def overwrite_declaration_config_data(data):
    """
    Adds the declaration selector configuration to `config.selector.all`.

    The input `data` is first combined into a new nested dict (depth 3) and
    the selector configuration is deep-copied before being filled with the
    entries of `constants.con_vals.DECLARATION_SELECTOR_CONFIG`.

    :param data:
        Input data.
    :type data: dict

    :return:
        Data with the declaration selector configuration.
    :rtype: dict
    """
    declaration_config = constants.con_vals.DECLARATION_SELECTOR_CONFIG
    res = dsp_utl.combine_nested_dicts(data, depth=3)
    parent, leaf = ('config', 'selector'), 'all'

    selector = copy.deepcopy(dsp_utl.get_nested_dicts(res, *parent, leaf))
    for path, value in dsp_utl.stack_nested_keys(declaration_config):
        dsp_utl.get_nested_dicts(
            selector, *path, default=co2_utl.ret_v(value)
        )

    dsp_utl.get_nested_dicts(res, *parent)[leaf] = selector
    return res
def select_declaration_data(data, diff=None):
    """
    Selects from `data` the entries enabled in the declaration data.

    :param data:
        Input data.
    :type data: dict

    :param diff:
        If given, it is cleared and filled with the keys (depth 4) of `data`
        that are not in the selection.
    :type diff: set, optional

    :return:
        Selected declaration data.
    :rtype: dict
    """
    selected = {}
    declaration = constants.con_vals.DECLARATION_DATA
    for path, enabled in dsp_utl.stack_nested_keys(declaration):
        if not enabled or not dsp_utl.are_in_nested_dicts(data, *path):
            continue
        value = dsp_utl.get_nested_dicts(data, *path)
        dsp_utl.get_nested_dicts(
            selected, *path, default=co2_utl.ret_v(value)
        )

    if diff is not None:
        diff.clear()
        diff.update(
            item[0] for item in dsp_utl.stack_nested_keys(data, depth=4)
        )
        kept = (
            item[0] for item in dsp_utl.stack_nested_keys(selected, depth=4)
        )
        diff.difference_update(kept)
    return selected
def _format_scores(scores):
    """
    Re-arranges the `limits` and `errors` of the scores into lists of rows.

    :param scores:
        Nested model scores.
    :type scores: dict

    :return:
        Nested dict of lists; every row carries the `model_id` and `param_id`
        fields.
    :rtype: dict
    """
    flat = {}
    for key, group in dsp_utl.stack_nested_keys(scores, depth=3):
        kind = key[-1]
        if kind not in ('limits', 'errors'):
            continue
        # Errors are stored one level deeper, under the `score` field.
        suffix = ('score',) if kind == 'errors' else ()
        for sub, value in dsp_utl.stack_nested_keys(group):
            path = (key[0], sub[-1], key[1]) + sub[:-1] + suffix
            dsp_utl.get_nested_dicts(
                flat, *path, default=co2_utl.ret_v(value)
            )

    formatted = {}
    for key, row in sorted(dsp_utl.stack_nested_keys(flat, depth=4)):
        row.update(dsp_utl.map_list(['model_id', 'param_id'], *key[:2]))
        dsp_utl.get_nested_dicts(formatted, *key[2:], default=list).append(row)
    return formatted
def _summary2df(data):
    """
    Converts the `results`, `selection` and `comparison` summaries into
    data-frames.

    :param data:
        Report data.
    :type data: dict

    :return:
        `{'summary': [data-frames]}` or an empty dict when there is nothing
        to convert. Each data-frame gets a `name` attribute.
    :rtype: dict
    """
    frames = []
    summary = data.get('summary', {})

    if 'results' in summary:
        grouped = {}
        index = ['cycle', 'stage', 'usage']
        results = dsp_utl.stack_nested_keys(summary['results'], depth=4)
        for key, values in results:
            rows = dsp_utl.get_nested_dicts(grouped, key[0], default=list)
            labels = dsp_utl.map_list(index, *key[1:])
            rows.append(dsp_utl.combine_dicts(labels, values))

        if grouped:
            frame = _dd2df(
                grouped, index=index, depth=2,
                col_key=functools.partial(_sort_key, p_keys=('param', ) * 2),
                row_key=functools.partial(_sort_key, p_keys=index)
            )
            frame.columns = pd.MultiIndex.from_tuples(
                _add_units(frame.columns)
            )
            setattr(frame, 'name', 'results')
            frames.append(frame)

    if 'selection' in summary:
        frame = _dd2df(
            summary['selection'], ['model_id'], depth=2,
            col_key=functools.partial(_sort_key, p_keys=('stage', 'cycle')),
            row_key=functools.partial(_sort_key, p_keys=())
        )
        setattr(frame, 'name', 'selection')
        frames.append(frame)

    if 'comparison' in summary:
        grouped = {}
        comparison = dsp_utl.stack_nested_keys(
            summary['comparison'], depth=3
        )
        for key, values in comparison:
            row = dsp_utl.combine_dicts(values, base={'param_id': key[-1]})
            dsp_utl.get_nested_dicts(
                grouped, *key[:-1], default=list
            ).append(row)

        if grouped:
            frame = _dd2df(
                grouped, ['param_id'], depth=2,
                col_key=functools.partial(
                    _sort_key, p_keys=('stage', 'cycle')
                ),
                row_key=functools.partial(_sort_key, p_keys=())
            )
            setattr(frame, 'name', 'comparison')
            frames.append(frame)

    return {'summary': frames} if frames else {}
def get_values(data, keys, tag=(), update=lambda k, v: v, base=None):
    """
    Selects `keys` from the `input`, `target` and `output` data.

    :param data:
        Model data.
    :type data: dict

    :param keys:
        Keys to be selected from each entry.
    :type keys: iterable

    :param tag:
        Keys prepended to the (reversed) key of every stored entry.
    :type tag: tuple, optional

    :param update:
        Function called as `update(reversed_key, selected_values)`; its
        result is what gets stored (when not empty).
    :type update: callable, optional

    :param base:
        Dictionary to be filled; a new one is created when `None`.
    :type base: dict, optional

    :return:
        The filled `base` dictionary.
    :rtype: dict
    """
    sections = ('input', 'target', 'output')
    selected = dsp_utl.selector(sections, data, allow_miss=True)

    if base is None:
        base = {}

    for key, values in dsp_utl.stack_nested_keys(selected, depth=3):
        reversed_key = key[::-1]
        picked = dsp_utl.selector(keys, values, allow_miss=True)
        picked = update(reversed_key, picked)
        if not picked:
            continue
        path = tag + reversed_key
        dsp_utl.get_nested_dicts(base, *path, default=co2_utl.ret_v(picked))

    return base
def _extract_summary_from_model_scores(report, extracted):
    """
    Adds the status of the selected models to the `output` entries of
    `extracted`.

    :param report:
        Report data.
    :type report: dict

    :param extracted:
        Extracted summary, updated in place.
    :type extracted: dict

    :return:
        `False` if the report has no model selections, `True` otherwise.
    :rtype: bool
    """
    path = ('data', 'calibration', 'model_scores', 'model_selections')
    if not dsp_utl.are_in_nested_dicts(report, *path):
        return False

    selections = dsp_utl.get_nested_dicts(report, *path)
    for key, values in dsp_utl.stack_nested_keys(extracted, depth=3):
        # The selections are looked up with the first two keys swapped.
        sel_key = key[1::-1]
        if key[-1] != 'output':
            continue
        if not dsp_utl.are_in_nested_dicts(selections, *sel_key):
            continue
        models = dsp_utl.get_nested_dicts(selections, *sel_key)
        statuses = (
            (d['model_id'], d['status']) for d in models if 'status' in d
        )
        values.update(_format_dict(statuses, 'status %s'))

    return True
def _process_folder_files(*args, result_listener=None, **kwargs):
    """
    Process all xls-files in a folder with CO2MPAS-model.

    All positional and keyword arguments are forwarded, after the start
    time, to `_yield_folder_files_results`; the parameters documented below
    are presumably the ones consumed by that call.

    :param list input_files:
        A list of input xl-files.

    :param output_folder:
        Output folder.
    :type output_folder: str

    :param plot_workflow:
        If to show the CO2MPAS model workflow.
    :type plot_workflow: bool, optional

    :param output_template:
        The xlsx-file to use as template and import existing sheets from.

        - If file already exists, a clone gets updated with new sheets.
        - If it is None, it copies and uses the input-file as template.
        - if it is `False`, it does not use any template and a fresh output
          xlsx-file is created.
    :type output_template: None,False,str

    :param result_listener:
        Object passed, together with each result, to
        `notify_result_listener`.
    :type result_listener: optional

    :return:
        The accumulated summary and the start time of the processing.
    :rtype: tuple
    """
    start_time = datetime.datetime.today()

    summary, n = {}, ('solution', 'summary')
    for res in _yield_folder_files_results(start_time, *args, **kwargs):
        if dsp_utl.are_in_nested_dicts(res, *n):
            _add2summary(summary, dsp_utl.get_nested_dicts(res, *n))
            # NOTE(review): the original indentation was lost in this copy;
            # the listener is assumed to be notified only for results that
            # carry a summary - confirm.
            notify_result_listener(result_listener, res)

    return summary, start_time
def validate_plan(plan, engineering_mode, soft_validation, use_selector):
    """
    Validates the rows of the simulation plan.

    :param plan:
        Simulation plan.
    :type plan: pandas.DataFrame

    :param engineering_mode:
        Forwarded to `_eng_mode_parser`.
    :type engineering_mode: bool

    :param soft_validation:
        Forwarded to `_eng_mode_parser`.
    :type soft_validation: bool

    :param use_selector:
        Forwarded to `_eng_mode_parser`.
    :type use_selector: bool

    :return:
        List of `(row index, validated inputs)` or `dsp_utl.NONE` when
        `_log_errors_msg` reports errors.
    :rtype: list | dsp_utl.NONE
    """
    data_schema = define_data_schema(read=True)
    flag_schema = define_flags_schema(read=True)
    validate_data, validate_flag = data_schema.validate, flag_schema.validate

    validated_plan, errors = [], {}
    for index, row in plan.iterrows():
        inputs, base_inputs = {}, {}
        row.dropna(how='all', inplace=True)
        plan_id = 'plan id:{}'.format(index[0])

        for key, value in excel._parse_values(row, where='in plan'):
            scope = key[0]
            if scope == 'base':
                parent = dsp_utl.get_nested_dicts(base_inputs, *key[1:-1])
                value = _add_validated_input(
                    parent, validate_data, (plan_id, ) + key, value, errors
                )
            elif scope == 'flag':
                value = _add_validated_input(
                    {}, validate_flag, (plan_id, ) + key, value, errors
                )

            if value is not dsp_utl.NONE:
                inputs[key] = value

        # Only the (possibly extended) errors of the parser are kept.
        errors = _eng_mode_parser(
            engineering_mode, soft_validation, use_selector, base_inputs,
            errors
        )[1]

        validated_plan.append((index, inputs))

    if _log_errors_msg(errors):
        return dsp_utl.NONE

    return validated_plan
def _scores2df(data):
    """
    Converts the calibration model scores into data-frames.

    :param data:
        Report data.
    :type data: dict

    :return:
        `{'data.calibration.model_scores': [data-frames]}` or an empty dict
        when no score is available. Each data-frame gets a `name` attribute.
    :rtype: dict
    """
    path = ('data', 'calibration', 'model_scores')
    if not dsp_utl.are_in_nested_dicts(data, *path):
        return {}

    scores = dsp_utl.get_nested_dicts(data, *path)
    # (score id, index, depth, column sort keys, row sort keys)
    layouts = (
        ('model_selections', ['model_id'], 2, ('stage', 'cycle'), ()),
        ('score_by_model', ['model_id'], 1, ('cycle', ), ()),
        ('scores', ['model_id', 'param_id'], 2, ('cycle', 'cycle'), ()),
        ('param_selections', ['param_id'], 2, ('stage', 'cycle'), ()),
        ('models_uuid', ['cycle'], 0, (), ('cycle', ))
    )

    frames = []
    for name, index, depth, col_keys, row_keys in layouts:
        if name in scores:
            frame = _dd2df(
                scores[name], index, depth=depth,
                col_key=functools.partial(_sort_key, p_keys=col_keys),
                row_key=functools.partial(_sort_key, p_keys=row_keys)
            )
            setattr(frame, 'name', name)
            frames.append(frame)

    return {'.'.join(path): frames} if frames else {}
def _validate_base_with_schema(data):
    """
    Validates the base data with the read data schema.

    :param data:
        Base data (nested four levels deep).
    :type data: dict

    :return:
        Validated inputs and validation errors.
    :rtype: tuple[dict, dict]
    """
    validate = define_data_schema(read=True).validate
    inputs, errors = {}, {}
    for key, value in sorted(dsp_utl.stack_nested_keys(data, depth=4)):
        parent = dsp_utl.get_nested_dicts(inputs, *key[:-1])
        _add_validated_input(parent, validate, key, value, errors)
    return inputs, errors
def format_report_output(data):
    """
    Formats the outputs of the report.

    For each output, the related `target` and `input` values are added as
    special data; the output itself is stored unless an equal `input` value
    has been added. The collected data are finally split by data format.

    :param data:
        Model data.
    :type data: dict

    :return:
        Formatted report output.
    :rtype: dict
    """
    collected = {}
    outputs = data.get('output', {})
    for key, value in dsp_utl.stack_nested_keys(outputs, depth=3):
        parent = key[:-1]
        _add_special_data2report(data, collected, parent, 'target', *key)

        added, input_value = _add_special_data2report(
            data, collected, parent, 'input', *key
        )
        if not (added and _is_equal(input_value, value)):
            section = dsp_utl.get_nested_dicts(
                collected, *parent, default=collections.OrderedDict
            )
            section[key[-1]] = value

    output = {}
    for key, values in dsp_utl.stack_nested_keys(collected, depth=2):
        split = _split_by_data_format(values)
        dsp_utl.get_nested_dicts(output, *key, default=co2_utl.ret_v(split))

    return output
def _add_validated_input(data, validate, keys, value, errors):
    """
    Validates a single value and stores it in `data`.

    :param data:
        Dictionary that receives the validated value.
    :type data: dict

    :param validate:
        Validation function; called with `{keys[-1]: value}`.
    :type validate: callable

    :param keys:
        Full keys of the value (used to store the error).
    :type keys: tuple

    :param value:
        Value to be validated.

    :param errors:
        Nested errors, updated in place when a `SchemaError` is raised.
    :type errors: dict

    :return:
        The validated value, or `dsp_utl.NONE` when the validation fails or
        yields `dsp_utl.NONE`.
    """
    result = dsp_utl.NONE
    try:
        key, validated = next(iter(validate({keys[-1]: value}).items()))
        if validated is not dsp_utl.NONE:
            data[key] = validated
            result = validated
    except SchemaError as ex:
        dsp_utl.get_nested_dicts(errors, *keys[:-1])[keys[-1]] = ex
    return result
def _add2summary(total_summary, summary, base_keys=None):
    """
    Appends the entries of `summary` to the lists of `total_summary`.

    :param total_summary:
        Total summary (nested dict of lists), updated in place.
    :type total_summary: dict

    :param summary:
        Summary to be added; its values (depth 3) are dicts or lists of
        dicts.
    :type summary: dict

    :param base_keys:
        Dictionary combined with each appended entry.
    :type base_keys: dict, optional
    """
    base_keys = base_keys or {}
    for key, value in dsp_utl.stack_nested_keys(summary, depth=3):
        rows = dsp_utl.get_nested_dicts(total_summary, *key, default=list)
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            rows.append(dsp_utl.combine_dicts(entry, base_keys))
def _add_times_base(data, scope='base', usage='input', **match):
    """
    Adds the missing `times` to the target time series of the base data.

    The `times` are taken from the same entry under `usage`; if not there,
    from the first entry of `data` (depth 4) that has them.

    :param data:
        Parsed data, updated in place.
    :type data: dict

    :param scope:
        Data scope; nothing is done unless it is `base`.
    :type scope: str, optional

    :param usage:
        Data usage.
    :type usage: str, optional

    :param match:
        Extra fields forwarded to `_get_sheet_type`.
    :type match: dict
    """
    if scope != 'base':
        return

    sh_type = _get_sheet_type(scope=scope, usage=usage, **match)
    target_path = (scope, 'target')
    if sh_type != 'ts':
        return
    if not dsp_utl.are_in_nested_dicts(data, *target_path):
        return

    targets = dsp_utl.get_nested_dicts(data, *target_path)
    entries = dsp_utl.stack_nested_keys(targets, key=target_path, depth=2)
    for key, values in entries:
        if 'times' in values:
            continue

        # Same path of the target, but under `usage`.
        times_path = list(key + ('times', ))
        times_path[1] = usage
        if dsp_utl.are_in_nested_dicts(data, *times_path):
            values['times'] = dsp_utl.get_nested_dicts(data, *times_path)
            continue

        for _, other in dsp_utl.stack_nested_keys(data, depth=4):
            if 'times' in other:
                values['times'] = other['times']
                break
def re_sample_targets(data):
    """
    Re-samples the target time series on the times of the outputs.

    Only the targets that have a matching output are considered. The target
    time series are dropped when the `times` are missing in the targets or
    in the outputs, otherwise they are interpolated (`np.interp`) on the
    output times when these differ from the target ones.

    :param data:
        Model data.
    :type data: dict

    :return:
        Re-sampled targets.
    :rtype: dict
    """
    resampled = {}
    targets = data.get('target', {})
    for key, target in dsp_utl.stack_nested_keys(targets, depth=2):
        if not dsp_utl.are_in_nested_dicts(data, 'output', *key):
            continue

        output = dsp_utl.get_nested_dicts(data, 'output', *key)
        output = _split_by_data_format(output)
        selected = dsp_utl.selector(
            output, _split_by_data_format(target), allow_miss=True
        )

        if 'times' in selected.get('ts', {}) and 'times' in output['ts']:
            series = selected['ts']
            x, xp = output['ts']['times'], series.pop('times')
            if not _is_equal(x, xp):
                for name, fp in series.items():
                    series[name] = np.interp(x, xp, fp)
        else:
            selected.pop('ts', None)

        merged = dsp_utl.combine_dicts(*selected.values())
        dsp_utl.get_nested_dicts(
            resampled, *key, default=co2_utl.ret_v(merged)
        )

    return resampled
def _extract_summary_from_output(report, extracted):
    """
    Updates `extracted` with some special parameters of the report outputs.

    The parameters are read from the `pa` data of each output through
    `_param_names_values` and stored under the reversed output key plus the
    first item yielded for the parameter.

    :param report:
        Report data.
    :type report: dict

    :param extracted:
        Extracted summary, updated in place.
    :type extracted: dict
    """
    outputs = report.get('output', {})
    for key, values in dsp_utl.stack_nested_keys(outputs, depth=2):
        reversed_key = key[::-1]
        for usage, name, value in _param_names_values(values.get('pa', {})):
            found = {}
            if name == 'co2_params_calibrated':
                found = _format_dict(
                    value.valuesdict().items(), 'co2_params %s'
                )
            elif name == 'calibration_status':
                found = _format_dict(
                    enumerate(value), 'status co2_params step %d',
                    lambda x: x[0]
                )
            elif name == 'willans_factors':
                found = value
            elif name == 'phases_willans_factors':
                for phase, factors in enumerate(value):
                    str_format = '%s phase {}'.format(phase)
                    found.update(_format_dict(factors.items(), str_format))
            elif name == 'has_sufficient_power':
                found = {name: value}

            if found:
                dsp_utl.get_nested_dicts(
                    extracted, *(reversed_key + (usage,))
                ).update(found)
def define_new_inputs(data, base):
    """
    Defines the new inputs of a plan case from the base case and the plan
    variations.

    :param data:
        Plan variations, keyed by tuples of keys whose first item is `base`
        or `flag`. Values equal to `dsp_utl.EMPTY` mark entries to be
        removed from the new inputs.
    :type data: dict

    :param base:
        Base case; its `data` and `flag` entries seed the new inputs.
    :type base: dict

    :return:
        The new inputs and the output ids computed by `_get_inputs` (or all
        the data nodes of the 'CO2MPAS model' when no `dsp_solution` is
        available).
    :rtype: tuple
    """
    remove, new_base, new_flag, new_data = [], {}, set(), set()

    for k, v in dsp_utl.stack_nested_keys(base.get('data', {}), ('base', ), 4):
        dsp_utl.get_nested_dicts(new_base, *k, default=co2_utl.ret_v(v))

    for k, v in dsp_utl.stack_nested_keys(base.get('flag', {}), ('flag', ), 1):
        dsp_utl.get_nested_dicts(new_base, *k, default=co2_utl.ret_v(v))

    for k, v in data.items():
        if v is dsp_utl.EMPTY:
            # Store (parent path, leaf): keys may be longer than two items,
            # so the whole key cannot be unpacked as a pair later on.
            remove.append((k[:-1], k[-1]))

        dsp_utl.get_nested_dicts(new_base, *k[:-1])[k[-1]] = v

        if k[0] == 'base':
            new_data.add('.'.join(k[1:4]))
        elif k[0] == 'flag':
            new_flag.add(k[1:2])

    if 'dsp_solution' in _get_inputs(base, new_flag)[0]:
        sol = base['dsp_solution']
        n, out_id = _get_inputs(sol, new_data)
        for k in n.intersection(sol):
            dsp_utl.get_nested_dicts(
                new_base, 'base', *k.split('.'), default=co2_utl.ret_v(sol[k])
            )
    else:
        d = base.get_node('CO2MPAS model', node_attr='function')[0].dsp
        out_id = set(d.data_nodes)

    # Drop the entries that the plan explicitly sets to EMPTY.
    for parent, leaf in remove:
        dsp_utl.get_nested_dicts(new_base, *parent).pop(leaf)

    return new_base, out_id
def _parse_sheet(match, sheet, sheet_name, res=None):
    """
    Parses the data of a sheet and adds them to `res`.

    :param match:
        Fields matched from the sheet name; forwarded to `_get_sheet_type`
        and `_parse_values`.
    :type match: dict

    :param sheet:
        Sheet to be read with `xleash.lasso`.

    :param sheet_name:
        Name of the sheet.
    :type sheet_name: str

    :param res:
        Parsed data to be updated; a new dict is created when `None`.
    :type res: dict, optional

    :return:
        Parsed data (`res` unchanged when the sheet cannot be read), or
        `None` when a plan sheet has no header row.
    :rtype: dict | None

    :raises ValueError:
        If a time series sheet has columns that contain nan.
    """
    if res is None:
        res = {}

    sh_type = _get_sheet_type(**match)

    # Best effort: an unreadable sheet is skipped. `Exception` (instead of a
    # bare except) lets KeyboardInterrupt/SystemExit propagate.
    # noinspection PyBroadException
    try:
        data = xleash.lasso(_xl_ref[sh_type] % sheet_name, sheet=sheet)
    except Exception:
        return res

    if sh_type == 'pl':
        try:
            data = pd.DataFrame(data[1:], columns=data[0])
        except IndexError:
            return None
        if 'id' not in data:
            data['id'] = data.index + 1

        data.set_index(['id'], inplace=True)
        data.dropna(how='all', inplace=True)
        data.dropna(axis=1, how='all', inplace=True)
    elif sh_type == 'ts':
        data.dropna(how='all', inplace=True)
        data.dropna(axis=1, how='all', inplace=True)
        # A column is complete when its count equals the number of rows.
        mask = data.count(0) == len(data._get_axis(0))
        # noinspection PyUnresolvedReferences
        drop = [k for k, v in mask.items() if not v]
        if drop:
            msg = 'Columns {} in {} sheet contains nan.\n ' \
                  'Please correct the inputs!'
            raise ValueError(msg.format(drop, sheet_name))
    else:
        data = {k: v for k, v in data.items() if k}

    for k, v in _parse_values(data, match, "in sheet '%s'" % sheet_name):
        dsp_utl.get_nested_dicts(res, *k[:-1])[k[-1]] = v
    return res
def _eng_mode_parser(
        engineering_mode, soft_validation, use_selector, inputs, errors):
    """
    Applies the declaration-mode, selector and hard-validation rules.

    :param engineering_mode:
        If false, only the declaration data are extracted and the unused
        data are logged.
    :type engineering_mode: bool

    :param soft_validation:
        If false, the hard validation is run and its messages are added to
        the errors.
    :type soft_validation: bool

    :param use_selector:
        If false, the declaration selector configuration is applied.
    :type use_selector: bool

    :param inputs:
        Validated inputs.
    :type inputs: dict

    :param errors:
        Validation errors.
    :type errors: dict

    :return:
        Inputs and errors.
    :rtype: tuple[dict, dict]
    """
    if not engineering_mode:
        inputs, errors, diff = _extract_declaration_data(inputs, errors)
        if diff:
            unused = ['.'.join(k) for k in sorted(diff)]
            message = (
                'Since CO2MPAS is launched in declaration mode the '
                'following data are not used:\n %s\n'
                'If you want to include these data add to the batch cmd '
                '-D flag.engineering_mode=True'
            )
            log.info(message, ',\n'.join(unused))

    if not use_selector:
        inputs = validations.overwrite_declaration_config_data(inputs)

    if not soft_validation:
        for key, values in dsp_utl.stack_nested_keys(inputs, depth=3):
            for name, msg in validations.hard_validation(values, *key):
                section = dsp_utl.get_nested_dicts(errors, *key)
                section[name] = SchemaError([], [msg])

    return inputs, errors
def split_prediction_models(
        scores, calibrated_models, input_models, cycle_ids=()):
    """
    Splits the calibration scores and selects the models of each cycle.

    :param scores:
        Calibration scores; iterated two levels deep as
        `(key, cycle) -> score data`.
    :type scores: dict

    :param calibrated_models:
        Calibrated models: for each key, a mapping from cycle id to a
        `(from cycle, status, models)` tuple. The entry stored under
        `dsp_utl.NONE`, if any, is the default for the missing cycle ids.
    :type calibrated_models: dict

    :param input_models:
        Models taken as starting point for every cycle id.
    :type input_models: dict

    :param cycle_ids:
        Cycle ids for which the models are returned.
    :type cycle_ids: tuple, optional

    :return:
        A tuple whose first item is a dict with the keys `param_selections`,
        `model_selections`, `score_by_model` and `scores`, followed by one
        models dict per cycle id (in the order of `cycle_ids`).
    :rtype: tuple
    """
    sbm, model_sel, par = {}, {}, {}
    for (k, c), v in dsp_utl.stack_nested_keys(scores, depth=2):
        r = dsp_utl.selector(['models'], v, allow_miss=True)

        # Each scored model is marked as calibrated with cycle `c`.
        for m in r.get('models', ()):
            dsp_utl.get_nested_dicts(par, m, 'calibration')[c] = c

        # `r` (models + score) is stored before being reduced to the status.
        r.update(v.get('score', {}))
        dsp_utl.get_nested_dicts(sbm, k, c, default=co2_utl.ret_v(r))
        r = dsp_utl.selector(['success'], r, allow_miss=True)
        r = dsp_utl.map_dict({'success': 'status'}, r, {'from': c})
        dsp_utl.get_nested_dicts(model_sel, k, 'calibration')[c] = r

    # By default every input model is tagged as coming from 'input'.
    p = {i: dict.fromkeys(input_models, 'input') for i in cycle_ids}

    models = {i: input_models.copy() for i in cycle_ids}

    for k, n in sorted(calibrated_models.items()):
        d = n.get(dsp_utl.NONE, (None, True, {}))

        for i in cycle_ids:
            c, s, m = n.get(i, d)
            if m:
                s = {'from': c, 'status': s}
                dsp_utl.get_nested_dicts(model_sel, k, 'prediction')[i] = s
                # The calibrated models overwrite the ones of cycle `i`.
                models[i].update(m)
                p[i].update(dict.fromkeys(m, c))

    for k, v in dsp_utl.stack_nested_keys(p, ('prediction',), depth=2):
        dsp_utl.get_nested_dicts(par, k[-1], *k[:-1], default=co2_utl.ret_v(v))

    s = {
        'param_selections': par,
        'model_selections': model_sel,
        'score_by_model': sbm,
        'scores': scores
    }
    return (s,) + tuple(models.get(k, {}) for k in cycle_ids)
def prepare_data(raw_data, variation, input_file_name, overwrite_cache,
                 output_folder, timestamp, type_approval_mode, modelconf):
    """
    Prepare the data to be processed.

    :param raw_data:
        Raw data from the input file.
    :type raw_data: dict

    :param variation:
        Variations to be applied.
    :type variation: dict

    :param input_file_name:
        Input file name.
    :type input_file_name: str

    :param overwrite_cache:
        Overwrite saved cache?
    :type overwrite_cache: bool

    :param output_folder:
        Output folder.
    :type output_folder: str

    :param timestamp:
        Run timestamp.
    :type timestamp: str

    :param type_approval_mode:
        Is launched for TA?
    :type type_approval_mode: bool

    :param modelconf:
        Path of modelconf that has modified the defaults.
    :type modelconf: str

    :return:
        Prepared base and plan data. When the type approval check or the
        flags validation fails, `({}, pd.DataFrame([]))` is returned.
    :rtype: tuple
    """
    has_plan = 'plan' in raw_data and (not raw_data['plan'].empty)
    match = {
        'scope': 'plan' if has_plan else 'base',
    }
    r = {}
    sheets_factory = xleash.SheetsFactory()
    from co2mpas.io import check_xlasso
    for k, v in excel._parse_values(variation, match, "in variations"):
        # String values that are xl-references are resolved to their data.
        if isinstance(v, str) and check_xlasso(v):
            v = xleash.lasso(v, sheets_factory, url_file=input_file_name)
        dsp_utl.get_nested_dicts(r, *k[:-1])[k[-1]] = v

    if 'plan' in r:
        # `depth` has to be passed by keyword: the second positional argument
        # of `stack_nested_keys` is the key prefix, not the depth.
        if has_plan:
            plan = raw_data['plan'].copy()
            for k, v in dsp_utl.stack_nested_keys(r['plan'], depth=4):
                plan['.'.join(k)] = v
        else:
            gen = dsp_utl.stack_nested_keys(r['plan'], depth=4)
            plan = pd.DataFrame([{'.'.join(k): v for k, v in gen}])
            # NOTE(review): assumed to apply only to the newly built plan
            # (original indentation lost in this copy) - confirm.
            excel._add_index_plan(plan, input_file_name)

        r['plan'] = plan
        has_plan = True

    if 'base' in r:
        r['base'] = dsp_utl.combine_nested_dicts(
            raw_data.get('base', {}), r['base'], depth=4
        )

    if 'flag' in r:
        r['flag'] = dsp_utl.combine_nested_dicts(
            raw_data.get('flag', {}), r['flag'], depth=1
        )

    data = dsp_utl.combine_dicts(raw_data, r)

    if type_approval_mode:
        variation, has_plan = {}, False
        if not schema._ta_mode(data):
            return {}, pd.DataFrame([])

    flag = data.get('flag', {}).copy()

    # Flags that are not set explicitly depend on the presence of a plan.
    if 'run_base' not in flag:
        flag['run_base'] = not has_plan

    if 'run_plan' not in flag:
        flag['run_plan'] = has_plan

    flag['type_approval_mode'] = type_approval_mode
    flag['output_folder'] = output_folder
    flag['overwrite_cache'] = overwrite_cache
    if modelconf:
        flag['modelconf'] = modelconf

    if timestamp is not None:
        flag['timestamp'] = timestamp

    flag = schema.validate_flags(flag)

    if flag is dsp_utl.NONE:
        return {}, pd.DataFrame([])

    schema.check_data_version(flag)

    res = {
        'flag': flag,
        'variation': variation,
        'input_file_name': input_file_name,
    }
    res = dsp_utl.combine_dicts(flag, res)
    base = dsp_utl.combine_dicts(res, {'data': data.get('base', {})})
    plan = dsp_utl.combine_dicts(
        res, {'data': data.get('plan', pd.DataFrame([]))}
    )

    return base, plan
def format_report_scores(data):
    """
    Formats the calibration model scores and the prediction models uuid.

    :param data:
        Model data.
    :type data: dict

    :return:
        Formatted scores, stored under `data.calibration.model_scores`; only
        the non-empty sections are included.
    :rtype: dict
    """
    res = {}
    scores = 'data', 'calibration', 'model_scores'
    if dsp_utl.are_in_nested_dicts(data, *scores):
        # (section id, formatter) in the order they are processed.
        formatters = (
            ('param_selections',
             lambda d: _format_selection(d, 2, 'param_id')),
            ('model_selections', lambda d: _format_selection(d, 3)),
            ('score_by_model', lambda d: _format_selection(d, 2)),
            ('scores', lambda d: _format_scores(d)),
        )
        for name, formatter in formatters:
            path = scores + (name,)
            formatted = formatter(dsp_utl.get_nested_dicts(data, *path))
            if formatted:
                dsp_utl.get_nested_dicts(
                    res, *path, default=co2_utl.ret_v(formatted)
                )

    # Serialized (dill + base64) prediction models of each available cycle.
    uuids = []
    for cycle in ('nedc_h', 'nedc_l', 'wltp_h', 'wltp_l'):
        path = 'data', 'prediction', 'models_%s' % cycle
        if not dsp_utl.are_in_nested_dicts(data, *path):
            continue
        models = dsp_utl.get_nested_dicts(data, *path)
        uuids.append({
            'cycle': cycle,
            'uuid': base64.encodebytes(dill.dumps(models))
        })

    if uuids:
        path = scores + ('models_uuid',)
        dsp_utl.get_nested_dicts(res, *path, default=co2_utl.ret_v(uuids))

    return res
def get_selection(data):
    """
    Returns the formatted model selections of the calibration scores.

    :param data:
        Model data.
    :type data: dict

    :return:
        Formatted model selections; an empty dict when they are not in
        `data`.
    :rtype: dict
    """
    path = ('data', 'calibration', 'model_scores', 'model_selections')
    if not dsp_utl.are_in_nested_dicts(data, *path):
        return {}
    selections = dsp_utl.get_nested_dicts(data, *path)
    return _format_selection(selections, 3)