def get_chart_reference(report):
    # Build chart references (x/y data paths + labels) for every time-series
    # parameter in the report's output that has a matching entry in the
    # cycle-report graph map.
    r, _map = {}, _map_cycle_report_graphs()
    out = report.get('output', {})
    # Walk ('output', usage, cycle, 'ts') nodes that carry a 'times' vector.
    it = co2_utl.stack_nested_keys(out, key=('output',), depth=3)
    for k, v in sorted(it):
        if k[-1] == 'ts' and 'times' in v:
            # Label template: "<sheet name>/<param name>".
            label = '{}/%s'.format(_sheet_name(k))
            for i, j in sorted(v.items()):
                param_id = _re_params_name.match(i)['param']
                m = _map.get(param_id, None)
                if m:
                    # x/y are key-paths into the report, not the data itself.
                    d = {
                        'x': k + ('times',),
                        'y': k + (i,),
                        'label': label % i
                    }
                    # Group series under (cycle, param_id, 'series').
                    n = k[2], param_id, 'series'
                    co2_utl.get_nested_dicts(r, *n, default=list).append(d)
    # Merge the graph metadata (minus its 'label') into each series group.
    # NOTE: _map entries are mutated in place (label popped) — assumes
    # _map_cycle_report_graphs() returns fresh dicts each call.
    for k, v in co2_utl.stack_nested_keys(r, depth=2):
        m = _map[k[1]]
        m.pop('label', None)
        v.update(m)
    return r
def _parse_base_data(res, match, sheet, sheet_name,
                     re_params_name=_re_params_name):
    """
    Parse one 'base' sheet into nested dicts and merge it into *res*.

    :param res:
        Accumulator of parsed data (mutated in place).
    :type res: dict

    :param match:
        Parsed sheet-name fields (scope/usage/stage/cycle/...; mutated:
        'type' is filled in when missing).
    :type match: dict

    :param sheet:
        The xlrd/openpyxl sheet object.

    :param sheet_name:
        Sheet name (used to build the xlref and in error messages).
    :type sheet_name: str

    :param re_params_name:
        Regular expression to parse parameter names.

    :raises ValueError: if a time-series column contains NaN values.
    """
    r = {}
    defaults = {'usage': 'input', 'stage': 'calibration'}
    # Sheets whose name carries a cycle are time series ('ts'),
    # otherwise plain parameters ('pa').
    if 'type' not in match:
        match['type'] = 'pa' if 'cycle' not in match else 'ts'
    match = dsp_utl.combine_dicts(defaults, match)
    if match['type'] == 'pa':
        xl_ref = '#%s!B2:C_:["pipe", ["dict", "recurse"]]' % sheet_name
        data = lasso(xl_ref, sheet=sheet)
    else:
        try:
            xl_ref = '#%s!A2(R):.3:RD:["df", {"header": 0}]' % sheet_name
            data = lasso(xl_ref, sheet=sheet)
        except Exception:
            # Best-effort: an unparsable time-series sheet is skipped.
            # (Was a bare ``except:``, which also swallowed SystemExit /
            # KeyboardInterrupt.)
            return {}
        data.dropna(how='all', inplace=True)
        data.dropna(axis=1, how='all', inplace=True)
        # A column is valid only if it has a value on every remaining row.
        # (``data.index`` replaces the private ``data._get_axis(0)``.)
        mask = data.count(0) == len(data.index)
        drop = [k for k, v in mask.items() if not v]
        if drop:
            msg = 'Columns {} in {} sheet contains nan.\n ' \
                  'Please correct the inputs!'
            raise ValueError(msg.format(drop, sheet_name))
    for k, v in parse_values(data, match, re_params_name):
        co2_utl.get_nested_dicts(r, *k[:-1])[k[-1]] = v
    # Targets parsed from a ts sheet may lack a 'times' vector: borrow it
    # from the matching usage branch, else from any node that has one.
    n = (match['scope'], 'target')
    if match['type'] == 'ts' and co2_utl.are_in_nested_dicts(r, *n):
        t = co2_utl.get_nested_dicts(r, *n)
        for k, v in co2_utl.stack_nested_keys(t, key=n, depth=2):
            if 'times' not in v:
                n = list(k + ('times',))
                n[1] = match['usage']
                if co2_utl.are_in_nested_dicts(r, *n):
                    v['times'] = co2_utl.get_nested_dicts(r, *n)
                else:
                    for i, j in co2_utl.stack_nested_keys(r, depth=4):
                        if 'times' in j:
                            v['times'] = j['times']
                            break
    co2_utl.combine_nested_dicts(r, depth=5, base=res)
def _format_scores(scores):
    """Re-index model score limits/errors and group them into record lists."""
    # First pass: flatten ('limits'/'errors') leaves under a re-ordered key
    # path (model_id, cycle, stage, ..., ['score']).
    flat = {}
    for keys, node in co2_utl.stack_nested_keys(scores, depth=3):
        kind = keys[-1]
        if kind not in ('limits', 'errors'):
            continue
        suffix = ('score',) if kind == 'errors' else ()
        for path, value in co2_utl.stack_nested_keys(node):
            new_path = (keys[0], path[-1], keys[1],) + path[:-1] + suffix
            co2_utl.get_nested_dicts(
                flat, *new_path, default=co2_utl.ret_v(value)
            )
    # Second pass: tag each record with its ids and group by the tail keys.
    grouped = {}
    for keys, record in co2_utl.stack_nested_keys(flat, depth=4):
        record.update(dsp_utl.map_list(['model_id', 'param_id'], *keys[:2]))
        bucket = co2_utl.get_nested_dicts(grouped, *keys[2:], default=list)
        bucket.append(record)
    return grouped
def format_report_output(data):
    """Collect output values (dropping those equal to inputs) and split them
    by data format per (usage, cycle) node."""
    collected = {}
    stacked = co2_utl.stack_nested_keys(data.get('output', {}), depth=3)
    for keys, value in stacked:
        scope = keys[:-1]
        _add_special_data2report(data, collected, scope, 'target', *keys)
        has_input, in_val = _add_special_data2report(
            data, collected, scope, 'input', *keys
        )
        # Keep the output only when there is no input twin, or it differs.
        if not has_input or not _is_equal(in_val, value):
            co2_utl.get_nested_dicts(
                collected, *keys, default=co2_utl.ret_v(value)
            )
    formatted = {}
    for keys, node in co2_utl.stack_nested_keys(collected, depth=2):
        split = _split_by_data_format(node)
        co2_utl.get_nested_dicts(
            formatted, *keys, default=co2_utl.ret_v(split)
        )
    return formatted
def compare_outputs_vs_targets(data):
    """
    Compares model outputs vs targets.

    :param data:
        Model data.
    :type data: dict

    :return:
        Comparison results.
    :rtype: dict
    """
    metrics = {
        'mean_absolute_error': mean_absolute_error,
        'correlation_coefficient': _correlation_coefficient,
        'accuracy_score': accuracy_score,
    }
    comparison = {}
    targets = co2_utl.stack_nested_keys(data.get('target', {}), depth=3)
    for keys, target in targets:
        # Compare only where a matching output node exists.
        if not co2_utl.are_in_nested_dicts(data, 'output', *keys):
            continue
        output = co2_utl.get_nested_dicts(data, 'output', *keys)
        result = _compare(target, output, metrics=metrics)
        if result:
            co2_utl.get_nested_dicts(
                comparison, *keys, default=co2_utl.ret_v(result)
            )
    return comparison
def _summary2df(data):
    # Convert the 'summary' section of *data* into a list of named
    # DataFrames ('results', 'selection', 'comparison') wrapped in a dict.
    res = []
    summary = data.get('summary', {})
    if 'results' in summary:
        r = {}
        # Map each (cycle, stage, usage) leaf into a flat record; the first
        # slot ({}) receives the leaf's own values.
        fun = partial(dsp_utl.map_list, [{}, 'cycle', 'stage', 'usage'])
        for n, m in summary['results'].items():
            gen = ((fun(v, *k),)
                   for k, v in co2_utl.stack_nested_keys(m, depth=3))
            # _yield_sorted_params orders the records; keep only the record.
            v = [v[0] for v in _yield_sorted_params(gen)]
            co2_utl.get_nested_dicts(r, n, default=co2_utl.ret_v(v))
        df = _make_summarydf(r, index=['cycle', 'stage', 'usage'], depth=1)
        c = list(map(_rm_sub_parts, df.columns))
        df.columns = pd.MultiIndex.from_tuples(c)
        # The 'name' attribute labels the sheet when the frames are written.
        setattr(df, 'name', 'results')
        res.append(df)
    if 'selection' in summary:
        df = pd.DataFrame(summary['selection'])
        df.set_index(['model_id'], inplace=True)
        setattr(df, 'name', 'selection')
        res.append(df)
    if 'comparison' in summary:
        df = _comparison2df(summary['comparison'])
        # _comparison2df returns None when there is nothing to compare.
        if df is not None:
            setattr(df, 'name', 'comparison')
            res.append(df)
    if res:
        return {'summary': res}
    return {}
def validate_inputs(data, soft_validation=False, read_schema=None):
    # Validate every input leaf of *data* against *read_schema*; returns the
    # validated nested dict, or {} when any error was collected and logged.
    # NOTE(review): read_schema defaults to None but is dereferenced
    # unconditionally (read_schema.validate) — the default would raise
    # AttributeError; callers must always pass a schema. Confirm intent.
    res, errors, validate = {}, {}, read_schema.validate
    for k, v in sorted(co2_utl.stack_nested_keys(data, depth=4)):
        d = co2_utl.get_nested_dicts(res, *k[:-1])
        # Validates v and stores it in d (or records a failure in errors).
        _add_validated_input(d, validate, k, v, errors)
    if not soft_validation:
        # Cross-field ("hard") checks on each validated node.
        for k, v in co2_utl.stack_nested_keys(res, depth=3):
            for c, msg in hard_validation(v):
                co2_utl.get_nested_dicts(errors, *k)[c] = SchemaError([], [msg])
    # _log_errors_msg returns True when errors were logged.
    if _log_errors_msg(errors):
        return {}
    return res
def parse_dsp_model(model):
    """
    Parses the co2mpas model results.

    :param model:
        Co2mpas model after dispatching.
    :type model: co2mpas.dispatcher.Dispatcher

    :return:
        Mapped outputs.
    :rtype: dict[dict]
    """
    res = {}
    # Expand dotted output keys into nested dicts.
    for key, value in model.data_output.items():
        co2_utl.get_nested_dicts(
            res, *key.split('.'), default=co2_utl.ret_v(value)
        )
    # Promote calibrated wltp co2 emission values as prediction targets.
    # Iterate over a snapshot: the loop mutates res.
    for keys, value in list(co2_utl.stack_nested_keys(res, depth=3)):
        parent, leaf = keys[:-1], keys[-1]
        if parent != ('output', 'calibration'):
            continue
        if leaf not in ('wltp_l', 'wltp_h'):
            continue
        sel = dsp_utl.selector(
            ('co2_emission_value',), value, allow_miss=True
        )
        if sel:
            target = co2_utl.get_nested_dicts(res, 'target', 'prediction')
            target[leaf] = dsp_utl.combine_dicts(sel, target.get(leaf, {}))
    res['pipe'] = model.pipe
    return res
def _log_errors_msg(errors):
    """Log the collected validation errors; return True when any exist."""
    if not errors:
        return False
    lines = ['\nInput cannot be parsed, due to:']
    for keys, err in co2_utl.stack_nested_keys(errors, depth=4):
        lines.append('{} in {}: {}'.format(keys[-1], '/'.join(keys[:-1]), err))
    log.error('\n '.join(lines))
    return True
def _add2summary(total_summary, summary, base_keys=None):
    """Append each summary entry, merged with *base_keys*, into
    *total_summary* (mutated in place)."""
    base_keys = base_keys or {}
    for keys, value in co2_utl.stack_nested_keys(summary, depth=3):
        bucket = co2_utl.get_nested_dicts(total_summary, *keys, default=list)
        # A leaf may already be a list of records or a single record.
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            bucket.append(dsp_utl.combine_dicts(entry, base_keys))
def validate_data(data, soft_validation, read_schema=None):
    """Validate the simulation plan and the base inputs of *data*;
    return (dotted-key inputs dict, plan DataFrame)."""
    plan = validate_plan(data.get('plan', pd.DataFrame([])), read_schema)
    base = validate_inputs(data.get('base', {}), soft_validation, read_schema)
    stacked = co2_utl.stack_nested_keys(base, depth=3)
    # Flatten the validated tree into dotted keys (e.g. 'input.calibration.x').
    inputs = {'.'.join(keys): value for keys, value in stacked}
    return inputs, plan
def test_files(self):
    # Seat-belt regression test: run the co2mpas model on the demo files and
    # compare the (sorted, flattened) results against a previously saved
    # baseline; when no baseline exists (or OVERWRITE_SEATBELT is set),
    # save the current results as the new baseline instead.
    mydir = osp.dirname(__file__)
    # Baseline location: explicit SEATBELT_FILE, else a temp-dir default.
    if SEATBELT_FILE and osp.isfile(SEATBELT_FILE):
        res_file = SEATBELT_FILE
    else:
        tmpdir = tempfile.gettempdir()
        res_file = osp.join(tmpdir, 'co2mpas_seatbelt_demos.dill')
    log.info("\n OVERWRITE_SEATBELT: %s \n"
             " RUN_INPUT_FOLDER: %s \n"
             " RUN_ALL_FILES: %s \n"
             " SEATBELT_FILE: %s",
             OVERWRITE_SEATBELT, RUN_INPUT_FOLDER, RUN_ALL_FILES,
             res_file)
    if not OVERWRITE_SEATBELT and osp.isfile(res_file):
        old_results = dsp_utl.load_dispatcher(res_file)
        log.info("Old results loaded!")
    else:
        old_results = None
    path = RUN_INPUT_FOLDER or osp.join(mydir, '..', 'co2mpas', 'demos')
    # Single demo file unless a folder / all-files run was requested.
    file = (path
            if (RUN_ALL_FILES or RUN_INPUT_FOLDER)
            else osp.join(path, 'co2mpas_demo-0.xlsx'))
    model = vehicle_processing_model()
    results = []
    inp_files = file_finder([file])
    if not inp_files:
        raise AssertionError("DataCheck found no input-files in %r!" % file)
    for fpath in inp_files:
        fname = osp.splitext(osp.basename(fpath))[0]
        log.info('Processing: %s', fname)
        inputs = {
            'vehicle_name': fname,
            'input_file_name': fpath,
            'prediction_wltp': True,
        }
        r = model.dispatch(inputs=inputs, outputs=['report', 'summary'])
        r = dsp_utl.selector(['report', 'summary'], r)
        # The pipe is run-specific and not comparable between runs.
        r.get('report', {}).pop('pipe', None)
        # Sorted flat key/value pairs make results order-independent.
        results.append(sorted(co2_utl.stack_nested_keys(r)))
    if not OVERWRITE_SEATBELT and osp.isfile(res_file):
        log.info('Comparing...')
        self._check_results(results, old_results)
    else:
        # Reset the env flag so subsequent runs compare instead of overwrite.
        os.environ["OVERWRITE_SEATBELT"] = '0'
        dsp_utl.save_dispatcher(results, res_file)
        log.info('Overwritten seat belt %r.', res_file)
def _comparison2df(comparison):
    """Build a DataFrame from comparison results; return None when empty."""
    res = {}
    keys = ['usage', 'cycle', 'param']
    stacked = co2_utl.stack_nested_keys(comparison, depth=3)
    gen = [(dsp_utl.map_list(keys, *k), k, v) for k, v in stacked]
    for _, path, value in _yield_sorted_params(gen, keys=keys):
        rows = co2_utl.get_nested_dicts(res, *path[:-1], default=list)
        rows.append(dsp_utl.combine_dicts({'param_id': path[-1]}, value))
    if res:
        return _dd2df(res, 'param_id', depth=2)
def _extract_summary_from_summary(report, extracted):
    """Copy emission/consumption results and the delta section from the
    report summary into *extracted* (mutated in place)."""
    path = ('summary', 'results')
    if co2_utl.are_in_nested_dicts(report, *path):
        results = co2_utl.get_nested_dicts(report, *path)
        for name, node in results.items():
            if name not in ('co2_emission', 'fuel_consumption'):
                continue
            for keys, values in co2_utl.stack_nested_keys(node, depth=3):
                if values:
                    co2_utl.get_nested_dicts(extracted, *keys).update(values)
    path = ('summary', 'delta')
    if co2_utl.are_in_nested_dicts(report, *path):
        extracted['delta'] = co2_utl.get_nested_dicts(report, *path)
def parse_excel_file(file_path, re_sheet_name=_re_input_sheet_name,
                     re_params_name=_re_params_name):
    """
    Reads cycle's data and simulation plans.

    :param file_path:
        Excel file path.
    :type file_path: str

    :param re_sheet_name:
        Regular expression to parse sheet names.
    :type re_sheet_name: regex.Regex

    :param re_params_name:
        Regular expression to parse param names.
    :type re_params_name: regex.Regex

    :return:
        A pandas DataFrame with cycle's time series.
    :rtype: dict, pandas.DataFrame
    """
    excel_file = pd.ExcelFile(file_path)
    res, plans = {}, []
    defaults = {'scope': 'base'}
    book = excel_file.book
    for sheet_name in excel_file.sheet_names:
        match = re_sheet_name.match(sheet_name)
        # Sheets whose name doesn't match the convention are ignored.
        if not match:
            continue
        # Keep only the named groups that actually matched, lower-cased.
        match = {k: v.lower() for k, v in match.groupdict().items() if v}
        match = dsp_utl.combine_dicts(defaults, match)
        sheet = _open_sheet_by_name_or_index(book, 'book', sheet_name)
        # 'base' sheets fill res in place; 'plan' sheets append DataFrames.
        if match['scope'] == 'base':
            _parse_base_data(res, match, sheet, sheet_name, re_params_name)
        elif match['scope'] == 'plan':
            _parse_plan_data(plans, match, sheet, sheet_name, re_params_name)
    # Default cycle type/name from the cycle key (e.g. 'wltp_h' -> 'WLTP').
    for k, v in co2_utl.stack_nested_keys(res.get('base', {}), depth=3):
        if k[0] != 'target':
            v['cycle_type'] = v.get('cycle_type', k[-1].split('_')[0]).upper()
            v['cycle_name'] = v.get('cycle_name', k[-1]).upper()
    res['plan'] = _finalize_plan(res, plans, file_path)
    return res
def filter_summary(changes, summary):
    """Keep only summary entries whose node was touched by plan *changes*,
    then add the delta section."""
    touched, variations = [], {}
    for dotted, change in changes.items():
        # Dotted keys are reversed so the parameter name comes last.
        parts = tuple(dotted.split('.')[::-1])
        touched.append(parts[:-1])
        path = parts[:-1] + ('plan.%s' % parts[-1],)
        co2_utl.get_nested_dicts(variations, *path).update(change)
    for keys, value in co2_utl.stack_nested_keys(summary, depth=3):
        if keys[:-1] in touched:
            co2_utl.get_nested_dicts(
                variations, *keys, default=co2_utl.ret_v(value)
            )
    _add_delta2filtered_summary(variations, summary, base=variations)
    return variations
def extract_summary(report, vehicle_name):
    """Assemble the vehicle summary from the report sections and tag every
    entry with the vehicle name."""
    extracted = {}
    for extractor in (_extract_summary_from_summary,
                      _extract_summary_from_output,
                      _extract_summary_from_model_scores):
        extractor(report, extracted)
    for _, entry in co2_utl.stack_nested_keys(extracted, depth=3):
        entry['vehicle_name'] = vehicle_name
    return extracted
def _cycle2df(data, data_descriptions, write_schema):
    """Convert each output node ('ts'/'pa') into a DataFrame keyed by its
    sheet name; other node types are skipped."""
    res = {}
    out = data.get('output', {})
    # Dispatch on the node type instead of an if/elif chain.
    converters = {
        'ts': lambda v: _time_series2df(v, data_descriptions),
        'pa': lambda v: _parameters2df(v, data_descriptions, write_schema),
    }
    stacked = co2_utl.stack_nested_keys(out, key=('output',), depth=3)
    for keys, node in stacked:
        convert = converters.get(keys[-1])
        if convert is None:
            continue
        df = convert(node)
        if df is not None:
            res[_sheet_name(keys)] = df
    return res
def get_values(data, keys, tag=(), update=lambda k, v: v, base=None):
    """Select *keys* from every input/target/output node of *data* and store
    the (optionally updated) results into *base* under reversed, tagged
    key paths."""
    sections = ('input', 'target', 'output')
    data = dsp_utl.selector(sections, data, allow_miss=True)
    if base is None:
        base = {}
    for path, node in co2_utl.stack_nested_keys(data, depth=3):
        # Reverse so the section name ends up last in the stored path.
        path = path[::-1]
        selected = dsp_utl.selector(keys, node, allow_miss=True)
        selected = update(path, selected)
        if selected:
            co2_utl.get_nested_dicts(
                base, *(tag + path), default=co2_utl.ret_v(selected)
            )
    return base
def define_new_inputs(data, base, dsp_model):
    """Merge *data* over the *base* inputs, keeping only base nodes outside
    the sub-dispatcher reachable from *data*, and dropping keys whose new
    value is the EMPTY sentinel."""
    to_remove = [path for path, value in co2_utl.stack_nested_keys(data, depth=2)
                 if value is dsp_utl.EMPTY]
    sub_dsp = dsp_model.get_sub_dsp_from_workflow(data, check_inputs=False)
    names = set(base) - set(sub_dsp.data_nodes)
    names.update(data)
    inputs = dsp_utl.selector(names, base, allow_miss=True)
    merged = co2_utl.combine_nested_dicts(inputs, data, depth=2)
    for node, key in to_remove:
        co2_utl.get_nested_dicts(merged, node).pop(key)
    return merged
def _dd2df(dd, index=None, depth=0):
    """
    Concatenate the nested-dict leaves of *dd* into one DataFrame with a
    MultiIndex column per leaf path.

    :return:
    :rtype: pandas.DataFrame
    """
    frames = []
    for keys, node in co2_utl.stack_nested_keys(dd, depth=depth):
        frame = pd.DataFrame(node)
        frame.drop_duplicates(subset=index, inplace=True)
        if index is not None:
            frame.set_index(index, inplace=True)
        columns = [keys + (c,) for c in frame.columns]
        frame.columns = pd.MultiIndex.from_tuples(columns)
        frames.append(frame)
    return pd.concat(frames, copy=False, axis=1, verify_integrity=True)
def combine_scores(scores):
    # Combine per-cycle model scores into selections; drop empty entries and
    # strip a 9-character suffix from each top-level key.
    scores = {k[:-9]: v for k, v in scores.items() if v}
    if not scores:
        return {}
    s = {}
    for (k, c), v in co2_utl.stack_nested_keys(scores, depth=2):
        r = {'models': v['models']} if 'models' in v else {}
        r.update(v.get('score', {}))
        co2_utl.get_nested_dicts(s, k, c, default=co2_utl.ret_v(r))
        # 'best' is taken from the FIRST (k, c) pair seen for each k —
        # the result depends on the iteration order of stack_nested_keys.
        if not co2_utl.are_in_nested_dicts(s, k, 'best'):
            keys = {'models': 'selected_models', 'success': 'status'}
            best = dsp_utl.map_dict(keys, dsp_utl.selector(keys, r))
            best['from'] = c
            co2_utl.get_nested_dicts(s, k, 'best', default=co2_utl.ret_v(best))
    return {'selections': s, 'scores': scores}
def _finalize_plan(res, plans, file_path):
    """
    Merge plan defaults from *res* into the plan DataFrames and normalize
    the 'base'/'defaults' columns into an index.

    :param res:
        Parsed base data (its 'plan' branch supplies default cell values).
    :type res: dict

    :param plans:
        Plan DataFrames parsed from the 'plan' sheets.
    :type plans: list

    :param file_path:
        Input file path, used as the default 'base' and to resolve
        relative paths.
    :type file_path: str

    :return:
        The finalized plan.
    :rtype: pandas.DataFrame
    """
    if not plans:
        return pd.DataFrame()
    # Fill plan columns with the defaults declared in res['plan'].
    for k, v in co2_utl.stack_nested_keys(res.get('plan', {}), depth=4):
        n = '.'.join(k)
        m = '.'.join(k[:-1])
        for p in plans:
            if any(c.startswith(m) for c in p.columns):
                if n in p:
                    p[n].fillna(value=v, inplace=True)
                else:
                    p[n] = v
    plan = pd.concat(plans, axis=1, copy=False, verify_integrity=True)
    func = partial(osp.join, osp.dirname(file_path))
    if 'base' not in plan:
        plan['base'] = file_path
    else:
        # BUGFIX: the fillna result was previously discarded (no assignment
        # and no inplace=True), so NaN bases were never replaced here.
        plan['base'] = plan['base'].fillna(file_path)
    # `x or file_path` also maps empty strings to the input file path.
    plan['base'] = plan['base'].apply(lambda x: x or file_path).apply(func)
    plan['base'] = plan['base'].apply(osp.normpath)
    if 'defaults' not in plan:
        plan['defaults'] = ''
    else:
        # BUGFIX: same discarded-fillna issue as 'base' above.
        plan['defaults'] = plan['defaults'].fillna('')

    def _func(x):
        # SECURITY NOTE: eval() runs spreadsheet-provided text as Python;
        # acceptable only because plan files are trusted local inputs.
        if x:
            return str(tuple(
                osp.normpath(func(v)) for v in tuple(eval(x))))
        else:
            return x

    plan['defaults'] = plan['defaults'].apply(_func)
    plan['id'] = plan.index
    plan.set_index(['id', 'base', 'defaults'], inplace=True)
    return plan
def _extract_summary_from_output(report, extracted):
    # Pull selected parameter values out of the report's 'pa' output nodes
    # and flatten them (with human-readable keys) into *extracted*.
    for k, v in co2_utl.stack_nested_keys(report.get('output', {}), depth=2):
        # Reverse so the cycle comes before the usage in the stored path.
        k = k[::-1]
        for u, i, j in _param_names_values(v.get('pa', {})):
            o = {}
            if i == 'co2_params_calibrated':
                # j is an lmfit-style Parameters object (has valuesdict()).
                o = _format_dict(j.valuesdict().items(), 'co2_params %s')
            elif i == 'calibration_status':
                o = _format_dict(enumerate(j), 'status co2_params step %d',
                                 lambda x: x[0])
            elif i == 'willans_factors':
                o = j
            elif i == 'phases_willans_factors':
                # One dict per phase; suffix each key with its phase index.
                for n, m in enumerate(j):
                    o.update(_format_dict(m.items(), '%s phase {}'.format(n)))
            elif i == 'has_sufficient_power':
                o = {i: j}
            if o:
                co2_utl.get_nested_dicts(extracted, *(k + (u,))).update(o)
def re_sample_targets(data):
    # Re-sample target time series onto the matching output's time vector
    # (linear interpolation) so targets and outputs are comparable.
    res = {}
    for k, v in co2_utl.stack_nested_keys(data.get('target', {}), depth=2):
        if co2_utl.are_in_nested_dicts(data, 'output', *k):
            o = co2_utl.get_nested_dicts(data, 'output', *k)
            o = _split_by_data_format(o)
            # Keep only the target keys also present in the output.
            t = dsp_utl.selector(o, _split_by_data_format(v), allow_miss=True)
            if 'times' not in t.get('ts', {}) or 'times' not in o['ts']:
                # Without both time vectors the series can't be aligned.
                t.pop('ts', None)
            else:
                time_series = t['ts']
                # x: output times (new grid); xp: original target times.
                x, xp = o['ts']['times'], time_series.pop('times')
                if not _is_equal(x, xp):
                    for i, fp in time_series.items():
                        time_series[i] = np.interp(x, xp, fp)
            # Flatten the per-format dicts back into a single mapping.
            v = dsp_utl.combine_dicts(*t.values())
            co2_utl.get_nested_dicts(res, *k, default=co2_utl.ret_v(v))
    return res