def get_chart_reference(report):
    """
    Collects the chart references for the time series of the report outputs.

    :param report:
        Report data; only its 'output' entry is read.
    :type report: dict

    :return:
        Nested chart settings. Each leaf holds a 'series' list (dicts with
        'x', 'y' and 'label') and is updated with the graph settings
        returned by `_map_cycle_report_graphs` (without their 'label').
    :rtype: dict
    """
    charts, graph_map = {}, _map_cycle_report_graphs()
    outputs = report.get('output', {})
    stacked = dsp_utl.stack_nested_keys(outputs, key=('output',), depth=3)

    for path, sheet in sorted(stacked):
        # Only 'ts' entries that contain a 'times' item are charted.
        if path[-1] != 'ts' or 'times' not in sheet:
            continue

        label = '{}/%s'.format(co2_exl._sheet_name(path))
        for name, _ in sorted(sheet.items()):
            param_id = co2_exl._re_params_name.match(name)['param']
            if not graph_map.get(param_id, None):
                continue

            series = {
                'x': path + ('times',),
                'y': path + (name,),
                'label': label % name
            }
            target = path[2], param_id, 'series'
            dsp_utl.get_nested_dicts(charts, *target, default=list).append(
                series
            )

    for path, chart in dsp_utl.stack_nested_keys(charts, depth=2):
        settings = graph_map[path[1]]
        # The per-series labels are used instead of the mapped 'label'.
        settings.pop('label', None)
        chart.update(settings)

    return charts
def select_declaration_data(data, diff=None):
    """
    Selects from `data` the entries enabled in the declaration constants.

    :param data:
        Nested input data.
    :type data: dict

    :param diff:
        Optional set filled in place: it is cleared, loaded with the first
        item of every depth-4 entry of `data`, and then stripped of those
        found in the selected result.
    :type diff: set, optional

    :return:
        Nested dict with the selected data.
    :rtype: dict
    """
    res = {}
    declaration = constants.con_vals.DECLARATION_DATA
    for path, enabled in dsp_utl.stack_nested_keys(declaration):
        if not enabled or not dsp_utl.are_in_nested_dicts(data, *path):
            continue
        value = dsp_utl.get_nested_dicts(data, *path)
        dsp_utl.get_nested_dicts(res, *path, default=co2_utl.ret_v(value))

    if diff is not None:
        diff.clear()
        diff.update(
            item[0] for item in dsp_utl.stack_nested_keys(data, depth=4)
        )
        kept = (item[0] for item in dsp_utl.stack_nested_keys(res, depth=4))
        diff.difference_update(kept)

    return res
def _format_scores(scores):
    """
    Re-arranges the 'limits' and 'errors' entries of the model scores.

    :param scores:
        Nested scores; only depth-3 entries whose last key is 'limits' or
        'errors' are used.
    :type scores: dict

    :return:
        Nested dict whose leaves are lists of rows, each row tagged with
        its 'model_id' and 'param_id'.
    :rtype: dict
    """
    flat = {}
    for path, block in dsp_utl.stack_nested_keys(scores, depth=3):
        kind = path[-1]
        if kind not in ('limits', 'errors'):
            continue

        model_id = path[0]
        # Error values are stored one level deeper, under 'score'.
        suffix = ('score',) if kind == 'errors' else ()
        for sub, value in dsp_utl.stack_nested_keys(block):
            target = (model_id, sub[-1], path[1],) + sub[:-1] + suffix
            dsp_utl.get_nested_dicts(
                flat, *target, default=co2_utl.ret_v(value)
            )

    formatted = {}
    for path, row in sorted(dsp_utl.stack_nested_keys(flat, depth=4)):
        row.update(dsp_utl.map_list(['model_id', 'param_id'], *path[:2]))
        dsp_utl.get_nested_dicts(formatted, *path[2:], default=list).append(
            row
        )

    return formatted
def _summary2df(data):
    """
    Converts the 'summary' section of the data into named data frames.

    :param data:
        Data with an optional 'summary' dict; its 'results', 'selection'
        and 'comparison' entries are converted when present.
    :type data: dict

    :return:
        `{'summary': [frames]}` when at least one frame was built,
        otherwise an empty dict.
    :rtype: dict
    """
    frames = []
    summary = data.get('summary', {})

    if 'results' in summary:
        grouped = {}
        index = ['cycle', 'stage', 'usage']
        stacked = dsp_utl.stack_nested_keys(summary['results'], depth=4)
        for path, values in stacked:
            rows = dsp_utl.get_nested_dicts(grouped, path[0], default=list)
            labels = dsp_utl.map_list(index, *path[1:])
            rows.append(dsp_utl.combine_dicts(labels, values))

        if grouped:
            frame = _dd2df(
                grouped, index=index, depth=2,
                col_key=functools.partial(_sort_key, p_keys=('param', ) * 2),
                row_key=functools.partial(_sort_key, p_keys=index)
            )
            frame.columns = pd.MultiIndex.from_tuples(
                _add_units(frame.columns)
            )
            setattr(frame, 'name', 'results')
            frames.append(frame)

    if 'selection' in summary:
        frame = _dd2df(
            summary['selection'], ['model_id'], depth=2,
            col_key=functools.partial(_sort_key, p_keys=('stage', 'cycle')),
            row_key=functools.partial(_sort_key, p_keys=())
        )
        setattr(frame, 'name', 'selection')
        frames.append(frame)

    if 'comparison' in summary:
        grouped = {}
        stacked = dsp_utl.stack_nested_keys(summary['comparison'], depth=3)
        for path, values in stacked:
            row = dsp_utl.combine_dicts(values, base={'param_id': path[-1]})
            dsp_utl.get_nested_dicts(
                grouped, *path[:-1], default=list
            ).append(row)

        if grouped:
            frame = _dd2df(
                grouped, ['param_id'], depth=2,
                col_key=functools.partial(
                    _sort_key, p_keys=('stage', 'cycle')
                ),
                row_key=functools.partial(_sort_key, p_keys=())
            )
            setattr(frame, 'name', 'comparison')
            frames.append(frame)

    return {'summary': frames} if frames else {}
def compare_outputs_vs_targets(data):
    """
    Compares model outputs vs targets.

    :param data:
        Model data; its 'target' entries are compared with the 'output'
        entries found under the same keys.
    :type data: dict

    :return:
        Comparison results (only non-empty comparisons are stored).
    :rtype: dict
    """
    comparison = {}
    metrics = _get_metrics()
    targets = data.get('target', {})

    for path, target in dsp_utl.stack_nested_keys(targets, depth=3):
        # Targets without a matching output cannot be compared.
        if dsp_utl.are_in_nested_dicts(data, 'output', *path):
            output = dsp_utl.get_nested_dicts(data, 'output', *path)
            result = _compare(target, output, metrics=metrics)
            if result:
                dsp_utl.get_nested_dicts(
                    comparison, *path, default=co2_utl.ret_v(result)
                )

    return comparison
def parse_dsp_solution(solution):
    """
    Parses the co2mpas model results.

    :param solution:
        Co2mpas model after dispatching.
    :type solution: co2mpas.dispatcher.Solution

    :return:
        Mapped outputs, plus the solution pipe under the 'pipe' key.
    :rtype: dict[dict]
    """
    res = {}
    for dotted, value in solution.items():
        path = dotted.split('.')
        dsp_utl.get_nested_dicts(res, *path, default=co2_utl.ret_v(value))

    # Iterate over a snapshot: `res` is modified inside the loop.
    for path, value in list(dsp_utl.stack_nested_keys(res, depth=3)):
        parent, cycle = path[:-1], path[-1]
        if parent != ('output', 'calibration'):
            continue
        if cycle not in ('wltp_l', 'wltp_h'):
            continue

        co2 = dsp_utl.selector(
            ('co2_emission_value', ), value, allow_miss=True
        )
        if co2:
            # Copy the calibration CO2 value among the prediction targets.
            targets = dsp_utl.get_nested_dicts(res, 'target', 'prediction')
            targets[cycle] = dsp_utl.combine_dicts(
                co2, targets.get(cycle, {})
            )

    res['pipe'] = solution.pipe

    return res
def _format_selection(score_by_model, depth=-1, index='model_id'):
    """
    Groups copies of the score entries, tagging each with its first key.

    :param score_by_model:
        Nested scores whose first key level is stored under `index`.
    :type score_by_model: dict

    :param depth:
        Depth passed to `dsp_utl.stack_nested_keys`.
    :type depth: int, optional

    :param index:
        Name of the item that receives the first key of each entry.
    :type index: str, optional

    :return:
        Nested dict (keyed by the remaining keys) of lists of rows.
    :rtype: dict
    """
    grouped = {}
    stacked = dsp_utl.stack_nested_keys(score_by_model, depth=depth)
    for path, values in sorted(stacked):
        row = values.copy()  # Leave the caller's data untouched.
        row[index] = path[0]
        dsp_utl.get_nested_dicts(grouped, *path[1:], default=list).append(row)
    return grouped
def format_report_output(data):
    """
    Formats the 'output' section of the data for the report.

    :param data:
        Data whose 'output' entries are read; 'target' and 'input' are
        handed to `_add_special_data2report`.
    :type data: dict

    :return:
        Nested dict whose depth-2 entries have been passed through
        `_split_by_data_format`.
    :rtype: dict
    """
    res = {}
    ordered = functools.partial(
        dsp_utl.get_nested_dicts, default=collections.OrderedDict
    )
    outputs = data.get('output', {})
    for path, value in dsp_utl.stack_nested_keys(outputs, depth=3):
        parent, name = path[:-1], path[-1]
        _add_special_data2report(data, res, parent, 'target', *path)

        found, input_value = _add_special_data2report(
            data, res, parent, 'input', *path
        )
        # Keep the output unless the 'input' call reported an equal value.
        if not (found and _is_equal(input_value, value)):
            ordered(res, *parent)[name] = value

    output = {}
    for path, values in dsp_utl.stack_nested_keys(res, depth=2):
        split = _split_by_data_format(values)
        dsp_utl.get_nested_dicts(output, *path, default=co2_utl.ret_v(split))

    return output
def _validate_base_with_schema(data):
    """
    Validates every depth-4 entry of `data` with the read data schema.

    :param data:
        Nested data to validate.
    :type data: dict

    :return:
        The inputs and errors dicts filled by `_add_validated_input`.
    :rtype: (dict, dict)
    """
    validate = define_data_schema(read=True).validate
    inputs, errors = {}, {}
    for path, value in sorted(dsp_utl.stack_nested_keys(data, depth=4)):
        bucket = dsp_utl.get_nested_dicts(inputs, *path[:-1])
        _add_validated_input(bucket, validate, path, value, errors)

    return inputs, errors
def _log_errors_msg(errors): if errors: msg = ['\nInput cannot be parsed, due to:'] for k, v in dsp_utl.stack_nested_keys(errors, depth=4): msg.append('{} in {}: {}'.format(k[-1], '/'.join(k[:-1]), v)) log.error('\n '.join(msg)) return True return False
def _extract_summary_from_summary(report, extracted):
    """
    Copies selected 'summary'/'results' entries of the report.

    Only 'declared_co2_emission', 'co2_emission' and 'fuel_consumption'
    are considered; their non-empty depth-3 entries update `extracted`
    in place.

    :param report:
        Report data.
    :type report: dict

    :param extracted:
        Nested dict updated in place.
    :type extracted: dict
    """
    results_path = ('summary', 'results')
    if not dsp_utl.are_in_nested_dicts(report, *results_path):
        return

    wanted = ('declared_co2_emission', 'co2_emission', 'fuel_consumption')
    results = dsp_utl.get_nested_dicts(report, *results_path)
    for name, block in results.items():
        if name not in wanted:
            continue
        for path, values in dsp_utl.stack_nested_keys(block, depth=3):
            if values:
                dsp_utl.get_nested_dicts(extracted, *path).update(values)
def _add_times_base(data, scope='base', usage='input', **match): if scope != 'base': return sh_type = _get_sheet_type(scope=scope, usage=usage, **match) n = (scope, 'target') if sh_type == 'ts' and dsp_utl.are_in_nested_dicts(data, *n): t = dsp_utl.get_nested_dicts(data, *n) for k, v in dsp_utl.stack_nested_keys(t, key=n, depth=2): if 'times' not in v: n = list(k + ('times', )) n[1] = usage if dsp_utl.are_in_nested_dicts(data, *n): v['times'] = dsp_utl.get_nested_dicts(data, *n) else: for i, j in dsp_utl.stack_nested_keys(data, depth=4): if 'times' in j: v['times'] = j['times'] break
def _add2summary(total_summary, summary, base_keys=None):
    """
    Appends the depth-3 entries of `summary` to `total_summary`.

    :param total_summary:
        Nested dict of lists, updated in place.
    :type total_summary: dict

    :param summary:
        Summary to add; a list value contributes one row per item.
    :type summary: dict

    :param base_keys:
        Extra items combined with every appended row.
    :type base_keys: dict, optional
    """
    base_keys = base_keys or {}
    for path, value in dsp_utl.stack_nested_keys(summary, depth=3):
        rows = dsp_utl.get_nested_dicts(total_summary, *path, default=list)
        items = value if isinstance(value, list) else [value]
        for item in items:
            rows.append(dsp_utl.combine_dicts(item, base_keys))
def test_files(self):
    """
    Runs the vehicle model on the demo/input files and checks the results.

    The results are compared with the stored seat-belt file when it exists
    and `OVERWRITE_SEATBELT` is not set; otherwise the seat-belt file is
    (re)written with the new results.

    :raises AssertionError:
        When no input file is found.
    """
    mydir = osp.dirname(__file__)
    # Use the configured seat-belt file if it exists, else a temp-dir file.
    if SEATBELT_FILE and osp.isfile(SEATBELT_FILE):
        res_file = SEATBELT_FILE
    else:
        tmpdir = tempfile.gettempdir()
        res_file = osp.join(tmpdir, 'co2mpas_seatbelt_demos.dill')

    log.info(
        "\n OVERWRITE_SEATBELT: %s \n"
        " RUN_INPUT_FOLDER: %s \n"
        " RUN_ALL_FILES: %s \n"
        " SEATBELT_FILE: %s",
        OVERWRITE_SEATBELT, RUN_INPUT_FOLDER, RUN_ALL_FILES, res_file)

    # Load the reference results only when they will be compared.
    if not OVERWRITE_SEATBELT and osp.isfile(res_file):
        old_results = dsp_utl.load_dispatcher(res_file)
        log.info("Old results loaded!")
    else:
        old_results = None

    # Run the whole folder, or just the first demo file by default.
    path = RUN_INPUT_FOLDER or osp.join(mydir, '..', 'co2mpas', 'demos')
    file = (path
            if (RUN_ALL_FILES or RUN_INPUT_FOLDER)
            else osp.join(path, 'co2mpas_demo-0.xlsx'))

    model = vehicle_processing_model()

    results = []

    inp_files = file_finder([file])
    if not inp_files:
        raise AssertionError("DataCheck found no input-files in %r!" % file)

    for fpath in inp_files:
        fname = osp.splitext(osp.basename(fpath))[0]
        log.info('Processing: %s', fname)

        inputs = {
            'input_file_name': fpath,
            'variation': {
                'flag.only_summary': True
            }
        }
        r = model.dispatch(inputs=inputs)
        r = dsp_utl.selector(['report', 'summary'], r['solution'])
        # The 'pipe' entry of the report is excluded from the stored results.
        r.get('report', {}).pop('pipe', None)
        results.append(sorted(dsp_utl.stack_nested_keys(r)))

    if not OVERWRITE_SEATBELT and osp.isfile(res_file):
        log.info('Comparing...')
        self._check_results(results, old_results)
    else:
        # Reset the environment flag after writing the new seat belt.
        os.environ["OVERWRITE_SEATBELT"] = '0'
        dsp_utl.save_dispatcher(results, res_file)
        log.info('Overwritten seat belt %r.', res_file)
def validate_base(data, engineering_mode, soft_validation, use_selector):
    """
    Validates the base data and flattens the validated inputs.

    :param data:
        Nested data to validate.
    :type data: dict

    :param engineering_mode:
        Forwarded to `_eng_mode_parser`.
    :type engineering_mode: bool

    :param soft_validation:
        Forwarded to `_eng_mode_parser`.
    :type soft_validation: bool

    :param use_selector:
        Forwarded to `_eng_mode_parser`.
    :type use_selector: bool

    :return:
        Validated inputs keyed by their dotted depth-3 path, or
        `dsp_utl.NONE` when errors have been logged.
    :rtype: dict
    """
    inputs, errors = _validate_base_with_schema(data)
    inputs, errors = _eng_mode_parser(
        engineering_mode, soft_validation, use_selector, inputs, errors
    )

    if _log_errors_msg(errors):
        return dsp_utl.NONE

    flat = {}
    for path, value in dsp_utl.stack_nested_keys(inputs, depth=3):
        flat['.'.join(path)] = value
    return flat
def filter_summary(changes, new_outputs, summary):
    """
    Builds the summary of a plan variation.

    :param changes:
        Changed inputs, keyed by tuples of keys.
    :type changes: dict

    :param new_outputs:
        Dotted ids of the new outputs; their reversed tail selects which
        `summary` entries are kept.
    :type new_outputs: iterable[str]

    :param summary:
        Nested summary; depth-3 entries whose leading keys are selected are
        copied to the result.
    :type summary: dict

    :return:
        Nested dict with the changed values (stored under a 'plan.<...>'
        label) and the selected summary entries.
    :rtype: dict
    """
    selected = {tuple(k.split('.')[:0:-1]) for k in new_outputs}
    variations = {}
    for k, v in changes.items():
        n = k[-2:1:-1]
        selected.add(n)
        # Drop from the label the keys already used as leading keys (`n`).
        # The filter used to test the whole key `k` instead of each item,
        # which is always true and therefore filtered nothing.
        prefix = '.'.join(i for i in k[:-1] if i not in n)
        key = n + ('plan.%s' % prefix, k[-1])
        dsp_utl.get_nested_dicts(variations, *key, default=co2_utl.ret_v(v))

    for k, v in dsp_utl.stack_nested_keys(summary, depth=3):
        if k[:-1] in selected:
            dsp_utl.get_nested_dicts(variations, *k, default=co2_utl.ret_v(v))

    return variations
def split_prediction_models(
        scores, calibrated_models, input_models, cycle_ids=()):
    """
    Splits the calibrated models per prediction cycle.

    :param scores:
        Nested model scores (two key levels are read).
    :type scores: dict

    :param calibrated_models:
        Per model, a mapping from cycle id (or `dsp_utl.NONE` as fallback)
        to a 3-item tuple unpacked as (source, status, models).
    :type calibrated_models: dict

    :param input_models:
        Models given as input, used as the base for every cycle.
    :type input_models: dict

    :param cycle_ids:
        Ids of the prediction cycles.
    :type cycle_ids: tuple, optional

    :return:
        A tuple whose first item is the selection summary (keys
        'param_selections', 'model_selections', 'score_by_model',
        'scores'), followed by one models dict per cycle id.
    :rtype: tuple
    """
    # sbm: score by model; model_sel: model selections; par: param selections.
    sbm, model_sel, par = {}, {}, {}
    for (k, c), v in dsp_utl.stack_nested_keys(scores, depth=2):
        r = dsp_utl.selector(['models'], v, allow_miss=True)

        for m in r.get('models', ()):
            dsp_utl.get_nested_dicts(par, m, 'calibration')[c] = c

        r.update(v.get('score', {}))
        dsp_utl.get_nested_dicts(sbm, k, c, default=co2_utl.ret_v(r))
        r = dsp_utl.selector(['success'], r, allow_miss=True)
        r = dsp_utl.map_dict({'success': 'status'}, r, {'from': c})
        dsp_utl.get_nested_dicts(model_sel, k, 'calibration')[c] = r

    # Every cycle starts from the input models, all marked as 'input'.
    p = {i: dict.fromkeys(input_models, 'input') for i in cycle_ids}

    models = {i: input_models.copy() for i in cycle_ids}

    for k, n in sorted(calibrated_models.items()):
        # Default used for the cycles without a specific entry.
        d = n.get(dsp_utl.NONE, (None, True, {}))

        for i in cycle_ids:
            c, s, m = n.get(i, d)
            if m:
                s = {'from': c, 'status': s}
                dsp_utl.get_nested_dicts(model_sel, k, 'prediction')[i] = s
                models[i].update(m)
                p[i].update(dict.fromkeys(m, c))

    for k, v in dsp_utl.stack_nested_keys(p, ('prediction',), depth=2):
        dsp_utl.get_nested_dicts(par, k[-1], *k[:-1], default=co2_utl.ret_v(v))

    s = {
        'param_selections': par,
        'model_selections': model_sel,
        'score_by_model': sbm,
        'scores': scores
    }
    return (s,) + tuple(models.get(k, {}) for k in cycle_ids)
def overwrite_declaration_config_data(data):
    """
    Applies the declaration selector configuration to the data.

    :param data:
        Nested input data.
    :type data: dict

    :return:
        The result of `dsp_utl.combine_nested_dicts(data, depth=3)` with
        its ('config', 'selector', 'all') entry replaced by a deep copy
        that went through the `DECLARATION_SELECTOR_CONFIG` entries.
    :rtype: dict
    """
    config = constants.con_vals.DECLARATION_SELECTOR_CONFIG
    res = dsp_utl.combine_nested_dicts(data, depth=3)
    parents, leaf = ('config', 'selector'), 'all'

    # Work on a deep copy so that the source data are not modified.
    selector_cfg = copy.deepcopy(
        dsp_utl.get_nested_dicts(res, *parents, leaf)
    )

    for path, value in dsp_utl.stack_nested_keys(config):
        dsp_utl.get_nested_dicts(
            selector_cfg, *path, default=co2_utl.ret_v(value)
        )

    dsp_utl.get_nested_dicts(res, *parents)[leaf] = selector_cfg

    return res
def extract_summary(report, vehicle_name):
    """
    Extracts the summary of a report and tags it with the vehicle name.

    :param report:
        Report data.
    :type report: dict

    :param vehicle_name:
        Value stored under 'vehicle_name' in every depth-3 entry.
    :type vehicle_name: str

    :return:
        Nested dict with the extracted summary.
    :rtype: dict
    """
    extracted = {}
    extractors = (
        _extract_summary_from_summary,
        _extract_summary_from_output,
        _extract_summary_from_model_scores,
    )
    for extract in extractors:
        extract(report, extracted)

    for _, values in dsp_utl.stack_nested_keys(extracted, depth=3):
        values['vehicle_name'] = vehicle_name

    return extracted
def parse_excel_file(file_path):
    """
    Reads cycle's data and simulation plans.

    :param file_path:
        Excel file path.
    :type file_path: str

    :return:
        Parsed data with the simulation plan stored under 'plan', or
        `dsp_utl.NONE` when the file is not found.
    :rtype: dict
    """
    try:
        workbook = pd.ExcelFile(file_path)
    except FileNotFoundError:
        log.error("No such file or directory: '%s'", file_path)
        return dsp_utl.NONE

    res, plans = {}, []
    book = workbook.book

    for sheet_name in workbook.sheet_names:
        parsed_name = _re_input_sheet_name.match(sheet_name)
        if not parsed_name:
            log.debug("Sheet name '%s' cannot be parsed!", sheet_name)
            continue
        match = {
            k: v.lower() for k, v in parsed_name.groupdict().items() if v
        }

        sheet = pnd_xlrd._open_sheet_by_name_or_index(book, 'book', sheet_name)
        is_plan = match.get('scope', None) == 'plan'
        seed = {'plan': pd.DataFrame()} if is_plan else {}
        parsed = _parse_sheet(match, sheet, sheet_name, res=seed)

        # Plan sheets are collected apart and merged at the end.
        if is_plan:
            plans.append(parsed['plan'])
            continue

        _add_times_base(parsed, **match)
        dsp_utl.combine_nested_dicts(parsed, depth=5, base=res)

    base = res.get('base', {})
    for path, values in dsp_utl.stack_nested_keys(base, depth=3):
        if path[0] == 'target':
            continue
        # Default cycle type/name are derived from the last key.
        cycle_id = path[-1]
        values['cycle_type'] = values.get(
            'cycle_type', cycle_id.split('_')[0]
        ).upper()
        values['cycle_name'] = values.get('cycle_name', cycle_id).upper()

    res['plan'] = _finalize_plan(res, plans, file_path)

    return res
def _extract_summary_from_model_scores(report, extracted):
    """
    Adds the model selection statuses to the extracted 'output' entries.

    :param report:
        Report data; the statuses are read from
        ('data', 'calibration', 'model_scores', 'model_selections').
    :type report: dict

    :param extracted:
        Nested dict whose depth-3 'output' entries are updated in place.
    :type extracted: dict

    :return:
        False when the report has no model selections, True otherwise.
    :rtype: bool
    """
    path = ('data', 'calibration', 'model_scores', 'model_selections')
    if not dsp_utl.are_in_nested_dicts(report, *path):
        return False

    selections = dsp_utl.get_nested_dicts(report, *path)
    for key, values in dsp_utl.stack_nested_keys(extracted, depth=3):
        # The selections are looked up with the first two keys reversed.
        sel_key = key[1::-1]
        if key[-1] != 'output':
            continue
        if not dsp_utl.are_in_nested_dicts(selections, *sel_key):
            continue

        models = dsp_utl.get_nested_dicts(selections, *sel_key)
        statuses = (
            (d['model_id'], d['status']) for d in models if 'status' in d
        )
        values.update(_format_dict(statuses, 'status %s'))

    return True
def get_values(data, keys, tag=(), update=lambda k, v: v, base=None):
    """
    Collects the selected values of the input/target/output data.

    :param data:
        Data with optional 'input', 'target' and 'output' entries.
    :type data: dict

    :param keys:
        Keys selected from every depth-3 entry.
    :type keys: iterable

    :param tag:
        Keys prepended to the (reversed) path of each stored entry.
    :type tag: tuple, optional

    :param update:
        Function called as `update(reversed_path, values)`; its result is
        stored when truthy.
    :type update: callable, optional

    :param base:
        Dict to fill in; a new one is created when None.
    :type base: dict, optional

    :return:
        The filled `base` dict.
    :rtype: dict
    """
    sections = ('input', 'target', 'output')
    selected = dsp_utl.selector(sections, data, allow_miss=True)

    if base is None:
        base = {}

    for path, values in dsp_utl.stack_nested_keys(selected, depth=3):
        reversed_path = path[::-1]
        picked = dsp_utl.selector(keys, values, allow_miss=True)
        picked = update(reversed_path, picked)
        if not picked:
            continue

        target = tag + reversed_path
        dsp_utl.get_nested_dicts(base, *target, default=co2_utl.ret_v(picked))

    return base
def define_new_inputs(data, base):
    """
    Defines the inputs of a plan variation from the base case.

    :param data:
        Variation data keyed by tuples of keys; values that are
        `dsp_utl.EMPTY` are removed from the new inputs.
    :type data: dict

    :param base:
        Base case; its 'data', 'flag' and (optionally) 'dsp_solution'
        entries are read.
    :type base: dict-like

    :return:
        The new nested inputs and the ids of the outputs to compute.
    :rtype: (dict, set)
    """
    remove, new_base, new_flag, new_data = [], {}, set(), set()

    for k, v in dsp_utl.stack_nested_keys(base.get('data', {}), ('base', ), 4):
        dsp_utl.get_nested_dicts(new_base, *k, default=co2_utl.ret_v(v))

    for k, v in dsp_utl.stack_nested_keys(base.get('flag', {}), ('flag', ), 1):
        dsp_utl.get_nested_dicts(new_base, *k, default=co2_utl.ret_v(v))

    for k, v in data.items():
        if v is dsp_utl.EMPTY:
            remove.append(k)

        dsp_utl.get_nested_dicts(new_base, *k[:-1])[k[-1]] = v

        if k[0] == 'base':
            new_data.add('.'.join(k[1:4]))
        elif k[0] == 'flag':
            new_flag.add(k[1:2])

    if 'dsp_solution' in _get_inputs(base, new_flag)[0]:
        sol = base['dsp_solution']
        n, out_id = _get_inputs(sol, new_data)
        for k in n.intersection(sol):
            dsp_utl.get_nested_dicts(
                new_base, 'base', *k.split('.'), default=co2_utl.ret_v(sol[k])
            )
    else:
        d = base.get_node('CO2MPAS model', node_attr='function')[0].dsp
        out_id = set(d.data_nodes)

    # Drop the EMPTY entries using the full key path. The previous
    # two-item unpacking raised ValueError for keys that are not exactly
    # two items long.
    for k in remove:
        dsp_utl.get_nested_dicts(new_base, *k[:-1]).pop(k[-1])

    return new_base, out_id
def _cycle2df(data):
    """
    Converts the 'ts' and 'pa' entries of the outputs into data frames.

    :param data:
        Data whose 'output' entry is read.
    :type data: dict

    :return:
        Data frames keyed by sheet name; entries for which the converter
        returns None are skipped.
    :rtype: dict
    """
    frames = {}
    outputs = data.get('output', {})
    write_schema = schema.define_data_schema(read=False)
    descriptions = get_doc_description()

    stacked = dsp_utl.stack_nested_keys(outputs, key=('output', ), depth=3)
    for path, values in stacked:
        sheet_name, kind = excel._sheet_name(path), path[-1]
        if kind == 'ts':
            frame = _time_series2df(values, descriptions)
        elif kind == 'pa':
            frame = _parameters2df(values, descriptions, write_schema)
        else:
            continue

        if frame is not None:
            frames[sheet_name] = frame

    return frames
def _finalize_plan(res, plans, file_path):
    """
    Merges the plan sheets and applies the plan defaults stored in `res`.

    :param res:
        Parsed data; the depth-4 entries under 'plan' are used as default
        values for the plan columns.
    :type res: dict

    :param plans:
        Plan data frames (a single empty frame is used when there are
        none). The frames are updated in place.
    :type plans: list[pandas.DataFrame]

    :param file_path:
        Excel file path, forwarded to `_add_index_plan`.
    :type file_path: str

    :return:
        The result of `_add_index_plan` on the concatenated plan.
    :rtype: pandas.DataFrame
    """
    if not plans:
        plans = (pd.DataFrame(), )

    for k, v in dsp_utl.stack_nested_keys(res.get('plan', {}), depth=4):
        n = '.'.join(k)
        m = '.'.join(k[:-1])
        for p in plans:
            # Defaults apply only to plans that already have related columns.
            if any(c.startswith(m) for c in p.columns):
                if n in p:
                    # Assign the filled column back: an in-place `fillna`
                    # on `p[n]` acts on a temporary and is not guaranteed
                    # to update `p` (it does not with copy-on-write).
                    p[n] = p[n].fillna(value=v)
                else:
                    p[n] = v

    plan = pd.concat(plans, axis=1, copy=False, verify_integrity=True)

    # noinspection PyTypeChecker
    return _add_index_plan(plan, file_path)
def _extract_summary_from_output(report, extracted):
    """
    Extracts selected output parameters of the report.

    The parameters handled are 'co2_params_calibrated',
    'calibration_status', 'willans_factors', 'phases_willans_factors' and
    'has_sufficient_power'; the non-empty results update `extracted` in
    place.

    :param report:
        Report data; the 'pa' item of each depth-2 'output' entry is read.
    :type report: dict

    :param extracted:
        Nested dict updated in place.
    :type extracted: dict
    """
    outputs = report.get('output', {})
    for path, block in dsp_utl.stack_nested_keys(outputs, depth=2):
        path = path[::-1]
        for tag, name, value in _param_names_values(block.get('pa', {})):
            if name == 'co2_params_calibrated':
                summary = _format_dict(
                    value.valuesdict().items(), 'co2_params %s'
                )
            elif name == 'calibration_status':
                summary = _format_dict(
                    enumerate(value), 'status co2_params step %d',
                    lambda x: x[0]
                )
            elif name == 'willans_factors':
                summary = value
            elif name == 'phases_willans_factors':
                summary = {}
                for phase, factors in enumerate(value):
                    summary.update(
                        _format_dict(
                            factors.items(), '%s phase {}'.format(phase)
                        )
                    )
            elif name == 'has_sufficient_power':
                summary = {name: value}
            else:
                summary = {}

            if summary:
                target = path + (tag,)
                dsp_utl.get_nested_dicts(extracted, *target).update(summary)
def re_sample_targets(data):
    """
    Re-samples the target time series on the times of the outputs.

    :param data:
        Data with 'target' and 'output' entries; only targets that have an
        output under the same depth-2 keys are processed.
    :type data: dict

    :return:
        Nested dict with the (re-sampled) targets.
    :rtype: dict
    """
    res = {}
    targets = data.get('target', {})
    for path, target in dsp_utl.stack_nested_keys(targets, depth=2):
        if not dsp_utl.are_in_nested_dicts(data, 'output', *path):
            continue

        output = _split_by_data_format(
            dsp_utl.get_nested_dicts(data, 'output', *path)
        )
        selected = dsp_utl.selector(
            output, _split_by_data_format(target), allow_miss=True
        )

        has_times = (
            'times' in selected.get('ts', {}) and 'times' in output['ts']
        )
        if not has_times:
            # Time series without times on both sides are dropped.
            selected.pop('ts', None)
        else:
            time_series = selected['ts']
            x, xp = output['ts']['times'], time_series.pop('times')
            if not _is_equal(x, xp):
                for name, fp in time_series.items():
                    time_series[name] = np.interp(x, xp, fp)

        combined = dsp_utl.combine_dicts(*selected.values())
        dsp_utl.get_nested_dicts(res, *path, default=co2_utl.ret_v(combined))

    return res
def _eng_mode_parser(engineering_mode, soft_validation, use_selector,
                     inputs, errors):
    """
    Post-processes the validated inputs according to the run mode flags.

    :param engineering_mode:
        When falsy, only the declaration data are kept (the discarded
        entries are logged).
    :type engineering_mode: bool

    :param soft_validation:
        When falsy, the hard validation is run and its messages are stored
        in `errors`.
    :type soft_validation: bool

    :param use_selector:
        When falsy, the declaration selector configuration is applied.
    :type use_selector: bool

    :param inputs:
        Validated inputs.
    :type inputs: dict

    :param errors:
        Validation errors, updated in place by the hard validation.
    :type errors: dict

    :return:
        The processed inputs and errors.
    :rtype: (dict, dict)
    """
    if not engineering_mode:
        inputs, errors, diff = _extract_declaration_data(inputs, errors)
        if diff:
            # Report the data dropped because of the declaration mode.
            diff = ['.'.join(k) for k in sorted(diff)]
            log.info(
                'Since CO2MPAS is launched in declaration mode the '
                'following data are not used:\n %s\n'
                'If you want to include these data add to the batch cmd '
                '-D flag.engineering_mode=True',
                ',\n'.join(diff))

    if not use_selector:
        inputs = validations.overwrite_declaration_config_data(inputs)

    if not soft_validation:
        for k, v in dsp_utl.stack_nested_keys(inputs, depth=3):
            for c, msg in validations.hard_validation(v, *k):
                dsp_utl.get_nested_dicts(errors, *k)[c] = SchemaError([], [msg])

    return inputs, errors
def _dd2df(dd, index=None, depth=0, col_key=None, row_key=None):
    """
    Converts a nested dict of records into a single data frame.

    :param dd:
        Nested dict; every entry at `depth` is turned into a data frame
        whose columns are prefixed with the entry keys.
    :type dd: dict

    :param index:
        Column(s) used to drop duplicated rows and as index.
    :type index: list[str], optional

    :param depth:
        Depth passed to `dsp_utl.stack_nested_keys`.
    :type depth: int, optional

    :param col_key:
        Sort key for the columns (unsorted when None).
    :type col_key: callable, optional

    :param row_key:
        Sort key for the index (unsorted when None).
    :type row_key: callable, optional

    :return:
        The frames concatenated column-wise.
    :rtype: pandas.DataFrame
    """
    frames = []
    for k, v in dsp_utl.stack_nested_keys(dd, depth=depth):
        df = pd.DataFrame(v)
        df.drop_duplicates(subset=index, inplace=True)
        if index is not None:
            df.set_index(index, inplace=True)

        df.columns = pd.MultiIndex.from_tuples([k + (i, ) for i in df.columns])
        frames.append(df)

    df = pd.concat(frames, copy=False, axis=1, verify_integrity=True)

    # `reindex` replaces `DataFrame.reindex_axis`, which was removed from
    # pandas; the resulting ordering is the same.
    if col_key is not None:
        ax = sorted(df.columns, key=col_key)
        if isinstance(df.columns, pd.MultiIndex):
            ax = pd.MultiIndex.from_tuples(ax)

        df = df.reindex(columns=ax)

    if row_key is not None:
        ax = sorted(df.index, key=row_key)
        if isinstance(df.index, pd.MultiIndex):
            ax = pd.MultiIndex.from_tuples(ax)

        df = df.reindex(index=ax)

    if index is not None:
        df.index.set_names(index, inplace=True)

    return df
def _chart2excel(writer, sheet, charts):
    """
    Adds the scatter charts to an excel sheet.

    The first code path uses `writer.book.add_chart` and
    `sheet.insert_chart`; when an AttributeError is raised the charts are
    built with openpyxl instead. The charts are laid out in columns of
    three.

    :param writer:
        Excel writer; its `book` attribute is used.
    :type writer: pandas.ExcelWriter

    :param sheet:
        Sheet that receives the charts.
    :type sheet: object

    :param charts:
        Chart definitions; each value has a 'series' list (dicts with
        'label', 'x' and 'y') and a 'set' dict of chart settings.
    :type charts: dict
    """
    try:
        add_chart = writer.book.add_chart
        m, h, w = 3, 300, 512

        for i, (k, v) in enumerate(sorted(charts.items())):
            chart = add_chart({'type': 'scatter', 'subtype': 'straight'})
            for s in v['series']:
                chart.add_series({
                    'name': s['label'],
                    'categories': _data_ref(s['x']),
                    'values': _data_ref(s['y']),
                })
            chart.set_size({'width': w, 'height': h})

            for s, o in v['set'].items():
                # Plain attribute lookup instead of `eval`: it calls the
                # same `set_<name>` method and still raises AttributeError
                # when the setter does not exist.
                getattr(chart, 'set_%s' % s)(o)

            # n: column of the chart grid, j: row inside that column.
            n, j = divmod(i, m)
            sheet.insert_chart('A1', chart, {
                'x_offset': w * n, 'y_offset': h * j
            })
    except AttributeError:
        from openpyxl.chart import ScatterChart, Series
        from xlrd import colname as xl_colname

        sn = writer.book.get_sheet_names()
        named_ranges = {
            '%s!%s' % (sn[d.localSheetId], d.name): d.value
            for d in writer.book.defined_names.definedName
        }
        m, h, w = 3, 7.94, 13.55

        for i, (k, v) in enumerate(sorted(charts.items())):
            chart = ScatterChart()
            chart.height = h
            chart.width = w

            # Map the setting keys onto the openpyxl attribute paths.
            _map = {
                ('title', 'name'): ('title', ),
                ('y_axis', 'name'): ('y_axis', 'title'),
                ('x_axis', 'name'): ('x_axis', 'title'),
            }
            # Only the first character of the legend position is passed on.
            _filter = {
                ('legend', 'position'): lambda x: x[0],
            }
            it = {
                s: _filter[s](o) if s in _filter else o
                for s, o in dsp_utl.stack_nested_keys(v['set'])
            }

            for s, o in dsp_utl.map_dict(_map, it).items():
                c = chart
                for j in s[:-1]:
                    c = getattr(c, j)
                setattr(c, s[-1], o)

            for s in v['series']:
                xvalues = named_ranges[_data_ref(s['x'])]
                values = named_ranges[_data_ref(s['y'])]
                series = Series(values, xvalues, title=s['label'])
                chart.series.append(series)

            n, j = divmod(i, m)

            sheet.add_chart(chart, '%s%d' % (xl_colname(8 * n), 1 + 15 * j))