def _safe_node(node):
    """Return ``node`` unchanged if it parses as JSONPath, else double-quoted.

    ``parse_jsonpath`` is used purely as a validity check; its result is
    discarded.
    """
    try:
        parse_jsonpath(node)
    except Exception:
        # Any parse failure is taken to mean the node contains special
        # characters, so it is wrapped in double quotes.
        return '"{}"'.format(node)
    return node
def test_split_leftmost(self):
    """Check ``split_leftmost`` on bare, dotted and sliced JSONPath strings."""
    # Each entry pairs a JSONPath string with the expected (leftmost, rest) split.
    cases = [
        ('foo',
         (jsonpath.Fields('foo'), jsonpath.This())),
        ('foo.baz',
         (jsonpath.Fields('foo'), jsonpath.Fields('baz'))),
        ('foo.baz.bar',
         (jsonpath.Fields('foo'),
          jsonpath.Fields('baz').child(jsonpath.Fields('bar')))),
        ('[*].baz',
         (jsonpath.Slice(), jsonpath.Fields('baz'))),
        ('foo[*].baz',
         (jsonpath.Fields('foo'),
          jsonpath.Slice().child(jsonpath.Fields('baz')))),
    ]
    for path_string, expected_split in cases:
        assert split_leftmost(parse_jsonpath(path_string)) == expected_split
def compile_source(worksheet):
    """
    Compile the data-source portion of an Excel worksheet into MiniLinq.

    Reads the first cell of the 'data source' column, the filters produced by
    ``compile_filters`` and the optional 'include referenced items' column.

    For example, this spreadsheet

    Data Source                    Filter Name   Filter Value        Include Referenced Items
    -----------------------------  ------------  ------------------  --------------------------
    form[*].form.child_questions   app_id        <app id>            cases
                                   xmlns.exact   <some form xmlns>

    Should fetch from api/form?app_id=<app id>&xmlns.exact=<some form xmlns>&cases__full=true
    and then iterate (FlatMap) over all child questions.

    :return: tuple of the 'data source' expression and the 'root doc expression'.
        'data source': The MiniLinq that calls 'api_data' function to get data from CommCare
        'root doc expression': The MiniLinq that is applied to each doc, can be None.
    :raises Exception: if the sheet has no 'data source' column, or if the
        leftmost JSONPath step is not a single-field ``jsonpath.Fields``.
    """
    source_cells = get_column_by_name(worksheet, 'data source')
    if not source_cells:
        raise Exception('Sheet has no "Data Source" column.')
    source_text = source_cells[0].value

    filters = compile_filters(worksheet)

    referenced_cells = get_column_by_name(worksheet, 'include referenced items') or []
    include_referenced_items = [cell.value for cell in referenced_cells]

    data_source, data_source_jsonpath = split_leftmost(parse_jsonpath(source_text))
    leading_step, path_after_step = split_leftmost(data_source_jsonpath)

    # The leftmost step must be a Fields node naming exactly one field; that
    # field is the name of the data source.
    is_single_field = (
        isinstance(data_source, jsonpath.Fields)
        and len(data_source.fields) <= 1
    )
    if not is_single_field:
        raise Exception('Bad value for data source: %s' % str(data_source))
    data_source = data_source.fields[0]

    # A slice directly after the source name is treated as redundant and dropped.
    if isinstance(leading_step, jsonpath.Slice):
        data_source_jsonpath = path_after_step

    api_query_args = [Reference("api_data"), Literal(data_source)]
    if filters:
        api_query_args.append(Literal(dict(filters)))
    elif include_referenced_items:
        # Pad the filter slot so the referenced-items argument keeps its
        # position; keeps tests and user code more readable at the expense
        # of this conditional.
        api_query_args.append(Literal(None))
    if include_referenced_items:
        api_query_args.append(Literal(include_referenced_items))

    api_query = Apply(*api_query_args)

    has_no_root_path = (
        data_source_jsonpath is None
        or isinstance(data_source_jsonpath, (jsonpath.This, jsonpath.Root))
    )
    root_doc_expr = None if has_no_root_path else Reference(str(data_source_jsonpath))
    return api_query, root_doc_expr
def compile_source(worksheet):
    """
    Compiles just the part of the Excel Spreadsheet that indicates the API endpoint
    to hit along with optional filters and an optional JSONPath within that endpoint.

    For example, this spreadsheet

    Data Source                    Filter Name   Filter Value        Include Referenced Items
    -----------------------------  ------------  ------------------  --------------------------
    form[*].form.child_questions   app_id        <app id>            cases
                                   xmlns.exact   <some form xmlns>

    Should fetch from api/form?app_id=<app id>&xmlns.exact=<some form xmlns>&cases__full=true
    and then iterate (FlatMap) over all child questions.

    :return: the ``Apply`` of 'api_data' when no JSONPath remains after the
        data source name, otherwise a ``FlatMap`` of that query over a
        ``Reference`` to the remaining JSONPath.
    :raises Exception: if the sheet has no 'Data Source' column, or if the
        leftmost JSONPath step is not a single-field ``jsonpath.Fields``.
    """
    data_source_column = get_column_by_name(worksheet, 'Data Source')
    if not data_source_column:
        # Fail with a clear message instead of an opaque TypeError/IndexError
        # from indexing a missing or empty column.
        raise Exception('Sheet has no "Data Source" column.')
    data_source_str = data_source_column[0].value

    filters = compile_filters(worksheet)
    include_referenced_items = [
        cell.value
        for cell in (get_column_by_name(worksheet, 'Include Referenced Items') or [])
    ]

    data_source, data_source_jsonpath = split_leftmost(parse_jsonpath(data_source_str))
    maybe_redundant_slice, remaining_jsonpath = split_leftmost(data_source_jsonpath)

    # The leftmost _must_ be of type Fields with one field and will pull out the first field
    if not isinstance(data_source, jsonpath.Fields) or len(data_source.fields) > 1:
        raise Exception('Bad value for data source: %s' % str(data_source))
    data_source = data_source.fields[0]

    if isinstance(maybe_redundant_slice, jsonpath.Slice):
        data_source_jsonpath = remaining_jsonpath

    api_query_args = [Reference("api_data"), Literal(data_source)]

    if filters:
        if data_source == 'form':
            # Forms take the filters as a nested 'and' of 'term' clauses.
            filter_literal = Literal({'filter': {'and': [
                {'term': {filter_name: filter_value}}
                for filter_name, filter_value in filters
            ]}})
        else:
            # 'case' and every other data source get the filters as a plain
            # dict. Previously filters for other sources were silently dropped,
            # which also shifted the referenced-items argument into the
            # filter position.
            filter_literal = Literal(dict(filters))
        api_query_args.append(filter_literal)
    elif include_referenced_items:
        # Pad the argument list if we have further args; keeps tests and user
        # code more readable at the expense of this conditional
        api_query_args.append(Literal(None))

    if include_referenced_items:
        api_query_args.append(Literal(include_referenced_items))

    api_query = Apply(*api_query_args)

    if data_source_jsonpath is None or isinstance(
            data_source_jsonpath, (jsonpath.This, jsonpath.Root)):
        return api_query
    return FlatMap(source=api_query, body=Reference(str(data_source_jsonpath)))
def _get_safe_source_field(source_field):
    """Wrap ``source_field`` in a ``Reference``, quoting nodes that do not parse.

    If the whole path parses as JSONPath it is used unchanged. Otherwise it is
    split on '.', each non-empty node that fails to parse on its own is wrapped
    in double quotes, and the nodes are joined back together.

    :raises Exception: if the rebuilt path ends with '.' (a blank trailing node).
    """

    def _safe_node(node):
        # Quote a single node when it is not valid JSONPath by itself.
        try:
            parse_jsonpath(node)
        except Exception:
            return '"{}"'.format(node)
        return node

    try:
        parse_jsonpath(source_field)
    except Exception:
        rebuilt_nodes = []
        for node in source_field.split('.'):
            # Empty nodes are passed through unquoted; a trailing one is
            # rejected by the check below.
            rebuilt_nodes.append(_safe_node(node) if node else node)
        source_field = '.'.join(rebuilt_nodes)
        if source_field.endswith('.'):
            raise Exception("Blank node path: {}".format(source_field))

    return Reference(source_field)
def compile_source(worksheet):
    """
    Compiles just the part of the Excel Spreadsheet that indicates the API endpoint
    to hit along with optional filters and an optional JSONPath within that endpoint.

    For example, this spreadsheet

    Data Source                    Filter Name   Filter Value
    -----------------------------  ------------  ------------------
    form[*].form.child_questions   app_id        <app id>
                                   xmlns.exact   <some form xmlns>

    Should fetch from api/form?app_id=<app id>&xmlns.exact=<some form xmlns>
    and then iterate (FlatMap) over all child questions.

    :return: the ``Apply`` of 'api_data' when no JSONPath remains after the
        data source name, otherwise a ``FlatMap`` of that query over a
        ``Reference`` to the remaining JSONPath.
    :raises Exception: if the sheet has no 'Data Source' column, or if the
        leftmost JSONPath step is not a single-field ``jsonpath.Fields``.
    """
    data_source_column = get_column_by_name(worksheet, 'Data Source')
    if not data_source_column:
        # Fail with a clear message instead of an opaque TypeError/IndexError
        # from indexing a missing or empty column.
        raise Exception('Sheet has no "Data Source" column.')
    data_source_str = data_source_column[0].value

    filters = compile_filters(worksheet)

    data_source, data_source_jsonpath = split_leftmost(parse_jsonpath(data_source_str))
    maybe_redundant_slice, remaining_jsonpath = split_leftmost(data_source_jsonpath)

    # The leftmost _must_ be of type Fields with one field and will pull out the first field
    if not isinstance(data_source, jsonpath.Fields) or len(data_source.fields) > 1:
        raise Exception('Bad value for data source: %s' % str(data_source))
    data_source = data_source.fields[0]

    if isinstance(maybe_redundant_slice, jsonpath.Slice):
        data_source_jsonpath = remaining_jsonpath

    api_query_args = [Reference("api_data"), Literal(data_source)]
    if filters:
        # Filters are sent as a nested 'and' of 'term' clauses.
        api_query_args.append(Literal({'filter': {'and': [
            {'term': {filter_name: filter_value}}
            for filter_name, filter_value in filters
        ]}}))
    api_query = Apply(*api_query_args)

    if data_source_jsonpath is None or isinstance(
            data_source_jsonpath, (jsonpath.This, jsonpath.Root)):
        return api_query
    return FlatMap(source=api_query, body=Reference(str(data_source_jsonpath)))
def parse(self, jsonpath_string):
    """Return the parsed form of ``jsonpath_string``, memoised in ``JSONPATH_CACHE``.

    The string is parsed with ``parse_jsonpath`` only the first time it is
    seen; later calls return the cached object.
    """
    if jsonpath_string in JSONPATH_CACHE:
        return JSONPATH_CACHE[jsonpath_string]

    parsed_path = parse_jsonpath(jsonpath_string)
    JSONPATH_CACHE[jsonpath_string] = parsed_path
    return parsed_path
def compile_source(worksheet):
    """
    Compile the data-source portion of an Excel worksheet into MiniLinq.

    Reads the first cell of the 'data source' column, the filters produced by
    ``compile_filters`` and the optional 'include referenced items' column.

    For example, this spreadsheet

    Data Source                    Filter Name   Filter Value        Include Referenced Items
    -----------------------------  ------------  ------------------  --------------------------
    form[*].form.child_questions   app_id        <app id>            cases
                                   xmlns.exact   <some form xmlns>

    Should fetch from api/form?app_id=<app id>&xmlns.exact=<some form xmlns>&cases__full=true
    and then iterate (FlatMap) over all child questions.

    :return: 3-tuple of the data source name, the 'data source' expression and
        the 'root doc expression'.
        data source name: the single field name taken from the leftmost JSONPath step
        'data source': The MiniLinq that calls 'api_data' function to get data from CommCare
        'root doc expression': The MiniLinq that is applied to each doc, can be None.
    :raises Exception: if the sheet has no 'data source' column, or if the
        leftmost JSONPath step is not a single-field ``jsonpath.Fields``.
    """
    source_cells = get_column_by_name(worksheet, 'data source')
    if not source_cells:
        raise Exception('Sheet has no "Data Source" column.')
    source_text = source_cells[0].value

    filters = compile_filters(worksheet)

    referenced_cells = get_column_by_name(worksheet, 'include referenced items') or []
    include_referenced_items = [cell.value for cell in referenced_cells]

    data_source, data_source_jsonpath = split_leftmost(parse_jsonpath(source_text))
    leading_step, path_after_step = split_leftmost(data_source_jsonpath)

    # The leftmost step must be a Fields node naming exactly one field; that
    # field is the name of the data source.
    is_single_field = (
        isinstance(data_source, jsonpath.Fields)
        and len(data_source.fields) <= 1
    )
    if not is_single_field:
        raise Exception('Bad value for data source: %s' % str(data_source))
    data_source = data_source.fields[0]

    # A slice directly after the source name is treated as redundant and dropped.
    if isinstance(leading_step, jsonpath.Slice):
        data_source_jsonpath = path_after_step

    api_query_args = [
        Reference("api_data"),
        Literal(data_source),
        Reference('checkpoint_manager'),
    ]
    if filters:
        api_query_args.append(Literal(dict(filters)))
    elif include_referenced_items:
        # Pad the filter slot so the referenced-items argument keeps its
        # position; keeps tests and user code more readable at the expense
        # of this conditional.
        api_query_args.append(Literal(None))
    if include_referenced_items:
        api_query_args.append(Literal(include_referenced_items))

    api_query = Apply(*api_query_args)

    has_no_root_path = (
        data_source_jsonpath is None
        or isinstance(data_source_jsonpath, (jsonpath.This, jsonpath.Root))
    )
    root_doc_expr = None if has_no_root_path else Reference(str(data_source_jsonpath))
    return data_source, api_query, root_doc_expr
def test_split_leftmost(self):
    """Check that ``split_leftmost`` peels the first step off a parsed JSONPath."""

    def split(path_string):
        # Parse the string, then split off its leftmost step.
        return split_leftmost(parse_jsonpath(path_string))

    fields = jsonpath.Fields

    # A single field leaves ``This`` as the remainder.
    assert split('foo') == (fields('foo'), jsonpath.This())

    # Dotted paths: the remainder is everything after the first field.
    assert split('foo.baz') == (fields('foo'), fields('baz'))
    assert split('foo.baz.bar') == (
        fields('foo'),
        fields('baz').child(fields('bar')),
    )

    # Slices count as steps of their own.
    assert split('[*].baz') == (jsonpath.Slice(), fields('baz'))
    assert split('foo[*].baz') == (
        fields('foo'),
        jsonpath.Slice().child(fields('baz')),
    )