def get_children(column: ResultSetColumnType) -> List[ResultSetColumnType]:
    """
    Expand a complex Presto column (row or array) into its child columns.

    An array produces a single child that keeps the parent's name and carries
    the element type:

    >>> get_children(dict(name="a", type="ARRAY(BIGINT)", is_dttm=False))
    [{'name': 'a', 'type': 'BIGINT', 'is_dttm': False}]

    A row produces one child per field. Each child is named
    ``<parent>.<field>`` with the field name lower-cased; a field without a
    name is called ``_col<N>``, where ``N`` counts the unnamed fields seen so
    far:

    >>> get_children(  # doctest: +NORMALIZE_WHITESPACE
    ...     dict(name="a", type="ROW(BIGINT,FOO VARCHAR)", is_dttm=False)
    ... )
    [{'name': 'a._col0', 'type': 'BIGINT', 'is_dttm': False},
     {'name': 'a.foo', 'type': 'VARCHAR', 'is_dttm': False}]

    Every child is emitted with ``is_dttm`` set to ``False``.

    :param column: dictionary representing a Presto column
    :return: list of dictionaries representing children columns
    :raises ValueError: if the column's ``type`` is empty or ``None``
    :raises Exception: if the type does not look like ``NAME(...)`` or the
        outer type is neither ``ROW`` nor ``ARRAY``
    """
    raw_type = column["type"]
    if not raw_type:
        raise ValueError

    # Outer type name, followed by everything between the outermost parens.
    parsed = re.match(r"(?P<type>\w+)\((?P<children>.*)\)", raw_type)
    if parsed is None:
        raise Exception(f"Unable to parse column type {column['type']}")

    outer_type = parsed.group("type").upper()
    inner = parsed.group("children")

    if outer_type == "ARRAY":
        element: ResultSetColumnType = {
            "name": column["name"],
            "type": inner,
            "is_dttm": False,
        }
        return [element]

    if outer_type != "ROW":
        raise Exception(f"Unknown type {outer_type}!")

    children: List[ResultSetColumnType] = []
    unnamed_seen = 0
    for field in utils.split(inner, ","):
        tokens = list(utils.split(field.strip(), " "))
        if len(tokens) == 2:
            # "<name> <type>"; the name may be wrapped in double quotes.
            field_name = tokens[0].strip('"')
            field_type = tokens[1]
        else:
            # Any other token count is treated as an unnamed field whose type
            # is the first token.
            field_name = f"_col{unnamed_seen}"
            field_type = tokens[0]
            unnamed_seen += 1
        children.append(
            {
                "name": f"{column['name']}.{field_name.lower()}",
                "type": field_type,
                "is_dttm": False,
            }
        )
    return children
def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
    """
    Get the children of a complex Presto type (row or array).

    For arrays, we return a single-element list with the base type, keeping
    the parent's name:

    >>> get_children(dict(name="a", type="ARRAY(BIGINT)"))
    [{'name': 'a', 'type': 'BIGINT'}]

    For rows, we return one entry per field. Field names are lower-cased and
    prefixed with the parent's name; unnamed fields are called ``_col<N>``,
    where ``N`` counts the unnamed fields seen so far:

    >>> get_children(dict(name="a", type="ROW(BIGINT,FOO VARCHAR)"))
    [{'name': 'a._col0', 'type': 'BIGINT'}, {'name': 'a.foo', 'type': 'VARCHAR'}]

    :param column: dictionary representing a Presto column
    :return: list of dictionaries representing children columns
    :raises ValueError: if the column's ``type`` is empty or ``None``
    :raises Exception: if the type does not look like ``NAME(...)`` or the
        outer type is neither ``ROW`` nor ``ARRAY``
    """
    column_type = column["type"]
    # Fail with a clear error instead of letting ``None`` reach the regex
    # engine, where it would surface as an opaque TypeError.
    if not column_type:
        raise ValueError(
            f"Column {column.get('name')!r} has no type to get children from"
        )

    # Outer type name, followed by everything between the outermost parens.
    pattern = re.compile(r"(?P<type>\w+)\((?P<children>.*)\)")
    match = pattern.match(column_type)
    if not match:
        raise Exception(f"Unable to parse column type {column['type']}")

    group = match.groupdict()
    type_ = group["type"].upper()
    children_type = group["children"]

    if type_ == "ARRAY":
        return [{"name": column["name"], "type": children_type}]

    if type_ == "ROW":
        nameless_columns = 0
        columns = []
        for child in utils.split(children_type, ","):
            parts = list(utils.split(child.strip(), " "))
            if len(parts) == 2:
                # "<name> <type>"; the name may be wrapped in double quotes.
                name, child_type = parts
                name = name.strip('"')
            else:
                # NOTE: any other token count is treated as an unnamed field
                # whose type is the first token, so a named field with a
                # multi-word type is not recognised as named.
                name = f"_col{nameless_columns}"
                child_type = parts[0]
                nameless_columns += 1
            columns.append(
                {"name": f"{column['name']}.{name.lower()}", "type": child_type}
            )
        return columns

    raise Exception(f"Unknown type {type_}!")
def test_split(self):
    """
    Check that ``split`` breaks a string on the delimiter while keeping
    parenthesised groups and quoted substrings (including ones containing an
    escaped quote) as single tokens.
    """
    # (input text, keyword arguments passed to split, expected tokens)
    cases = [
        ("a b", {}, ["a", "b"]),
        ("a,b", {"delimiter": ","}, ["a", "b"]),
        ("a,(b,a)", {"delimiter": ","}, ["a", "(b,a)"]),
        (
            'a,(b,a),"foo , bar"',
            {"delimiter": ","},
            ["a", "(b,a)", '"foo , bar"'],
        ),
        ("a,'b,c'", {"delimiter": ",", "quote": "'"}, ["a", "'b,c'"]),
        ('a "b c"', {}, ["a", '"b c"']),
        (r'a "b \" c"', {}, ["a", r'"b \" c"']),
    ]
    for text, options, expected in cases:
        tokens = list(split(text, **options))
        self.assertEqual(tokens, expected)