def transform_resource(self, resource):
    """Merge the rows of a second resource into ``resource``.

    The options are read through ``self.get``: ``resource`` (the resource to
    merge in, or its name), ``fieldNames``, ``ignoreFields`` and
    ``sortByField``. The schema of ``resource`` is extended in place and
    ``resource.data`` is replaced by a petl view built from both tables.
    """
    other = self.get("resource")
    header = self.get("fieldNames")
    positional = self.get("ignoreFields")
    sort_key = self.get("sortByField")

    # A name is looked up in the package of the resource being transformed
    if isinstance(other, str):
        other = resource.package.get_resource(other)
    other.infer()

    left = resource.to_petl()
    right = other.to_petl()

    # Ignore fields: only the trailing fields of the other schema are appended
    if positional:
        surplus = other.schema.fields[len(resource.schema.fields):]
        for extra in surplus:
            resource.schema.add_field(extra)
        resource.data = petl.stack(left, right)
        return

    # Default: add every field whose name the schema does not have yet
    for candidate in other.schema.fields:
        if candidate.name in resource.schema.field_names:
            continue
        resource.schema.add_field(candidate)

    # Restrict the schema to the requested field names, when given
    if header:
        dropped = [
            existing.name
            for existing in resource.schema.fields
            if existing.name not in header
        ]
        for name in dropped:
            resource.schema.remove_field(name)

    resource.data = (
        petl.mergesort(left, right, key=sort_key, header=header)
        if sort_key
        else petl.cat(left, right, header=header)
    )
def transform_resource(self, source, target):
    """Combine ``source`` with the configured resource and store it on ``target``.

    The resource to merge, the ignore-fields flag, the sort key and the field
    names are read from private attributes of the step. ``target.data`` is set
    to a petl view built from both tables, then ``target.schema`` is adjusted.
    """
    # A name is resolved once through the source's package and kept on the step
    if isinstance(self.__resource, str):
        self.__resource = source.package.get_resource(self.__resource)
    other = self.__resource
    other.infer(only_sample=True)

    left = source.to_petl()
    right = other.to_petl()

    # Ignore fields
    if self.__ignore_fields:
        target.data = petl.stack(left, right)
        trailing = other.schema.fields[len(target.schema.fields):]
        for extra in trailing:
            target.schema.add_field(extra)
        return

    # Default
    wanted = self.__field_names
    sort_key = self.__sort
    target.data = (
        petl.mergesort(left, right, key=sort_key, header=wanted)
        if sort_key
        else petl.cat(left, right, header=wanted)
    )

    # Add every field whose name the target schema does not have yet
    for candidate in other.schema.fields:
        if candidate.name in target.schema.field_names:
            continue
        target.schema.add_field(candidate)

    # Drop the fields that were not requested, when a selection was given
    if wanted:
        for existing in tuple(target.schema.fields):
            if existing.name in wanted:
                continue
            target.schema.remove_field(existing.name)
def test_stack(self):
    """Check that ``stack()`` on a table matches ``petl.stack`` of the inputs."""
    tbl1 = self.tbl
    tbl2 = Table([{'first': 'Mary', 'last': 'Nichols'}])
    # Different column names shouldn't matter for stack()
    tbl3 = Table([{'f': 'Lucy', 'l': 'Peterson'}])

    # Build the expectation from the table as it is BEFORE stacking. stack()
    # changes tbl1 in place, so computing it afterwards would stack tbl2 and
    # tbl3 a second time on top of the already-stacked rows.
    expected_tbl = Table(petl.stack(tbl1.table, tbl2.table, tbl3.table))

    tbl1.stack(tbl2, tbl3)

    assert_matching_tables(expected_tbl, tbl1)
def stack(self, *tables, missing=None):
    """
    Stack Parsons tables on top of one another.

    Similar to ``table.concat()``, except no attempt is made to align fields from
    different tables.

    `Args:`
        tables: Parsons Table or list
            One or more tables passed as separate arguments, or a list (or tuple)
            of tables
        missing: any
            The value to use when padding missing values
    `Returns:`
        ``None``
    """

    # ``*tables`` always arrives as a tuple, so a list handed in as an argument
    # shows up as one of its items. Flatten such items so that both
    # ``stack(t1, t2)`` and ``stack([t1, t2])`` work.
    petl_tables = []
    for item in tables:
        group = item if isinstance(item, (list, tuple)) else [item]
        petl_tables.extend(tbl.table for tbl in group)

    self.table = petl.stack(self.table, *petl_tables, missing=missing)
def get_historical_quotes(trade_date='20160701'):
    """Return the TSE and OTC historical quotes for ``trade_date`` stacked into one table.

    The TSE table is fetched first and comes first in the stacked result.
    """
    return etl.stack(
        get_historical_quotes_tse(trade_date=trade_date),
        get_historical_quotes_otc(trade_date=trade_date),
    )