positions=None, quote_character=None)) # Delete empty rows w.add( dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[ dw.Empty(column=[], table=0, status="active", drop=False) ]))) # Extract from split after 'in ' w.add( dw.Extract(column=["split"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None, on=".*", before=None,
def get_transform():
    """Assemble and return the Data Wrangler script for the ``data`` column.

    The steps are added to the wrangler in this order:

    1. Split ``data`` on newline into rows.
    2. Delete empty rows.
    3. Delete rows where ``data`` starts with ``===``.
    4. Delete the row holding the KBDX / Broadus Airport HTML comment.
    5. Delete rows where ``data`` contains ``<s>'''``.
    6. Extract from ``data`` between positions 5 and 9 into a new column.
    7. Drop ``data``.

    Returns:
        The ``dw.DataWrangler`` instance with all steps added.
    """

    def delete_rows_where(condition):
        # Every deletion step has the same shape: a Filter whose Row holds
        # exactly one condition. Only the condition differs between steps.
        matching_row = dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[condition],
        )
        return dw.Filter(
            column=[],
            table=0,
            status="active",
            drop=False,
            row=matching_row,
        )

    # Literal text of the single comment row that is removed by equality.
    kbdx_comment = "<!-- KBDX was Broadus Airport in Broadus, Montana. Replaced by new airport with FAA ID: 00F -->"

    wrangler = dw.DataWrangler()

    # Split data repeatedly on newline into rows
    wrangler.add(
        dw.Split(
            column=["data"],
            table=0,
            status="active",
            drop=True,
            result="row",
            update=False,
            insert_position="right",
            row=None,
            on="\n",
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=0,
            positions=None,
            quote_character=None,
        )
    )

    # Delete empty rows
    is_empty = dw.Empty(
        column=[],
        table=0,
        status="active",
        drop=False,
        percent_valid=0,
        num_valid=0,
    )
    wrangler.add(delete_rows_where(is_empty))

    # Delete rows where data starts with '==='
    starts_with_rule = dw.StartsWith(
        column=[],
        table=0,
        status="active",
        drop=False,
        lcol="data",
        value="===",
        op_str="starts with",
    )
    wrangler.add(delete_rows_where(starts_with_rule))

    # Delete rows where data = '<!-- KBDX was Broadus Airport ...
    equals_kbdx_comment = dw.Eq(
        column=[],
        table=0,
        status="active",
        drop=False,
        lcol="data",
        value=kbdx_comment,
        op_str="=",
    )
    wrangler.add(delete_rows_where(equals_kbdx_comment))

    # Delete rows where data contains "<s>'''"
    contains_struck_bold = dw.Contains(
        column=[],
        table=0,
        status="active",
        drop=False,
        lcol="data",
        value="<s>'''",
        op_str="contains",
    )
    wrangler.add(delete_rows_where(contains_struck_bold))

    # Extract from data between positions 5, 9
    wrangler.add(
        dw.Extract(
            column=["data"],
            table=0,
            status="active",
            drop=False,
            result="column",
            update=False,
            insert_position="right",
            row=None,
            on=None,
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=1,
            positions=[5, 9],
        )
    )

    # Drop data
    wrangler.add(dw.Drop(column=["data"], table=0, status="active", drop=True))

    return wrangler
which=1, max="0", positions=None)) # Wrap empty rows w.add(dw.Wrap(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[dw.Empty(column=[], table=0, status="active", drop=False, percent_valid=0, num_valid=0)]))) # Fold wrap, wrap1, wrap2, wrap3... using header as a key w.add(dw.Fold(column=["wrap","wrap1","wrap2","wrap3","wrap4","wrap5"], table=0, status="active", drop=False, keys=[-1])) # Translate value up w.add(dw.Translate(column=["value"], table=0, status="active", drop=False,
which=1, max=0, positions=None, quote_character=None)) # Delete empty rows w.add(dw.Filter(column=[], table=0, status="active", drop=False, row=dw.Row(column=[], table=0, status="active", drop=False, conditions=[dw.Empty(column=[], table=0, status="active", drop=False)]))) # Extract from split after 'in ' w.add(dw.Extract(column=["split"], table=0, status="active", drop=False, result="column", update=False, insert_position="right", row=None, on=".*", before=None, after="in ", ignore_between=None,