def get_transform():
    """Assemble the Data Wrangler script applied to the ``data`` column.

    Steps are registered in this order:

    1. Split ``data`` on newlines into rows.
    2. Delete empty rows.
    3. Delete rows where ``data`` starts with ``===``.
    4. Delete the row holding the KBDX / Broadus Airport HTML comment.
    5. Delete rows where ``data`` contains ``<s>'''``.
    6. Extract the text of ``data`` between positions 5 and 9.
    7. Drop the ``data`` column.

    Returns:
        The ``dw.DataWrangler`` instance with all steps added.
    """

    def _delete_rows_where(condition):
        # Every row-deletion step wraps a single condition in the same
        # Row/Filter pair; only the condition itself differs.
        return dw.Filter(
            column=[],
            table=0,
            status="active",
            drop=False,
            row=dw.Row(
                column=[],
                table=0,
                status="active",
                drop=False,
                conditions=[condition],
            ),
        )

    script = dw.DataWrangler()

    # Split data repeatedly on newline into rows
    script.add(
        dw.Split(
            column=["data"],
            table=0,
            status="active",
            drop=True,
            result="row",
            update=False,
            insert_position="right",
            row=None,
            on="\n",
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=0,
            positions=None,
            quote_character=None,
        )
    )

    # Delete empty rows
    script.add(
        _delete_rows_where(
            dw.Empty(
                column=[],
                table=0,
                status="active",
                drop=False,
                percent_valid=0,
                num_valid=0,
            )
        )
    )

    # Delete rows where data starts with '==='
    script.add(
        _delete_rows_where(
            dw.StartsWith(
                column=[],
                table=0,
                status="active",
                drop=False,
                lcol="data",
                value="===",
                op_str="starts with",
            )
        )
    )

    # Delete rows where data = '<!-- KBDX was Broadus Airport ...
    script.add(
        _delete_rows_where(
            dw.Eq(
                column=[],
                table=0,
                status="active",
                drop=False,
                lcol="data",
                value="<!-- KBDX was Broadus Airport in Broadus, Montana. Replaced by new airport with FAA ID: 00F -->",
                op_str="=",
            )
        )
    )

    # Delete rows where data contains "<s>'''"
    script.add(
        _delete_rows_where(
            dw.Contains(
                column=[],
                table=0,
                status="active",
                drop=False,
                lcol="data",
                value="<s>'''",
                op_str="contains",
            )
        )
    )

    # Extract from data between positions 5, 9
    script.add(
        dw.Extract(
            column=["data"],
            table=0,
            status="active",
            drop=False,
            result="column",
            update=False,
            insert_position="right",
            row=None,
            on=None,
            before=None,
            after=None,
            ignore_between=None,
            which=1,
            max=1,
            positions=[5, 9],
        )
    )

    # Drop data
    script.add(dw.Drop(column=["data"], table=0, status="active", drop=True))

    return script
from wrangler import dw w = dw.DataWrangler() # Split data repeatedly on newline into rows w.add(dw.Split(column=["data"], table=0, status="active", drop=True, result="row", update=False, insert_position="right", row=None, on="\n", before=None, after=None, ignore_between=None, which=1, max=0, positions=None, quote_character=None)) # Split data repeatedly on ',' w.add(dw.Split(column=["data"], table=0, status="active", drop=True, result="column", update=False, insert_position="right", row=None,
sys.exit( 'Error: Please include an input and output file. Example python script.py input.csv output.csv' ) w = dw.DataWrangler() # Split data repeatedly on '|-' into rows w.add( dw.Split(column=["data"], table=0, status="active", drop=True, result="row", update=False, insert_position="right", row=None, on="\\|-", before=None, after=None, ignore_between=None, which=1, max=0, positions=None, quote_character=None)) # Cut from data on '| any lowercase word =#FFF any number any word \|' w.add( dw.Cut(column=["data"], table=0, status="active", drop=False, result="column",