Exemplo n.º 1
0
             positions=None,
             quote_character=None))

# Delete the rows that are empty (Filter over a Row with an Empty condition)
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Empty(column=[], table=0, status="active", drop=False),
            ],
        ),
    )
)

# Extract from split after 'in '
w.add(
    dw.Extract(column=["split"],
               table=0,
               status="active",
               drop=False,
               result="column",
               update=False,
               insert_position="right",
               row=None,
               on=".*",
               before=None,
Exemplo n.º 2
0
def get_transform():
    """Assemble the wrangling script applied to the raw ``data`` column.

    The steps are registered on the wrangler in this order:

    1. split ``data`` on newlines into rows;
    2. delete empty rows;
    3. delete rows where ``data`` starts with ``===``;
    4. delete the row equal to the KBDX / Broadus Airport HTML comment;
    5. delete rows where ``data`` contains ``<s>'''``;
    6. extract from ``data`` between positions 5 and 9 (result is a column);
    7. drop ``data``.

    Returns:
        The ``dw.DataWrangler`` instance with every step added.
    """

    def delete_rows_matching(condition):
        # Every row filter below uses the same Filter/Row scaffolding; only
        # the single condition placed inside the Row differs.
        matching_rows = dw.Row(column=[],
                               table=0,
                               status="active",
                               drop=False,
                               conditions=[condition])
        return dw.Filter(column=[],
                         table=0,
                         status="active",
                         drop=False,
                         row=matching_rows)

    script = dw.DataWrangler()

    # Step 1: split data repeatedly on newline into rows.
    newline_split = dw.Split(column=["data"],
                             table=0,
                             status="active",
                             drop=True,
                             result="row",
                             update=False,
                             insert_position="right",
                             row=None,
                             on="\n",
                             before=None,
                             after=None,
                             ignore_between=None,
                             which=1,
                             max=0,
                             positions=None,
                             quote_character=None)
    script.add(newline_split)

    # Step 2: delete empty rows.
    is_empty = dw.Empty(column=[],
                        table=0,
                        status="active",
                        drop=False,
                        percent_valid=0,
                        num_valid=0)
    script.add(delete_rows_matching(is_empty))

    # Step 3: delete rows where data starts with '==='.
    is_heading = dw.StartsWith(column=[],
                               table=0,
                               status="active",
                               drop=False,
                               lcol="data",
                               value="===",
                               op_str="starts with")
    script.add(delete_rows_matching(is_heading))

    # Step 4: delete rows where data equals the KBDX replacement note.
    kbdx_note = "<!-- KBDX was Broadus Airport in Broadus, Montana. Replaced by new airport with FAA ID: 00F -->"
    is_kbdx_note = dw.Eq(column=[],
                         table=0,
                         status="active",
                         drop=False,
                         lcol="data",
                         value=kbdx_note,
                         op_str="=")
    script.add(delete_rows_matching(is_kbdx_note))

    # Step 5: delete rows where data contains "<s>'''".
    has_struck_entry = dw.Contains(column=[],
                                   table=0,
                                   status="active",
                                   drop=False,
                                   lcol="data",
                                   value="<s>'''",
                                   op_str="contains")
    script.add(delete_rows_matching(has_struck_entry))

    # Step 6: extract from data between positions 5 and 9.
    code_extract = dw.Extract(column=["data"],
                              table=0,
                              status="active",
                              drop=False,
                              result="column",
                              update=False,
                              insert_position="right",
                              row=None,
                              on=None,
                              before=None,
                              after=None,
                              ignore_between=None,
                              which=1,
                              max=1,
                              positions=[5, 9])
    script.add(code_extract)

    # Step 7: drop the original data column.
    script.add(dw.Drop(column=["data"], table=0, status="active", drop=True))

    return script
Exemplo n.º 3
0
             which=1,
             max="0",
             positions=None))

# Wrap on the rows that are empty (Row with an Empty condition)
w.add(
    dw.Wrap(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Empty(
                    column=[],
                    table=0,
                    status="active",
                    drop=False,
                    percent_valid=0,
                    num_valid=0,
                ),
            ],
        ),
    )
)

# Fold the wrap, wrap1 ... wrap5 columns, using the header as the key
w.add(
    dw.Fold(
        column=["wrap", "wrap1", "wrap2", "wrap3", "wrap4", "wrap5"],
        table=0,
        status="active",
        drop=False,
        keys=[-1],
    )
)

# Translate value up
w.add(dw.Translate(column=["value"],
                   table=0,
                   status="active",
                   drop=False,
Exemplo n.º 4
0
               which=1,
               max=0,
               positions=None,
               quote_character=None))

# Delete the rows that are empty (Filter over a Row with an Empty condition)
w.add(
    dw.Filter(
        column=[],
        table=0,
        status="active",
        drop=False,
        row=dw.Row(
            column=[],
            table=0,
            status="active",
            drop=False,
            conditions=[
                dw.Empty(column=[], table=0, status="active", drop=False),
            ],
        ),
    )
)

# Extract from split after 'in '
w.add(dw.Extract(column=["split"],
                 table=0,
                 status="active",
                 drop=False,
                 result="column",
                 update=False,
                 insert_position="right",
                 row=None,
                 on=".*",
                 before=None,
                 after="in ",
                 ignore_between=None,