Пример #1
0
  def go(self):
    """Yield one result row per planner: ``[planner, score, score, ...]``.

    For every planner name, ``self.reps`` repetitions are run.  Each
    repetition rebuilds the train/test tables with ``createTbl``, packs the
    test rows whose second-to-last cell is positive into a temporary table,
    lets the planner produce a revised table and re-scores that table with
    ``self.pred``.  The appended score is
    ``1 - hits(new predictions) / hits(actual)``, where ``hits`` counts the
    entries that are not below 1.

    Raises:
      ValueError: if a planner name has no matching branch.
    """

    def hits(values):
      # Number of entries that are not below 1.
      return sum([0 if v < 1 else 1 for v in values])

    for planner in ['XTREE', 'BIC', 'CD', 'CD+FS']:
      out = [planner]
      for _ in xrange(self.reps):
        train_DF = createTbl(self.train[self._n], isBin=True)
        test_df = createTbl(self.test[self._n], isBin=True)
        actual = np.array(Bugs(test_df))
        # NOTE(review): the result of this prediction is not used below.  The
        # call is kept in case `pred` has side effects (e.g. consumes random
        # state) -- confirm, and drop it if it does not.
        self.pred(train_DF, test_df,
                  tunings=self.tunedParams,
                  smoteit=True)

        # Rows of a freshly built test table whose second-to-last cell is > 0.
        predRows = [row.cells for row in createTbl(
            self.test[self._n], isBin=True)._rows if row.cells[-2] > 0]
        predTest = genTable(test_df, rows=predRows, name='Before_temp')

        # Apply the selected planner.
        if planner == 'xtrees':
          # Not part of the planner list above; kept so the name stays usable.
          newTab = xtrees(train=self.train[-1],
                          test_DF=predTest,
                          bin=False,
                          majority=True).main()
          genTable(test_df,
                   rows=[r.cells[:-1] for r in newTab._rows],
                   name='After_xtrees')
        elif planner == 'XTREE':
          newTab = xtrees(train=self.train[-1],
                          test_DF=predTest,
                          bin=False,
                          majority=False).main()
        elif planner == 'BIC':
          newTab = HOW(train=self.train[-1],
                       test=self.test[-1],
                       test_df=predTest).main()
        elif planner == 'CD':
          newTab = strawman(train=self.train[-1], test=self.test[-1]).main()
        elif planner == 'CD+FS':
          newTab = strawman(train=self.train[-1], test=self.test[-1],
                            prune=True).main()
        else:
          # Without this guard a stale table from the previous planner would
          # be scored again.
          raise ValueError('Unknown planner: %r' % (planner,))

        aft = self.pred(train_DF, newTab,
                        tunings=self.tunedParams,
                        smoteit=True)
        # NOTE(review): both counts are plain ints; under Python 2 without
        # `from __future__ import division` the quotient truncates -- confirm
        # that the module enables true division.
        out.append(1 - (hits(aft) / hits(actual)))
      yield out
Пример #2
0
  def go(self):
    """Yield ``[planner, score, score, ...]`` per planner and log each row.

    The random seed is fixed with ``rseed(1)``.  For every planner the
    train/test tables are built once, the test table is scored with
    ``self.pred``, and the rows of a test table built with ``isBin=False``
    whose prediction is positive are packed into ``predTest``.  The planner
    is then run ``self.reps`` times; each revised table is re-scored and
    ``hits(after) / hits(before)`` is appended, where ``hits`` counts the
    entries that are not below 1.  Each finished row is passed to
    ``self.logResults`` before it is yielded.

    Raises:
      ValueError: if a planner name has no matching branch.
    """
    rseed(1)

    def hits(values):
      # Number of entries that are not below 1.
      return sum([0 if v < 1 else 1 for v in values])

    for planner in ['xtrees', 'cart', 'HOW', 'baseln0', 'baseln1']:
      out = [planner]
      train_DF = createTbl(self.train[self._n], isBin=True)
      test_df = createTbl(self.test[self._n], isBin=True)
      before = self.pred(train_DF, test_df,
                         tunings=self.tunedParams,
                         smoteit=True)

      # Keep only the rows whose prediction in `before` is positive.
      predRows = [row.cells for predicted, row in zip(
          before, createTbl(self.test[self._n], isBin=False)._rows)
          if predicted > 0]
      predTest = genTable(test_df, rows=predRows)

      for _ in xrange(self.reps):
        # Apply the selected planner.
        if planner == 'xtrees':
          newTab = xtrees(train=self.train[-1],
                          test_DF=predTest,
                          bin=False,
                          majority=True).main()
        elif planner in ('cart', 'CART'):
          newTab = xtrees(train=self.train[-1],
                          test_DF=predTest,
                          bin=False,
                          majority=False).main()
        elif planner == 'HOW':
          newTab = HOW(train=self.train[-1],
                       test=self.test[-1],
                       test_df=predTest).main()
        elif planner == 'baseln0':
          newTab = strawman(train=self.train[-1], test=self.test[-1]).main()
        elif planner == 'baseln1':
          newTab = strawman(train=self.train[-1],
                            test=self.test[-1], prune=True).main()
        else:
          # Without this guard a stale table from the previous planner would
          # be scored again.
          raise ValueError('Unknown planner: %r' % (planner,))

        aft = self.pred(train_DF, newTab,
                        tunings=self.tunedParams,
                        smoteit=True)
        # NOTE(review): both counts are plain ints; under Python 2 without
        # `from __future__ import division` the quotient truncates -- confirm.
        out.append(hits(aft) / hits(before))

      self.logResults(out)
      yield out
Пример #3
0
  def main(self, name='Apache', reps=20):
    """Run all planners ``reps`` times on the data set called ``name``.

    Returns a list of five lists.  Each starts with a label ('xtrees',
    'CART', 'HOW', 'Base', 'Base+FSS') followed by the values collected for
    that planner over all repetitions.
    """
    res_xtrees, res_cart, res_how = ['xtrees'], ['CART'], ['HOW']
    res_base, res_base_fss = ['Base'], ['Base+FSS']

    for _ in xrange(reps):
      for entry in self.explorer(name):
        # Only entries whose second-to-last '/'-separated path component
        # equals `name` are processed.
        if name != entry[0].strip().split('/')[-2]:
          continue

        train = [entry[0] + entry[1][1]]
        test = [entry[0] + entry[1][0]]
        train_df = formatData(createTbl(train, _smote=False, isBin=False))
        test_df = formatData(createTbl(test, _smote=False, isBin=False))
        # Not read again in this method.
        actual = test_df[test_df.columns[-2]].astype('float32').tolist()
        before = predictor(train=train_df, test=test_df).rforest()

        def ratio(new_tab):
          # Sum of the predictions on the planned table over the sum of the
          # predictions made before planning.
          planned = predictor(train=train_df,
                              test=formatData(new_tab)).rforest()
          return sum(planned) / sum(before)

        # Run every planner first, then score the results in the same order.
        plan_xtrees = xtrees(train=train, test=test,
                             bin=False, majority=True).main()
        plan_cart = xtrees(train=train, test=test,
                           bin=False, majority=False).main()
        plan_how = HOW(name)
        plan_base = strawman(train=train, test=test).main(config=True)
        plan_base_fss = strawman(train=train, test=test,
                                 prune=True).main(config=True)

        res_xtrees.append(ratio(plan_xtrees))
        res_cart.append(ratio(plan_cart))
        # HOW already returns a sequence of results; it is not re-scored.
        res_how.extend(plan_how)
        res_base.append(ratio(plan_base))
        res_base_fss.append(ratio(plan_base_fss))

    return [res_xtrees, res_cart, res_how, res_base, res_base_fss]
Пример #4
0
    def main(self, name='Apache', reps=20):
        """Collect planner results for the data set ``name`` over ``reps`` runs.

        Returns five lists in the order xtrees, CART, HOW, Base, Base+FSS;
        each begins with its label and is followed by the collected values.
        """
        xtrees_row = ['xtrees']
        how_row = ['HOW']
        cart_row = ['CART']
        base_row = ['Base']
        base_fss_row = ['Base+FSS']

        for _ in xrange(reps):
            for item in self.explorer(name):
                root, files = item[0], item[1]
                # Skip entries whose second-to-last '/'-separated path
                # component is not `name`.
                if name != root.strip().split('/')[-2]:
                    continue

                train = [root + files[1]]
                test = [root + files[0]]
                train_df = formatData(
                    createTbl(train, _smote=False, isBin=False))
                test_df = formatData(
                    createTbl(test, _smote=False, isBin=False))
                # Not read again in this method.
                actual = test_df[test_df.columns[-2]].astype(
                    'float32').tolist()
                before = predictor(train=train_df, test=test_df).rforest()

                # Apply the different planners.
                by_majority = xtrees(train=train, test=test,
                                     bin=False, majority=True).main()
                by_cart = xtrees(train=train, test=test,
                                 bin=False, majority=False).main()
                how = HOW(name)
                plain = strawman(train=train, test=test).main(config=True)
                pruned = strawman(train=train, test=test,
                                  prune=True).main(config=True)

                def rescore(table):
                    # Predict on the planned table and relate the summed
                    # predictions to the summed predictions before planning.
                    after = predictor(
                        train=train_df, test=formatData(table)).rforest()
                    return sum(after) / sum(before)

                xtrees_row.append(rescore(by_majority))
                cart_row.append(rescore(by_cart))
                # HOW's return value is appended element-wise as-is.
                how_row.extend(how)
                base_row.append(rescore(plain))
                base_fss_row.append(rescore(pruned))

        return [xtrees_row, cart_row, how_row, base_row, base_fss_row]
Пример #5
0
def learner(mdl=XOMO, n=0, reps=24, numel=1000):
  """Yield ``[planner, score, score, ...]`` for each planner.

  Train and test data come from ``mdl(n=0).genData(N=numel)``.  For each
  planner the predictions on ``createTbl(train)`` form the baseline; the
  planner is then run ``reps`` times and every revised table is scored as
  ``sum(predictions after) / sum(baseline predictions)``.

  Args:
    mdl: model class; instantiated for data generation and passed to
      ``predictor`` as ``Model``.
    n: forwarded to ``predictor``.
    reps: number of planner runs per planner.
    numel: passed to ``genData`` as ``N``.

  Raises:
    ValueError: if a planner name has no matching branch.
  """
  # NOTE(review): the data is always generated with n=0, independent of the
  # `n` argument that is forwarded to `predictor` -- confirm this is intended.
  train, test = mdl(n=0).genData(N=numel)
  for planner in ['dtree', 'HOW', 'baseln0', 'baseln1']:
    E = [planner]
    before = array(predictor(Model=mdl, n=n, tbl=createTbl(train)))
    for _ in xrange(reps):
      # Apply the selected planner.
      if planner in ('xtrees', 'dtree'):
        # The two tree planners differ only in the `majority` flag.
        newTab = xtrees(train=train,
                        test=test,
                        bin=False,
                        smoteit=False,
                        majority=(planner == 'xtrees')).main(which='Best')
      elif planner == 'HOW':
        newTab = HOW(train=train, test=test).main()
      elif planner == 'baseln0':
        newTab = strawman(train=train, test=test).main(mode='models')
      elif planner == 'baseln1':
        newTab = strawman(train=train, test=test,
                          prune=True).main(mode='models')
      else:
        # Without this guard a stale table from the previous planner would
        # be scored again.
        raise ValueError('Unknown planner: %r' % (planner,))

      aft = array(predictor(Model=mdl, n=n, tbl=newTab))
      E.append(sum(aft) / sum(before))
    yield E
Пример #6
0
  def deltas(self, planner):
    """Return the averaged per-column deltas produced by ``planner``.

    The test rows whose second-to-last cell is positive are written to the
    file 'before' and packed into a table.  The planner is run with
    ``justDeltas=True``, its output is expanded through ``self.delta1``, and
    the column-wise sums of those deltas are divided by the number of
    selected rows.

    Args:
      planner: one of 'xtrees', 'XTREE', 'BIC', 'CD', 'CD+FS'.

    Returns:
      A list with one value per column, or None for any other planner name.
    """
    train_DF = createTbl(self.train[self._n], isBin=True, bugThres=1)
    test_df = createTbl(self.test[self._n], isBin=True, bugThres=1)
    # NOTE(review): the result of this prediction is not used below.  The call
    # is kept in case `pred` has side effects (e.g. consumes random state) --
    # confirm, and drop it if it does not.
    self.pred(train_DF, test_df, tunings=self.tunedParams, smoteit=True)

    predRows = [row.cells for row in createTbl(
        self.test[self._n], isBin=True)._rows if row.cells[-2] > 0]

    write2file(predRows, fname='before')  # save file

    predTest = genTable(test_df, rows=predRows)

    # Apply the selected planner.
    if planner == 'xtrees':
      changes = xtrees(train=self.train[-1],
                       test_DF=predTest,
                       bin=False,
                       majority=True).main(justDeltas=True)
    elif planner == 'XTREE':
      changes = xtrees(train=self.train[-1],
                       test_DF=predTest,
                       bin=False,
                       majority=False).main(justDeltas=True)
    elif planner == 'BIC':
      changes = HOW(train=self.train[-1],
                    test=self.test[-1],
                    test_df=predTest).main(justDeltas=True)
    elif planner == 'CD':
      changes = strawman(
          train=self.train[-1], test=self.test[-1]).main(justDeltas=True)
    elif planner == 'CD+FS':
      changes = strawman(
          train=self.train[-1], test=self.test[-1],
          prune=True).main(justDeltas=True)
    else:
      return None

    per_row = list(self.delta1(changes, train_DF.headers, norm=len(predRows)))
    # Divide every column sum by the number of selected rows; the divisor
    # vector has one entry per column, i.e. row length minus two.
    divisor = np.array((len(predRows[0]) - 2) * [len(predRows)])
    return (np.sum(per_row, axis=0) / divisor).tolist()
Пример #7
0
  def deltas(self, planner):
    """Return the list of deltas produced by ``planner``.

    The test table is scored with ``self.pred``; the rows of a test table
    built with ``isBin=False`` whose prediction is positive are written to
    the file 'before' and packed into a table for the planner.  The tree
    planners are run with ``justDeltas=True`` and expanded through
    ``self.delta1``.  The other planners write their revised rows to a file
    and the deltas come from ``self.delta0``.  Both are normalised with the
    per-column value range of all train and test rows.

    Args:
      planner: one of 'xtrees', 'cart', 'CART', 'HOW', 'Baseline',
        'Baseline+FS'.

    Returns:
      A list of deltas, or None for any other planner name.
    """
    train_DF = createTbl(self.train[self._n], isBin=True, bugThres=1)
    test_df = createTbl(self.test[self._n], isBin=True, bugThres=1)
    before = self.pred(train_DF, test_df, tunings=self.tunedParams,
                       smoteit=True)
    # All train and test rows, each without its last cell.
    allRows = np.array([np.array(row.cells[:-1])
                        for row in train_DF._rows + test_df._rows])

    def min_max():
      # Per-column range (largest minus smallest value) over `allRows`.
      spans = []
      for i in xrange(len(allRows[0])):
        column = sorted([r[i] for r in allRows])
        spans.append(column[-1] - column[0])
      return spans

    def newRows(newTab):
      # Cells of every row of `newTab`, without the last cell.
      return [row.cells[:-1] for row in newTab._rows]

    predRows = [row.cells for predicted, row in zip(
        before, createTbl(self.test[self._n], isBin=False)._rows)
        if predicted > 0]

    write2file(predRows, fname='before')  # save file

    predTest = genTable(test_df, rows=predRows)

    # Apply the selected planner.
    if planner == 'xtrees':
      xTrees = xtrees(train=self.train[-1],
                      test_DF=predTest,
                      bin=False,
                      majority=True).main(justDeltas=True)
      return list(self.delta1(xTrees, train_DF.headers, norm=min_max()))

    elif planner in ('cart', 'CART'):
      cart = xtrees(train=self.train[-1],
                    test_DF=predTest,
                    bin=False,
                    majority=False).main(justDeltas=True)
      return list(self.delta1(cart, train_DF.headers, norm=min_max()))

    elif planner == 'HOW':
      how = HOW(train=self.train[-1],
                test=self.test[-1],
                test_df=predTest).main()
      # Save this planner's own rows (previously referenced the unbound
      # `xTrees`).  Presumably `delta0` reads this file back -- TODO confirm.
      write2file(newRows(how), fname='HOW')  # save file
      return list(self.delta0(Planner='HOW', norm=min_max()))

    elif planner == 'Baseline':
      baseln = strawman(train=self.train[-1], test=self.test[-1]).main()
      write2file(newRows(baseln), fname='base0')  # save file
      return list(self.delta0(Planner='base0', norm=min_max()))

    elif planner == 'Baseline+FS':
      baselnFss = strawman(
          train=self.train[-1], test=self.test[-1], prune=True).main()
      write2file(newRows(baselnFss), fname='base1')  # save file
      return list(self.delta0(Planner='base1', norm=min_max()))

    return None
Пример #8
0
    def deltas(self, planner):
        """Return the list of deltas produced by ``planner``.

        The test table is scored with ``self.pred``; the rows of a test table
        built with ``isBin=False`` whose prediction is positive are written
        to the file 'before' and packed into a table for the planner.  The
        tree planners are run with ``justDeltas=True`` and expanded through
        ``self.delta1``.  The other planners write their revised rows to a
        file and the deltas come from ``self.delta0``.  Both are normalised
        with the per-column value range of all train and test rows.

        Args:
            planner: one of 'xtrees', 'cart', 'CART', 'HOW', 'Baseline',
                'Baseline+FS'.

        Returns:
            A list of deltas, or None for any other planner name.
        """
        train_DF = createTbl(self.train[self._n], isBin=True, bugThres=1)
        test_df = createTbl(self.test[self._n], isBin=True, bugThres=1)
        before = self.pred(train_DF,
                           test_df,
                           tunings=self.tunedParams,
                           smoteit=True)
        # All train and test rows, each without its last cell.
        allRows = np.array([
            np.array(row.cells[:-1])
            for row in train_DF._rows + test_df._rows
        ])

        def min_max():
            # Per-column range (largest minus smallest value) over `allRows`.
            spans = []
            for i in xrange(len(allRows[0])):
                column = sorted([r[i] for r in allRows])
                spans.append(column[-1] - column[0])
            return spans

        def newRows(newTab):
            # Cells of every row of `newTab`, without the last cell.
            return [row.cells[:-1] for row in newTab._rows]

        predRows = [
            row.cells for predicted, row in zip(
                before,
                createTbl(self.test[self._n], isBin=False)._rows)
            if predicted > 0
        ]

        write2file(predRows, fname='before')  # save file

        predTest = genTable(test_df, rows=predRows)

        # Apply the selected planner.
        if planner == 'xtrees':
            xTrees = xtrees(train=self.train[-1],
                            test_DF=predTest,
                            bin=False,
                            majority=True).main(justDeltas=True)
            return list(
                self.delta1(xTrees, train_DF.headers, norm=min_max()))

        elif planner in ('cart', 'CART'):
            cart = xtrees(train=self.train[-1],
                          test_DF=predTest,
                          bin=False,
                          majority=False).main(justDeltas=True)
            return list(self.delta1(cart, train_DF.headers, norm=min_max()))

        elif planner == 'HOW':
            how = HOW(train=self.train[-1],
                      test=self.test[-1],
                      test_df=predTest).main()
            # Save this planner's own rows (previously referenced the unbound
            # `xTrees`).  Presumably `delta0` reads this file back -- TODO
            # confirm.
            write2file(newRows(how), fname='HOW')  # save file
            return list(self.delta0(Planner='HOW', norm=min_max()))

        elif planner == 'Baseline':
            baseln = strawman(train=self.train[-1],
                              test=self.test[-1]).main()
            write2file(newRows(baseln), fname='base0')  # save file
            return list(self.delta0(Planner='base0', norm=min_max()))

        elif planner == 'Baseline+FS':
            baselnFss = strawman(train=self.train[-1],
                                 test=self.test[-1],
                                 prune=True).main()
            write2file(newRows(baselnFss), fname='base1')  # save file
            return list(self.delta0(Planner='base1', norm=min_max()))

        return None
Пример #9
0
    def go(self):
        """Yield ``[planner, score, score, ...]`` per planner and log each row.

        The random seed is fixed with ``rseed(1)``.  For every planner the
        train/test tables are built once, the test table is scored with
        ``self.pred``, and the rows of a test table built with
        ``isBin=False`` whose prediction is positive are packed into
        ``predTest``.  The planner is then run ``self.reps`` times; each
        revised table is re-scored and ``hits(after) / hits(before)`` is
        appended, where ``hits`` counts the entries that are not below 1.
        Each finished row is passed to ``self.logResults`` before it is
        yielded.

        Raises:
            ValueError: if a planner name has no matching branch.
        """
        rseed(1)

        def hits(values):
            # Number of entries that are not below 1.
            return sum([0 if v < 1 else 1 for v in values])

        for planner in ['xtrees', 'cart', 'HOW', 'baseln0', 'baseln1']:
            out = [planner]
            train_DF = createTbl(self.train[self._n], isBin=True)
            test_df = createTbl(self.test[self._n], isBin=True)
            before = self.pred(train_DF,
                               test_df,
                               tunings=self.tunedParams,
                               smoteit=True)

            # Keep only the rows whose prediction in `before` is positive.
            predRows = [
                row.cells for predicted, row in zip(
                    before,
                    createTbl(self.test[self._n], isBin=False)._rows)
                if predicted > 0
            ]
            predTest = genTable(test_df, rows=predRows)

            for _ in xrange(self.reps):
                # Apply the selected planner.
                if planner == 'xtrees':
                    newTab = xtrees(train=self.train[-1],
                                    test_DF=predTest,
                                    bin=False,
                                    majority=True).main()
                elif planner in ('cart', 'CART'):
                    newTab = xtrees(train=self.train[-1],
                                    test_DF=predTest,
                                    bin=False,
                                    majority=False).main()
                elif planner == 'HOW':
                    newTab = HOW(train=self.train[-1],
                                 test=self.test[-1],
                                 test_df=predTest).main()
                elif planner == 'baseln0':
                    newTab = strawman(train=self.train[-1],
                                      test=self.test[-1]).main()
                elif planner == 'baseln1':
                    newTab = strawman(train=self.train[-1],
                                      test=self.test[-1],
                                      prune=True).main()
                else:
                    # Without this guard a stale table from the previous
                    # planner would be scored again.
                    raise ValueError('Unknown planner: %r' % (planner,))

                aft = self.pred(train_DF,
                                newTab,
                                tunings=self.tunedParams,
                                smoteit=True)
                # NOTE(review): both counts are plain ints; under Python 2
                # without `from __future__ import division` the quotient
                # truncates -- confirm.
                out.append(hits(aft) / hits(before))

            self.logResults(out)
            yield out
Пример #10
0
  def main(self, name='Apache', reps=20):
    """Yield ``[planner, score, score, ...]`` for each planner on ``name``.

    The random seed is fixed with ``rseed(1)``.  For every planner, each
    entry returned by ``self.explorer(name)`` whose second-to-last
    '/'-separated path component equals ``name`` is processed: train and
    test tables are built, a baseline prediction is made with
    ``predictor(...).rforest()``, and the planner is run ``reps`` times.
    Each revised table is re-scored and
    ``1 - sum(predictions after) / sum(baseline predictions)`` is appended.

    If scoring raises an ``Exception`` the debugger is entered through
    ``set_trace()`` and no score is appended for that repetition.
    """
    rseed(1)
    for planner in ['DTREE', 'CD+FS', 'CD', 'BIC']:
      out = [planner]

      data = self.explorer(name)
      for d in data:
        if name != d[0].strip().split('/')[-2]:
          continue

        train = [d[0] + d[1][1]]
        test = [d[0] + d[1][0]]
        train_df = formatData(createTbl(train, _smote=False, isBin=False))
        test_df = formatData(createTbl(test, _smote=False, isBin=False))
        before = predictor(train=train_df, test=test_df).rforest()

        for _ in xrange(reps):
          newTab = None  # Reset so no table from an earlier run is reused.
          # Apply the selected planner.
          if planner in ('xtrees', 'DTREE'):
            # The two tree planners differ only in the `majority` flag.
            newTab = xtrees(train=train,
                            test=test,
                            bin=False,
                            majority=(planner == 'xtrees'),
                            name=name).main()
          elif planner == 'BIC':
            newTab = HOW(name)
          elif planner == 'CD':
            newTab = strawman(name=name,
                              train=train,
                              test=test).main(mode="config")
          elif planner == 'CD+FS':
            newTab = strawman(name=name,
                              train=train,
                              test=test,
                              prune=True).main(mode="config")

          try:
            aft = predictor(train=train_df,
                            test=formatData(newTab)).rforest()
            out.append(1 - sum(aft) / sum(before))
          except Exception:
            # Deliberate debugging hook; narrowed from a bare `except` so
            # KeyboardInterrupt and SystemExit are no longer trapped.
            set_trace()

      yield out
Пример #11
0
  def deltas(self, name, planner):
    """Return the averaged per-column deltas of ``planner`` on ``name``.

    For the first entry of ``self.explorer(name)`` whose second-to-last
    '/'-separated path component equals ``name`` and for which ``planner``
    is recognised, the train/test tables are built, ``self.headers`` is set
    to the train headers and the test rows (without their last two cells)
    are written to the file 'before_cpm'.  The planner is run with
    ``justDeltas=True``, its output is expanded through ``self.delta1``, and
    the column-wise sums are divided by ``np.size(newTab, axis=0)``.

    Args:
      name: data set name to look up.
      planner: one of 'xtrees', 'DTREE', 'BIC', 'CD', 'CD+FS'.

    Returns:
      A float numpy array, or None when no entry/planner matched.
    """
    # NOTE(review): this list is never filled, so `norm=len(predRows)` below
    # always passes 0 to `delta1` -- confirm that is intended.
    predRows = []
    data = self.explorer(name)
    for d in data:
      if name != d[0].strip().split('/')[-2]:
        continue

      train = [d[0] + '/' + d[1][1]]
      test = [d[0] + '/' + d[1][0]]
      train_DF = createTbl(train, isBin=False)
      test_df = createTbl(test, isBin=False)
      self.headers = train_DF.headers
      write2file([r.cells[:-2] for r in test_df._rows],
                 fname='before_cpm')  # save file

      # Apply the selected planner.
      if planner in ('xtrees', 'DTREE'):
        # The two tree planners differ only in the `majority` flag.
        newTab = xtrees(train=train,
                        test=test,
                        bin=False,
                        majority=(planner == 'xtrees'),
                        name=name).main(justDeltas=True)
      elif planner == 'BIC':
        newTab = HOW(name, justDeltas=True)
      elif planner == 'CD':
        newTab = strawman(name=name,
                          train=train,
                          test=test).main(mode="config", justDeltas=True)
      elif planner == 'CD+FS':
        newTab = strawman(name=name,
                          train=train,
                          test=test,
                          prune=True).main(mode="config", justDeltas=True)
      else:
        # Unrecognised planner: move on to the next entry.
        continue

      changes = list(
          self.delta1(newTab, train_DF.headers, norm=len(predRows)))
      return np.array(
          np.sum(changes, axis=0), dtype='float') / np.size(newTab, axis=0)