示例#1
0
文件: data.py 项目: vivek7266/fss18
        def argmin(c, lo, hi):
            """Search rows ``lo`` (inclusive) to ``hi`` (exclusive) for a split on column ``c``.

            ``rows``, ``goal``, ``enough`` and ``Num`` are not defined in this
            function; they are read from the enclosing scope.

            Every row in ``range(lo, hi)`` is first added to the "right"
            accumulators. If the range holds more than ``2 * enough`` rows, the
            rows are then moved to the "left" accumulators one at a time. Each
            position where both sides hold at least ``enough`` items is scored
            twice: once for the feature column ``c`` and once for the ``goal``
            column. A position becomes the new cut only when *both* scores are
            lower than the best values recorded so far.

            Args:
                c: index of the feature column being split.
                lo: first row index to consider (inclusive).
                hi: row index at which to stop (exclusive).

            Returns:
                A ``(cut, mu)`` tuple. ``cut`` is the index of the last row
                placed on the left side of the accepted split, or ``None`` when
                no split was accepted. ``mu`` is ``yr.mu`` read while every row
                of the range was still in the right accumulator (presumably the
                mean of the goal column -- confirm against ``Num``).
            """
            cut = None
            xl, yl = Num(), Num()  # left side: feature column, goal column
            xr, yr = Num(), Num()  # right side: feature column, goal column

            # Start with the whole range on the right side.
            for i in range(lo, hi):
                xr.numInc(rows[i][c])
                yr.numInc(rows[i][goal])

            # With nothing on the left yet, the right-side sd is the baseline
            # every candidate split has to beat.
            best_x = xr.sd
            best_y = yr.sd
            mu = yr.mu

            # Only look for a cut when each half could hold `enough` rows.
            if hi - lo > 2 * enough:
                for i in range(lo, hi):
                    x = rows[i][c]
                    y = rows[i][goal]
                    # Move row i from the right side to the left side.
                    xl.numInc(x)
                    yl.numInc(y)
                    xr.numDec(x)
                    yr.numDec(y)
                    if xl.n >= enough and xr.n >= enough:
                        # Scores are inflated by 5% before comparison, so a
                        # candidate must beat the current best by that margin.
                        tmp_x = xl.numXpect(xr) * 1.05
                        tmp_y = yl.numXpect(yr) * 1.05
                        try:
                            improved = tmp_x < best_x and tmp_y < best_y
                        except TypeError:
                            # The scores could not be ordered (presumably a
                            # None or non-real value coming out of Num --
                            # TODO confirm). Report them and skip this
                            # candidate instead of hiding every exception.
                            print(tmp_x, tmp_y)
                            continue
                        if improved:
                            cut, best_x, best_y = i, tmp_x, tmp_y
            return cut, mu
示例#2
0
文件: data.py 项目: vivek7266/fss18
        def argmin(c, lo, hi):
            """Return the row index of the best split on column ``c``, or ``None``.

            ``rows``, ``enough`` and ``Num`` come from the enclosing scope.

            Only ranges with more than ``2 * enough`` rows are examined. All
            values of column ``c`` in ``range(lo, hi)`` are loaded into a
            "right" accumulator, then shifted one by one into a "left"
            accumulator. Whenever both sides hold at least ``enough`` items the
            split is scored with ``Num.numXpect`` (inflated by 5%), and the
            index is kept if that score is lower than the best seen so far.

            Args:
                c: index of the column being split.
                lo: first row index to consider (inclusive).
                hi: row index at which to stop (exclusive).

            Returns:
                The index of the last row on the left side of the best split,
                or ``None`` if the range is too small or no split scored lower
                than the starting ``sd`` of the whole range.
            """
            # Guard clause: the range is too small to give both halves
            # `enough` rows.
            if not (hi - lo > 2 * enough):
                return None

            left = Num()
            right = Num()

            # Load the whole range into the right-hand accumulator.
            for idx in range(lo, hi):
                right.numInc(rows[idx][c])

            # Baseline to beat: the sd with everything on one side.
            lowest = right.sd
            best_cut = None

            # Shift values to the left one at a time, scoring each position.
            for idx in range(lo, hi):
                value = rows[idx][c]
                left.numInc(value)
                right.numDec(value)

                both_big_enough = left.n >= enough and right.n >= enough
                if not both_big_enough:
                    continue

                score = Num.numXpect(left, right) * 1.05
                if score < lowest:
                    best_cut = idx
                    lowest = score

            return best_cut