Пример #1
0
def test_case1():
    """Run the pattern checker on a null-row filter followed by a value mapping.

    ``df2`` is derived from the global ``df1`` by keeping only the rows that
    contain no null value and recoding the ``Sex`` column (``'male'`` -> 0,
    ``'female'`` -> 1). The pair is then handed to ``PatternSynthesizer`` and
    the checker's summary is printed in cyan.
    """
    # Copy the filtered selection explicitly: assigning a column on the bare
    # boolean-mask result is chained assignment (SettingWithCopyWarning) and
    # leaves it ambiguous whether the write targets a view of df1.
    df2 = df1[~df1.isnull().any(axis=1)].copy()
    df2['Sex'] = df2['Sex'].map({'male': 0, 'female': 1})
    checker = PatternSynthesizer(dispatch_gen(df1, "df1", 0, 0),
                                 dispatch_gen(df2, "df2", 0, 1),
                                 Info(None, None), [])
    # The return value is not needed here; the result is read from
    # ``checker.summary`` below.
    checker.check(df1, df2)
    print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
Пример #2
0
def test_groupby():
    """Run the pattern checker on a group-by aggregation of ``df1``.

    ``df2`` holds the sum of ``Age`` per ``Name``, turned back into a regular
    frame with ``reset_index`` and sorted by ``Age`` in descending order. The
    checker's summary for the ``df1`` -> ``df2`` pair is printed in cyan.
    """
    age_per_name = df1.copy().groupby(['Name'])['Age'].sum()
    df2 = age_per_name.reset_index().sort_values('Age', ascending=False)

    source_var = dispatch_gen(df1, "df1", 0, 0)
    target_var = dispatch_gen(df2, "df2", 0, 1)
    checker = PatternSynthesizer(source_var, target_var, Info(None, None), [])

    checker.check(df1, df2)
    print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
Пример #3
0
def test_double():
    """Run the same checker twice on a ``dropna`` transformation.

    ``df2`` is a copy of the global ``df1`` with null-containing rows dropped
    in place. ``check`` is invoked two times on the same checker instance and
    the summary is printed after each call.
    """
    df2 = df1.copy()
    df2.dropna(inplace=True)

    source_var = dispatch_gen(df1, "df1", 0, 0)
    target_var = dispatch_gen(df2, "df2", 0, 1)
    checker = PatternSynthesizer(source_var, target_var, Info(None, None), [])

    # Two identical passes over the same checker instance.
    for _ in range(2):
        checker.check(df1, df2)
        print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
Пример #4
0
def test_io():
    """Run the pattern checker on frames round-tripped through pickle.

    The global ``df1`` is pickled twice (as ``[df1, df2]`` with ``df2``
    bound to the same object) into ``test.dat``, read back, and the loaded
    pair is passed to ``PatternSynthesizer``. The value returned by
    ``check`` is printed in cyan.
    """
    # Do not rebind ``df1`` anywhere in this function: an assignment would
    # make the name local for the whole body and the read below would raise
    # UnboundLocalError instead of picking up the global frame.
    df2 = df1
    with open("test.dat", "wb") as f:
        pickle.dump([df1, df2], f)

    with open("test.dat", "rb") as f:
        # pickle.load is acceptable here only because the file was written
        # by this function just above; never use it on untrusted data.
        loaded_df1, loaded_df2 = pickle.load(f)

    # The file is closed before the checker runs; only the loaded objects
    # are needed from here on.
    checker = PatternSynthesizer(dispatch_gen(loaded_df1, "df1", 0, 0),
                                 dispatch_gen(loaded_df2, "df2", 0, 1),
                                 Info(None, None), [])
    result = checker.check(loaded_df1, loaded_df2)
    print("df1", "->", "df2", "\033[96m", result, "\033[0m")
Пример #5
0
def test_case2():
    """Run the pattern checker on column drops, a null fill and type casts.

    ``df2`` starts as a copy of the global ``df1``; the ``Sex`` and ``Name``
    columns are dropped, nulls in ``Age`` are replaced by 0, ``Age`` is cast
    to ``str`` and ``Survived`` to ``int``. The checker's summary for the
    ``df1`` -> ``df2`` pair is printed in cyan.
    """
    df2 = df1.copy()
    # DataFrame.drop returns a new frame unless inplace=True; the result
    # must be bound back, otherwise the columns are never removed.
    df2 = df2.drop(['Sex', 'Name'], axis=1)
    # Assign the filled column back instead of calling fillna(inplace=True)
    # on a column selection, which may operate on a temporary and leave
    # df2 untouched (always the case under copy-on-write).
    df2['Age'] = df2['Age'].fillna(0)
    # Debug output: identities of the frame's index and the column's index.
    print(id(df2.index), id(df2['Age'].index))
    df2['Age'] = df2['Age'].astype(str)
    df2['Survived'] = df2['Survived'].astype(int)
    checker = PatternSynthesizer(dispatch_gen(df1, "df1", 0, 0),
                                 dispatch_gen(df2, "df2", 0, 1),
                                 Info(None, None), [])
    # The return value is not needed here; the result is read from
    # ``checker.summary`` below.
    checker.check(df1, df2)
    print("df1", "->", "df2", "\033[96m", checker.summary, "\033[0m")
Пример #6
0
    for file in sorted(os.listdir(data_path)):
        myvars = []
        if file == "info.json" or file.endswith("_f.dat"):
            continue
        elif file.endswith(".dat"):
            with open(os.path.join(data_path, file), "rb") as f:
                try:
                    vars = pickle.load(f)
                except Exception as e:
                    print_error("error when pickle from " + file)
                    print_error(e)
                    continue
                for i in range(len(vars)):
                    try:
                        myvars.append(
                            dispatch_gen(vars[i][0], vars[i][1][2],
                                         vars[i][1][0], vars[i][1][1]))
                        if type(vars[i][0]) == list and len(vars[i][0]) <= 3:
                            for j in range(len(vars[i][0])):
                                myvars.append(
                                    dispatch_gen(vars[i][0][j],
                                                 vars[i][1][2] + f"[{j}]",
                                                 vars[i][1][0], vars[i][1][1]))
                    except:
                        print_error("error when dispatch var " + vars[i][1][2])
                        pass
                # comments = static_comments[vars[0][1][0]] if vars[0][1][0] in static_comments.keys() else []
                comment, json_map = handlecell(myvars, 0,
                                               len(myvars) - 1,
                                               Info(info, vars[0][1][0]))
                # notebook.cells[code_indices[vars[0][1][0] - 1]].source = f"'''\n{comment}\n'''\n" + notebook.cells[code_indices[vars[0][1][0] - 1]].source