def get_target_funcs(file, key_lib='', return_all=False, python2=False):
    """
    Extract all target functions from a python file.

    When ``python2`` is false the function names and line numbers are read
    from the module-level ``data`` mapping, keyed by the file's base name;
    otherwise they are obtained by calling ``process_file(file)``.

    :param file: path of the ``.py`` file.
    :param key_lib: prefix a function name must start with to be kept.
    :param return_all: when true, return the unfiltered functions and line
        numbers exactly as obtained.
    :param python2: when true, call ``process_file`` instead of using ``data``.
    :return: ``(funcs, linenos)``; after filtering both are lists, and both
        are empty when nothing matches.
    :raises AssertionError: if ``file`` does not end with ``.py``, or if its
        base name is missing from ``data`` while ``python2`` is false.
    """
    assert file.endswith('.py'), '{} is not a python file'.format(file)
    filename = file.split('/')[-1]
    if python2:
        funcs, linenos = process_file(file)
    else:
        assert filename in data, (
            "File {} cannot be parsed in python3.6".format(filename))
        funcs = data[filename]["funcs"]
        linenos = data[filename]["linenos"]

    if return_all:
        return funcs, linenos

    # Keep the two result lists in lockstep while filtering.
    kept_funcs = []
    kept_linenos = []
    for name, line in zip(funcs, linenos):
        if name.startswith(key_lib) and is_decision_point(name):
            kept_funcs.append(name)
            kept_linenos.append(line)
    return kept_funcs, kept_linenos
Exemplo n.º 2
0
def count_all_functions(out_path, notebooks=None):
    """
    Count how often each function occurs across the target notebooks.

    Every notebook is passed to ``process_file``; notebooks for which that
    call raises are recorded instead of counted. The result is written to
    ``out_path`` as JSON with the keys ``"func_counter"`` and
    ``"error_files"``.

    :param out_path: path of the JSON file to write.
    :param notebooks: file names relative to ``nb_path``; defaults to every
        ``.py`` file found in ``nb_path``.
    """
    if notebooks is None:
        notebooks = [f for f in os.listdir(nb_path) if f.endswith('.py')]
    print('len(notebooks): {}'.format(len(notebooks)))

    func_counter = {}
    error_files = []
    for notebook in tqdm(notebooks):
        # Only the parsing step is guarded, and the loop variables have
        # distinct names so the failing *file* is what gets recorded.
        try:
            funcs, _ = process_file(os.path.join(nb_path, notebook))
        except Exception:
            error_files.append(notebook)
            continue
        for func in funcs:
            func_counter[func] = func_counter.get(func, 0) + 1

    print('len(error_files): {}'.format(len(error_files)))
    with open(out_path, 'w') as fout:
        json.dump({"func_counter": func_counter,
                   "error_files": error_files}, fout, ensure_ascii=False, indent=2)
def test(file):
    """
    Print every function of ``file`` that the classifier labels as 1.

    Functions and line numbers come from ``process_file``; the classifier
    weights are loaded from ``./classifier.pth``. Each function name is split
    into tokens on ``.``, ``_`` and whitespace before prediction.

    :param file: path handed to ``process_file``.
    """
    funcs, linenos = process_file(file)
    model = Classifier()
    model.load_state_dict(torch.load('./classifier.pth'))

    # '.' and '_' both become spaces so split() yields the name's words.
    separators = str.maketrans('._', '  ')
    for func, lineno in zip(funcs, linenos):
        pred = predict(model, func.translate(separators).split())
        if pred == 1:
            print(func, pred, lineno)
def get_target_funcs(file):
    """
    Extract all target functions from a python file.

    The file is processed with ``process_file`` and only the functions
    accepted by ``new_is_selected_func`` are kept.

    :param file: path of the ``.py`` file.
    :return: ``(funcs, linenos)`` as two lists of equal length; both are
        empty when no function is selected.
    :raises AssertionError: if ``file`` does not end with ``.py``.
    """
    assert file.endswith('.py'), '{} is not a python file'.format(file)

    funcs, linenos = process_file(file)

    target_funcs = [(func, lineno) for func, lineno in zip(
        funcs, linenos) if new_is_selected_func(func)]
    # zip(*[]) yields nothing, so unpacking it into two names would raise
    # ValueError; return the empty result explicitly instead.
    if not target_funcs:
        return [], []
    funcs, linenos = zip(*target_funcs)
    return list(funcs), list(linenos)
def add_annotation(file):
    """
    Mark the lines of ``file`` whose functions the classifier labels as 1.

    Each such line gets ``  # DECISIONPOINT`` appended, and the annotated
    text is written to ``./temp.out``; the input file itself is not modified.
    The classifier weights are loaded from ``./classifier.pth``.

    :param file: path of the source file to annotate.
    """
    with open(file, 'r') as src:
        source_lines = src.read().split('\n')

    funcs, linenos = process_file(file)
    model = Classifier()
    model.load_state_dict(torch.load('./classifier.pth'))

    # '.' and '_' both become spaces so split() yields the name's words.
    separators = str.maketrans('._', '  ')
    for func, lineno in zip(funcs, linenos):
        pred = predict(model, func.translate(separators).split())
        if pred == 1:
            print(func, pred, lineno)
            # Line numbers are 1-based, the list is 0-based.
            row = lineno - 1
            source_lines[row] = source_lines[row] + '  # DECISIONPOINT'

    annotated = '\n'.join(source_lines)
    with open('./temp.out', 'w') as fout:
        fout.write(annotated)
def create_dataset(path='/home/gezhang/data/jupyter/target',
                   out_path='./decision_points.txt'):
    """
    Append a labelled list of functions to the decision-point dataset.

    Every ``.py`` file in ``path`` is passed to ``process_file``. Each
    function found is written to ``out_path`` as ``<func>\\t<label>``, where
    the label is 1 if the name starts with any entry of ``key_libs`` and 0
    otherwise. Files that fail to process are skipped.

    :param path: directory containing the ``.py`` files to scan.
    :param out_path: dataset file; it is opened in append mode.
    """
    files = [f for f in os.listdir(path) if f.endswith('.py')]

    all_funcs = []
    for file in tqdm(files):
        try:
            funcs, _ = process_file(os.path.join(path, file))
        except Exception:
            # Best effort: skip files that cannot be processed, but let
            # KeyboardInterrupt / SystemExit stop the run.
            continue
        all_funcs.append(funcs)

    with open(out_path, 'a') as f:
        for funcs in all_funcs:
            for func in funcs:
                label = 1 if any(func.startswith(lib) for lib in key_libs) else 0
                f.write('{}\t{}\n'.format(func, label))
def process_test_notebook_by_cell(file, graph_obj=MetaGraph):
    """
    Build one graph per labelled code cell of a test notebook.

    The cell labels ("stages") are read from ``./templates/<nb_id>.html``,
    where ``nb_id`` is the file's base name without extension; labels equal
    to ``'0'`` are dropped. An ``.ipynb`` input is first converted with
    ``ipynb2py``. The functions reported by ``process_file`` are distributed
    over the cells returned by ``cut_cells_from_py`` by line number, cells
    that are blank or parse to an empty body are discarded, and a graph is
    created for each remaining cell and dumped to ``args.out_path``.

    :param file: path of the notebook (``.ipynb``) or its ``.py`` export.
    :param graph_obj: callable used to build each cell's graph; it is called
        with the same arguments as ``MetaGraph``.
    :return: list of the graphs that were created.
    :raises AssertionError: if the number of kept cells differs from the
        number of stages.
    """
    global target_linenos
    global target_nodes
    global lineno
    global linenos
    global node_index
    # Reset the module-level state before handling a new notebook.
    target_linenos = {}
    target_nodes = {}
    linenos = []
    lineno = 0
    node_index = -1

    # load labeled stages
    nb_id = file.split('/')[-1].split('.')[0]
    with open(os.path.join('./templates', '{}.html'.format(nb_id)), 'r') as f:
        html_source = f.read()
    stages = get_cell_labels_from_html(html_source)
    stages = [s for s in stages if s != '0']
    if file.endswith('.ipynb'):
        file = ipynb2py(file)

    funcs, linenos = process_file(file)

    cells = cut_cells_from_py(file)
    cells_len = [len(c.split('\n')) for c in cells]

    # Hand each cell the leading functions whose line numbers fall strictly
    # inside its line window, consuming funcs/linenos from the front.
    cells_func = []
    prev_len = 0
    for l in cells_len:
        lines = [ll for ll in linenos if prev_len < ll < prev_len + l]

        cells_func.append(funcs[:len(lines)])
        funcs = funcs[len(lines):]
        linenos = linenos[len(lines):]
        prev_len += l - 1

    # Decide once per cell whether it contains any statement, then apply the
    # same mask to the cells and to their function lists.
    keep = [bool(c.strip() and ast.parse(c).body) for c in cells]
    cells_func = [fs for fs, k in zip(cells_func, keep) if k]
    cells = [c for c, k in zip(cells, keep) if k]
    cells_len = [len(c.split('\n')) for c in cells]

    assert len(cells) == len(stages), "{}, cell split error".format(nb_id)
    # NOTE(review): this branch can only run when asserts are disabled
    # (python -O), and it does not trim cells_func alongside cells --
    # confirm the intended behaviour before relying on it.
    if len(cells) == len(stages) + 1:
        cells = cells[1:]
        cells_len = cells_len[1:]
    prev_len = 0
    graphs = []
    for c, l, s, cell_funcs in zip(cells, cells_len, stages, cells_func):
        root = ast.parse(c)
        if root.body:
            # Honour the caller-supplied graph class instead of always
            # instantiating MetaGraph.
            graph = graph_obj(root.body,
                              prev_len,
                              root.body[0],
                              file,
                              'none_func',
                              stage=s,
                              funcs=cell_funcs)
            graphs.append(graph)
            graph.dump_into_file(args.out_path, merge=True)

        prev_len += l - 1

    return graphs
]
error_files = []
all_cells = []
for file in files:
    with open(os.path.join(path, file), 'r') as f:
        cells = json.load(f)

    cells = [c for c in cells if c["cell_type"] == 'code']
    code_lines = list(
        itertools.chain.from_iterable([c["source"] for c in cells]))
    code_lines = [l for l in code_lines if is_python_line(l)]
    source = '# In[]:\n'.join(code_lines)

    try:
        # root = ast.parse(source)
        funcs, linenos = process_file(os.path.join(path, file), content=source)
        py_cells = source.split('# In[]:\n')
        prev_len = 0
        cell_funcs = []
        for cell in py_cells:

            cell_len = len(cell.split('\n'))

            temp_func = [
                f for f, l in zip(funcs, linenos)
                if l <= prev_len + cell_len and l > prev_len
            ]
            # pdb.set_trace()
            cell_funcs.append(temp_func)
            prev_len += cell_len
        # for cf, pc in zip(cell_funcs, py_cells):
Exemplo n.º 9
0
def remove_output(filename):
    """
    Render an uploaded notebook as HTML with its decision points highlighted.

    For an ``.ipynb`` file in ``app.config['UPLOAD_FOLDER']`` the notebook is
    exported to HTML, cell outputs and scripts are stripped, and every
    function that ``is_decision_point`` accepts is replaced in the HTML by a
    button carrying a popover with its alternatives (looked up in ``alt``).
    The result is written to ``./templates/<name>.html``. Other file names
    are passed to ``render_template`` unchanged.

    :param filename: name of the uploaded file.
    :return: the result of ``render_template`` for the generated template.
    """
    if filename.endswith('.ipynb'):
        notebook_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        # convert ipynb to html
        html_source = html_exporter.from_file(notebook_path)[0]
        # remove cell output
        html_source = remove_output_from_html(html_source)
        # remove some html codes so that we can add our own elements
        # (kept from the original author: the reason is not documented)
        html_source = remove_script(html_source)
        # manually import "bootstrap" things
        html_source = bootstrap_script + html_source
        # modify opacity so that our own elements are visible
        html_source = html_source.replace('.fade {\n  opacity: 0;',
                                          '.fade {\n  opacity: 90;')
        html_source = html_source.replace('.popover {\n  position: absolute;\n  top: 0;\n  left: 0;\n  z-index: 1060;\n  display: none;',
                                          '.popover {\n  position: absolute;\n  top: 0;\n  left: 0;\n  z-index: 1060;\n  display: block;')
        # add popover function at the bottom of html
        html_source = html_source.replace(
            '</html>', popover_script + '</html>')
        # convert ipynb to py
        py_source = py_exporter.from_file(notebook_path)[0]

        # NOTE(review): no parser is named here, so bs4 chooses one from what
        # is installed; pin it if the output must be reproducible.
        soup = BeautifulSoup(html_source)

        # find decision points and their line numbers
        funcs, linenos = process_file('_', content=py_source)
        code_lines = py_source.split('\n')
        # The set of input-area divs does not change while spans inside them
        # are replaced, so it is collected once.
        input_areas = soup.find_all('div', class_='input_area')
        # Absolute position of the input area that matched last; the search
        # for the next decision point resumes from there.
        index = 0
        # highlight decision points and add popover windows
        for f, l in zip(funcs, linenos):
            if not is_decision_point(f):
                continue
            print(f)
            target_line = code_lines[l - 1]
            wanted = target_line.strip()
            for offset, tag in enumerate(input_areas[index:]):
                input_content = html_text_exporter.handle(tag.prettify())
                input_lines = input_content.strip().split('\n')
                if not any(il.strip() == wanted for il in input_lines):
                    continue
                # Pick the span for this call; without one, leave this area
                # alone rather than touching an unrelated or already
                # replaced element.
                element = next(
                    (e for e in tag.find_all('span', text=f.split('.')[-1])
                     if is_target_element(e, target_line)),
                    None)
                if element is None:
                    continue
                element.replaceWith(_build_popover_button(soup, f))
                # offset is relative to the slice, so accumulate it.
                index += offset
                break

        html_source = soup.prettify()

        # dump new html
        with open('./templates/{}'.format(filename.replace('.ipynb', '.html')), 'w') as fout:
            fout.write(html_source)
    else:
        # file not ends with ".ipynb"
        pass
    return render_template(filename.replace('.ipynb', '.html'))


def _build_popover_button(soup, func):
    """Create the popover button that replaces the span of ``func``."""
    library_alt = alt[func.split('.')[0]]
    alternatives = library_alt["similar_sets"][library_alt["func2set"][func]]

    button = soup.new_tag('button')
    button.string = func.split('.')[-1]
    button["data-toggle"] = "popover"
    button["data-html"] = "true"
    button["style"] = "background-color:#BEC23F;"
    button["title"] = "Alternatives"
    button["data-content"] = "{}".format('<br/>'.join(alternatives))
    return button