示例#1
0
 def _save_json(self, node):
     """Serialize *node* (the catalog tree) as JSON to ``self.path``.

     Returns True on success; I/O errors propagate to the caller.
     """
     exporter = JsonExporter(indent=2, sort_keys=True)
     with open(self.path, 'w') as handle:
         exporter.write(node, handle)
     self._debug('Catalog saved to json \"{}\"'.format(self.path))
     return True
def create(tweetsFile):
    """Build a PropTree from a file of line-delimited tweet JSON.

    Every post becomes an AnyNode with a sequential ``nodeNr``; retweets and
    quotes are attached under their source tweet. When the source tweet is
    not in the tree an artificial parent is created whose nodeNr carries an
    "ex" prefix. The resulting forest is exported as '&'-separated JSON.

    Returns the populated PropTree.
    """
    # A node should have an nodeNr (starting on 0), idStr(tweet id), parent.
    propTree = PropTree()  # an instance of a tree
    nodeNr = 0  # to be ordered by time
    unknownNodeNr = 0  # counts artificially created parent nodes
    requestCounter = 1  # counts lookups done via getFriendInTree
    posts = []
    print(tweetsFile)
    # FIX: context manager so the input handle is closed (was a bare open()
    # in the for statement).
    with open(tweetsFile, 'r') as infile:
        for line in infile:
            posts.append(json.loads(line))  # make a list of json arrays
    print("This file contains " + str(len(posts)) + " posts.")
    for post in posts:
        # adds a new key, which is the id for a post when in the tree
        post['tweet_nr'] = nodeNr
        idStr = post['id_str']
        idUser = post['user']['id_str']

        if 'retweeted_status' in post:
            parentIdStr = post['retweeted_status']['id_str']
            parentIdUser = post['retweeted_status']['user']['id_str']
        elif 'quoted_status' in post:
            parentIdStr = post['quoted_status']['id_str']
            parentIdUser = post['quoted_status']['user']['id_str']

        if 'retweeted_status' in post or 'quoted_status' in post:
            # make retweet or quote node
            parentNode = getFriendInTree(propTree, idUser, parentIdStr,
                                         requestCounter, len(posts))
            requestCounter += 1
            if parentNode is None:  # no parent in tree: create one artificially
                # artificial parents can be distinguished by an ex in their id
                parentNodeNr = "ex" + str(unknownNodeNr)
                parentNode = AnyNode(nodeNr=parentNodeNr,
                                     idStr=parentIdStr,
                                     idUser=parentIdUser)
                propTree.addRoot(parentNode)
                unknownNodeNr += 1
            AnyNode(nodeNr=nodeNr,
                    idStr=idStr,
                    idUser=idUser,
                    parent=parentNode)
        else:
            # this is original content
            reference = AnyNode(nodeNr=nodeNr, idStr=idStr, idUser=idUser)
            propTree.addRoot(reference)
        nodeNr += 1
    propTree.updatePosts(posts)
    exporter = JsonExporter(indent=2, sort_keys=True)

    # FIX: the original truncated via ``open(...).close`` (missing call
    # parentheses, so it was a no-op) and then re-opened the file with 'r+'.
    # A single open with 'w' truncates and writes in one step, and the
    # context manager guarantees the handle is closed.
    with open('./data/tree/trees/' + tweetsFile[30:-4] + '.txt', 'w') as savedFile:
        for root in propTree.roots:
            exporter.write(root, savedFile)
            savedFile.write("&\n")

    propTree.makeNodeTree()
    writeToFile(propTree)
    return propTree
示例#3
0
def main(ioc_file, output_dir):
    """Explore every IOC listed in *ioc_file* (CSV rows: type, value) and
    dump each resulting relation tree to a TXT and a JSON file in
    *output_dir*.
    """
    with open(ioc_file) as csvfile:
        iocreader = csv.reader(csvfile, delimiter=',')
        for row in iocreader:
            root = AnyNode(id=row[1], type=row[0])

            logger.info('=========Start to explore IOC: %s', root.id)

            ioc_list = build_ioc_relation(root)

            timestamp = datetime.now().strftime('%Y%m%d%H%M')
            query_depth = config.get('general', 'depth')
            # Shared filename stem for both output files.
            base = output_dir + root.id + '_depth_' + query_depth + '_' + timestamp

            txtfile = base + '.txt'
            # FIX: context managers close the handles even if a write fails
            # (original used manual open/close).
            with open(txtfile, "w") as file:
                file.write(str(RenderTree(root)))

            logger.info('Export IOCs to TXT file: %s', txtfile)

            jsonfile = base + '.json'
            with open(jsonfile, "w") as file:
                exporter = JsonExporter(indent=2, sort_keys=False)
                exporter.write(root, file)

            logger.info('Export IOCs to JSON file: %s', jsonfile)

            logger.info('=========Done exploration for IOC: %s', root.id)

    return
示例#4
0
def test_json_exporter():
    """Json Exporter: export() output and write() output match the expected
    serialized lines."""
    root = AnyNode(id="root")
    s0 = AnyNode(id="sub0", parent=root)
    AnyNode(id="sub0B", parent=s0)
    AnyNode(id="sub0A", parent=s0)
    s1 = AnyNode(id="sub1", parent=root)
    AnyNode(id="sub1A", parent=s1)
    AnyNode(id="sub1B", parent=s1)
    s1c = AnyNode(id="sub1C", parent=s1)
    AnyNode(id="sub1Ca", parent=s1c)

    lines = [
        '{',
        '  "children": [',
        '    {',
        '      "children": [',
        '        {',
        '          "id": "sub0B"',
        '        },',
        '        {',
        '          "id": "sub0A"',
        '        }',
        '      ],',
        '      "id": "sub0"',
        '    },',
        '    {',
        '      "children": [',
        '        {',
        '          "id": "sub1A"',
        '        },',
        '        {',
        '          "id": "sub1B"',
        '        },',
        '        {',
        '          "children": [',
        '            {',
        '              "id": "sub1Ca"',
        '            }',
        '          ],',
        '          "id": "sub1C"',
        '        }',
        '      ],',
        '      "id": "sub1"',
        '    }',
        '  ],',
        '  "id": "root"',
        '}'
    ]

    exporter = JsonExporter(indent=2, sort_keys=True)
    exported = exporter.export(root).split("\n")
    exported = [e.rstrip() for e in exported]  # just a fix for a strange py2x behavior.
    eq_(exported, lines)
    with NamedTemporaryFile(mode="w+") as ref:
        with NamedTemporaryFile(mode="w+") as gen:
            ref.write("\n".join(lines))
            exporter.write(root, gen)
            # FIX: flush both handles before comparing; without this,
            # filecmp compared two still-buffered (empty) files and the
            # assertion passed vacuously.
            ref.flush()
            gen.flush()
            assert filecmp.cmp(ref.name, gen.name)
示例#5
0
def _serialize_ontology(root, filename=None):
  """Serializes an ontology given by its root to a JSON file.

  If no output filename is given, return the serialized tree as a string.
  """
  exporter = JsonExporter(indent=2, sort_keys=True)
  if filename:
    # FIX: JsonExporter.write expects a file handle; the original passed the
    # filename string, which fails inside json.dump.
    with open(filename, 'w') as handle:
      exporter.write(root, handle)
  else:
    return exporter.export(root)
示例#6
0
    def export_tree(self, filename):
        """Export the CST twice: as an OrderedDict kept on the instance
        (``self.export_cst_dict``) and as a sorted JSON file at *filename*."""
        dict_exporter = DictExporter(dictcls=OrderedDict, attriter=sorted)
        self.export_cst_dict = dict_exporter.export(self.cst)

        json_exporter = JsonExporter(indent=2, sort_keys=True)
        with open(filename, 'w') as handle:
            json_exporter.write(self.cst, handle)

        print('CST tree export to JSON successful!')
示例#7
0
def main():
    """Make a full tree from the default targets, and export it in graphviz and JSON form."""
    tree = make_tree()
    DotExporter(tree).to_dotfile('full_tree.dot')

    json_exporter = JsonExporter(indent=4, sort_keys=True)
    with open('full_tree.json', 'w') as handle:
        json_exporter.write(tree, handle)

    print(f'node count: {len(tree.descendants)}')
示例#8
0
 def save_to_file(self, path, start_node=None):
     """Save the tree (or the subtree rooted at *start_node*) to *path* as
     JSON; prints an error instead of writing when the target directory does
     not exist."""
     path = osp.expanduser(path)
     # Guard clause: refuse to write into a missing directory.
     if not osp.exists(osp.dirname(path)):
         print("Error saving to disk. Dir {} not existing.".format(
             osp.dirname(path)))
         return
     exp = JsonExporter(indent=2, sort_keys=True, default=tools.default)
     node = self._tree if start_node is None else start_node
     with open(path, "w") as f:
         exp.write(node, f)
示例#9
0
def test_json_exporter():
    """Json Exporter: full export, depth-limited (maxlevel) export, and
    write() round-trip via temporary files."""
    root = AnyNode(id="root")
    s0 = AnyNode(id="sub0", parent=root)
    AnyNode(id="sub0B", parent=s0)
    AnyNode(id="sub0A", parent=s0)
    s1 = AnyNode(id="sub1", parent=root)
    AnyNode(id="sub1A", parent=s1)
    AnyNode(id="sub1B", parent=s1)
    s1c = AnyNode(id="sub1C", parent=s1)
    AnyNode(id="sub1Ca", parent=s1c)

    exporter = JsonExporter(indent=2, sort_keys=True)
    exported = exporter.export(root).split("\n")
    exported = [e.rstrip()
                for e in exported]  # just a fix for a strange py2x behavior.
    lines = [
        '{', '  "children": [', '    {', '      "children": [', '        {',
        '          "id": "sub0B"', '        },', '        {',
        '          "id": "sub0A"', '        }', '      ],',
        '      "id": "sub0"', '    },', '    {', '      "children": [',
        '        {', '          "id": "sub1A"', '        },', '        {',
        '          "id": "sub1B"', '        },', '        {',
        '          "children": [', '            {',
        '              "id": "sub1Ca"', '            }', '          ],',
        '          "id": "sub1C"', '        }', '      ],',
        '      "id": "sub1"', '    }', '  ],', '  "id": "root"', '}'
    ]
    eq_(exported, lines)

    exporter = JsonExporter(indent=2, sort_keys=True, maxlevel=2)
    exported = exporter.export(root).split("\n")
    exported = [e.rstrip()
                for e in exported]  # just a fix for a strange py2x behavior.
    limitedlines = [
        '{', '  "children": [', '    {', '      "id": "sub0"', '    },',
        '    {', '      "id": "sub1"', '    }', '  ],', '  "id": "root"', '}'
    ]

    eq_(exported, limitedlines)

    try:
        with NamedTemporaryFile(mode="w+", delete=False) as ref:
            with NamedTemporaryFile(mode="w+", delete=False) as gen:
                # FIX: the exporter in scope here has maxlevel=2, so the
                # reference file must hold the depth-limited output; the
                # original wrote the full-tree lines instead.
                ref.write("\n".join(limitedlines))
                exporter.write(root, gen)
        # on Windows, you must close the files before comparison
        # FIX: the comparison result was computed and discarded; assert it.
        assert filecmp.cmp(ref.name, gen.name)
    finally:
        os.remove(ref.name)
        os.remove(gen.name)
示例#10
0
    def to_json(self, sink=None, **kwargs):
        """Write region tree info to json.

        Arguments:
            sink (str or None): file to save to. if None, will return json object.
            kwargs: additional arguments passed to
                anytree.exporter.jsonexporter.JsonExporter and json.dumps.
        """
        exporter = JsonExporter(indent=2, **kwargs)
        if not sink:
            # No destination given: hand the serialized tree back directly.
            return exporter.export(self.tree_root)
        with open(sink, 'w') as outfile:
            exporter.write(self.tree_root, outfile)
        return sink
示例#11
0
def createJson():
    """Build the PhilPapers category tree from the extracted taxonomy file
    and export it to data.json (then load it back into a DataFrame)."""
    os.chdir(os.path.dirname(__file__))
    os.chdir("..")
    os.chdir("data")

    nodes = {"1": Node("root")}

    # FIX: context manager guarantees the taxonomy file is closed even if
    # parsing raises (the original relied on a manual close at the end).
    with open('PhilpapersTaxonomy.txt', 'r') as philFile:
        # First pass: create a node for every category so any parent can be
        # looked up by its ID regardless of file order.
        for line in reader(philFile):
            # keep only the digits of the ID, but as a string key
            currID = ''.join(filter(str.isdigit, line[1]))
            nodes[currID] = Node("temp")

        # reset our position in philFile so we can restart from the front
        philFile.seek(0)

        # file layout: ["name", "ID", "parent IDs", "primary parent ID"]
        for line in reader(philFile):
            # Normalize IDs to digit-only strings so they match the dict keys.
            currID = ''.join(filter(str.isdigit, line[1]))
            currParentID = ''.join(filter(str.isdigit, line[len(line) - 1]))
            currName = str(line[0])

            # set the current node's name, then hook it under its primary parent
            nodes[currID].name = currName
            nodes[currID].parent = nodes[currParentID]

    # export our tree to json
    exporter = JsonExporter(indent=2, sort_keys=True)
    with open('data.json', 'w') as f:
        exporter.write(nodes["1"], f)

    data = pd.read_json("data.json")
    df = pd.DataFrame(data['children'])  # NOTE(review): df is unused here
def saving_tree_test():
    """Build a tiny TTree (root with children _a and _b) and save it to JSON."""
    # For now user should start by creating a root node
    # FIX: the original passed bare names (root, _a, _b) that are undefined
    # in this scope; per the ASCII diagram below the node names are strings.
    root_node = Node("root")

    # Maybe the user wants to create more nodes to add to the tree
    a_node = Node("_a")
    b_node = Node("_b")

    # Then user should create a tree and initialize it with a root node
    tree_to_save = TTree("root", root_node)

    # Then add nodes to the tree
    tree_to_save.add_node(root_node, a_node)
    tree_to_save.add_node(root_node, b_node)

    """ Tree in this example looks like this...
    *    root (0)
    *    ├── _a (1)
    *    └── _b (2)
    """

    print('\n')
    print("Confirm that tree matches example code:")
    tree_to_save.print_tree(True)
    print('\n')

    from anytree.exporter import JsonExporter

    # The default lambda expression tells json what the default value of an
    # objects stuff should be if the value cannot be serialized
    js_exporter = JsonExporter(
        indent=2, sort_keys=True, default=lambda o: '<not serializable>')

    with open("./ts_modeling/saved_trees/tree_to_save.json", 'w') as js_file:
        js_exporter.write(tree_to_save.root, js_file)
        print("Here is the json formatting:")
        print(js_exporter.export(tree_to_save.root))
        print('\n')
示例#13
0
    sentences = list(map(lambda x: x.lower(), sentences))

    for index in range(len(sentences)):
        extracted_sentence = sentences[index]
        correct_tree = None
        all_trees = parser(extracted_sentence, grammar_path)

        tree = best_tree(all_trees)
        semantic = str(tree.label()['SEM'])

        k = 0
        for i in range(len(correct_regex)):
            if re.match(correct_regex[i], semantic):
                k = i
                break

        print("Match with RegExpr {}.\n{}\n".format(str(k + 1), semantic))

        root = None
        if k == 0:
            root = sentence_1(tree)
        elif k == 1:
            root = sentence_2(tree)
        elif k == 2:
            root = sentence_3(tree)

        exporter = JsonExporter(indent=2, sort_keys=True)
        with open('../output/' + 'sentence_plan_' + str(index) + '.json',
                  'w') as file:
            exporter.write(root, file)
示例#14
0
def create(tweetsFile, generalFileName):
    """Build a PropTree from line-delimited tweet JSON in *tweetsFile*.

    Retweets/quotes are attached under their source tweet; when the source is
    missing an artificial parent is created (nodeNr prefixed with "x").
    Follower lists are fetched for users reposted more often than
    followerCount/5000 times. The forest is written as '&'-separated JSON and
    summarized via writeToFile(generalFileName).

    Returns the populated PropTree.
    """
    # A node should have an nodeNr (starting on 0), idStr(tweet id), parent.
    propTree = PropTree()  # an instance of a tree
    nodeNr = 0  # to be ordered by time
    unknownNodeNr = 0  # counts artificially created parent nodes
    requestCounter = 1  # counts API lookups made so far
    posts = []
    # FIX: context manager so the input file handle is closed.
    with open(tweetsFile, 'r') as infile:
        for line in infile:
            posts.append(json.loads(line))  # make a list of json arrays
    print(len(posts))
    quotesAndRetweets = 0
    repostedUsers = {}
    # First pass: count how often each user is retweeted or quoted.
    for post in posts:
        userID = None
        if 'retweeted_status' in post:
            userID = post['retweeted_status']['user']['id_str']
        elif 'quoted_status' in post:
            userID = post['quoted_status']['user']['id_str']
        if userID is not None:
            quotesAndRetweets += 1
            if userID in repostedUsers:
                repostedUsers[userID] += 1
            else:
                repostedUsers[userID] = 1
    for post in posts:
        post['tweet_nr'] = nodeNr  # adds a new key, which is the id for a post when in the tree
        idStr = post['id_str']
        idUser = post['user']['id_str']
        timeStamp = post['created_at']
        followerCount = post['user']['followers_count']
        if 'retweeted_status' in post:
            parentIdStr = post['retweeted_status']['id_str']
            parentIdUser = post['retweeted_status']['user']['id_str']
            parentTimeStamp = post['retweeted_status']['created_at']
            parentFollowerCount = post['retweeted_status']['user']['followers_count']
        elif 'quoted_status' in post:
            parentIdStr = post['quoted_status']['id_str']
            parentIdUser = post['quoted_status']['user']['id_str']
            parentTimeStamp = post['quoted_status']['created_at']
            parentFollowerCount = post['quoted_status']['user']['followers_count']
        if 'retweeted_status' in post or 'quoted_status' in post:
            # make retweet or quote node
            parentNode = getFriendInTree(propTree, idUser, parentIdStr, parentIdUser, requestCounter, len(posts))
            requestCounter += 1
            if parentNode is None:  # if this node has no parent we want to artificially create one
                parentNodeNr = "x" + str(unknownNodeNr)  # artificial parents can be distinguished by an x in their id
                parentNode = AnyNode(nodeNr=parentNodeNr, idStr=parentIdStr, idUser=parentIdUser, time=parentTimeStamp, followerCount=parentFollowerCount)
                propTree.addRoot(parentNode)
                # fetch followers only for users reposted "often enough"
                if str(parentIdUser) in repostedUsers:
                    if int(repostedUsers[parentIdUser]) > int(parentFollowerCount)/5000:
                        if parentIdUser not in propTree.rootFollowers:
                            propTree.addRootFollowers(parentIdUser, getFollowers(parentIdUser, requestCounter, len(posts)))
                            requestCounter += 1
                unknownNodeNr += 1
            AnyNode(nodeNr=nodeNr, idStr=idStr, idUser=idUser, parent=parentNode, time=timeStamp, followerCount=followerCount)
        else:
            # this is original content
            reference = AnyNode(nodeNr=nodeNr, idStr=idStr, idUser=idUser, time=timeStamp, followerCount=followerCount)
            propTree.addRoot(reference)
            if str(idUser) in repostedUsers:
                if int(repostedUsers[idUser]) > int(followerCount)/5000:
                    if idUser not in propTree.rootFollowers:
                        propTree.addRootFollowers(idUser, getFollowers(idUser, requestCounter, len(posts)))
                        requestCounter += 1
        nodeNr += 1
    propTree.updatePosts(posts)
    exporter = JsonExporter(indent=2, sort_keys=True)
    saveFileName = propTree.getFileName()
    # FIX: the original line ``open(...).close`` lacked call parentheses (a
    # no-op) and was redundant anyway: opening with 'w' truncates. The
    # context manager also guarantees the output handle is closed.
    with open('./data/tree/trees/other/' + saveFileName + '.txt', 'w') as savedFile:
        for root in propTree.roots:
            exporter.write(root, savedFile)
            savedFile.write("&\n")

    writeToFile(propTree, generalFileName)
    return propTree
示例#15
0
                    hasDependencies=dep["node"]["hasDependencies"],
                    parent=library,
                )
                if dep["node"]["repository"]["licenseInfo"] is not None:
                    tnode.licenseString = dep["node"]["repository"][
                        "licenseInfo"]["spdxId"]
            else:
                print(dep)
                tnode = SbomLibrary(
                    dep["node"]["packageName"],
                    version=dep["node"]["requirements"],
                    packageManager=dep["node"]["packageManager"],
                    hasDependencies=False,
                    incompleteReason="Unable to determine repository host",
                    parent=library,
                )


# Root of the SBOM: the snipe-it project itself. Its direct dependency tree
# is filled in (one level deep) by retrieveDependencies below.
foom = SbomLibrary(
    "snipe-it",
    packageRepositoryURL="https://github.com/snipe/snipe-it",
    hasDependencies=True,
)
retrieveDependencies(foom)
# for child in foom.children:
#    retrieveDependencies(child)

# Dump the resulting library tree as JSON.
with open("out.json", "w") as f:
    exporter = JsonExporter(indent=2, sort_keys=True)
    exporter.write(foom, f)
示例#16
0
    json_structures.remove(structure)

index = 0
# Repeatedly attach pending structures under their parents: an entry whose
# parent is not in the tree yet is skipped and retried on a later pass
# (index resets to 0 after every successful attach).
while (len(json_structures) > 0):
    structure = json_structures[index]
    found = find_by_attr(root, name="id", value=structure["parentId"])
    if found:
        content = json_structures.pop(index)["documents"][0]["content"]
        content = REGEX_WRAP.findall(content.strip())[0]
        # Rewrite raw links into @JournalEntry[...] references.
        links = REGEX_LINK.finditer(content)
        for link in links:
            content = content.replace(link.group(0),
                                      "@JournalEntry[" + link.group(1) + "]")

        # Strip special markup entirely.
        special = REGEX_SPEC.finditer(content)
        for spec in special:
            content = content.replace(spec.group(0), "")

        node = AnyNode(id=structure["id"],
                       parent=found,
                       name=structure["name"],
                       data=content)
        index = 0
    else:
        index = index + 1
        # FIX: was ``index > len(json_structures)``, which allowed index to
        # equal len() and raise IndexError on the next subscript; wrap as
        # soon as the end is reached.
        if index >= len(json_structures):
            index = 0

exporter = JsonExporter()
# FIX: the original passed a bare open() whose handle was never closed.
with open(args.output, "w") as outfile:
    exporter.write(root, outfile)
示例#17
0
File: draw.py  Project: Pex2892/PETAL
def export_tree_in_json(tree, path):
    """Export *tree* as indented JSON to ``data-flare.json`` inside *path*."""
    # FIX: use a context manager so the file is flushed and closed
    # (the original handle was never closed).
    with open(os.path.join(path, 'data-flare.json'), 'w') as f:
        exporter = JsonExporter(indent=4)
        exporter.write(tree, f)
示例#18
0
    def analyze(self):
        """Do everything: load (or build and cache) the script tree for
        self.url, compare image sizes, create the per-script checkboxes and
        fetch the script-free page."""
        def reset_display():
            # Reset display
            self.suffix = "?JSTool=none"
            self.script_buttons.clear()
            self.choice_boxes.clear()
            self.number_of_buttons = 0
            # self.diff_btn.Show()
            self.apply_btn.Show()
            self.save_btn.Show()
            self.content_panel.Show()
            self.content_text.SetValue("Script code")
            while self.script_sizer.GetChildren():
                self.script_sizer.Hide(0)
                self.script_sizer.Remove(0)
            self.images.clear()

        def get_index_html():
            # Get index.html from remote proxy
            return get_resource(self.url)

        def parse_html(html: str):
            # Add index.html scripts to self.script_tree
            cnt = 1
            if not html:
                return
            while "<script" in html:
                src = ""
                script_name = "script" + str(cnt)
                start_index = html.find("<script")
                end_index = html.find("</script>")
                text = html[start_index:end_index + 9]
                new_node = AnyNode(id=script_name,
                                   parent=self.script_tree,
                                   content=text,
                                   vector=extract_features(text),
                                   count=1)
                if ' src="' in text:  # BeautifulSoup turns all single quotes into double quotes
                    src = text.split(' src="')[1].split('"')[0]
                    src = self.format_src(src)
                    # FIX: initialize node so the ``if node`` below cannot hit
                    # a NameError when the search raises CountError.
                    node = None
                    try:
                        node = anytree.cachedsearch.find(
                            self.script_tree, lambda node: node.id == src)
                    except anytree.search.CountError:
                        logging.warning(
                            'multiple possible parents: more than one node with id = %s',
                            src)
                    if node:
                        node.parent = new_node
                html = html.replace(text, "\n<!--" + script_name + "-->\n")
                cnt += 1

        def create_buttons():
            # Add checkboxes to display
            # Check all
            self.add_button('Check all', 0, 1, None)

            index = 1
            # All other script checkboxes
            for node in PreOrderIter(self.script_tree):
                if node.is_root:
                    continue
                node.button = index
                # vector = extract_features(node.content)
                self.add_button(node.id, index, node.depth,
                                get_attribute(node, 'vector'))  # node.count
                checkbox = self.script_buttons[index]
                if (get_attribute(checkbox, 'confidence') is not None
                        and get_attribute(
                            checkbox, 'confidence') < CONFIDENCE_THRESHOLD):
                    # run clustering if confidence less than threshold
                    checkbox.category = CLUSTER.predict(script=str(
                        node.content),
                                                        preprocess=True)
                    label = get_attribute(checkbox, 'label')
                    if label:
                        label.SetLabel(checkbox.category)
                        label.SetBackgroundColour(
                            tuple(CATEGORIES[checkbox.category]['color']))
                        label.SetToolTip(
                            CATEGORIES[checkbox.category]['description'])
                if get_attribute(checkbox,
                                 'category') not in BLOCKED_CATEGORIES:
                    # ads / marketing scripts disabled by default
                    try:
                        if node.id[:6] != "script":
                            self.blocked_urls.remove(node.id)
                    except ValueError:
                        logging.debug("Could not remove %s from blocked urls",
                                      node.id)
                    self.check_boxes(True, node)
                index += 1
            self.scripts_panel.SetSizer(self.script_sizer)
            self.frame.frame_sizer.Layout()

        def functional_dependency():
            # functional dependencies?
            try:
                tmp_dep = perf.get_dependency(self.url)
                # tmp_dep = [['https://ws.sharethis.com/button/async-buttons.js', 'https://www.google-analytics.com/analytics.js', 'https://ws.sharethis.com/button/buttons.js'], ['https://www.googletagmanager.com/gtm.js?id=GTM-WBDQQ5', 'https://www.googleadservices.com/pagead/conversion_async.js'], ['https://www.unicef.org/sites/default/files/js/js_B7pS3ddmNLFYOJi3j28odiodelMu-EhaOeKlHZ8E6y0.js', 'https://www.unicef.org/themes/custom/unicef/assets/src/js/init-blazy.js?v=1.x', 'https://www.unicef.org/sites/default/files/js/js_dWWS6YNlsZWmXLboSy3PIiSD_Yg3sRxwjbMb52mdNyw.js', 'https://www.unicef.org/sites/default/files/js/js_cLlwgRdoiVfjtFxLqlXX-aVbv3xxfX_uMCsn7iJqNpA.js']]

                print("\n\n-------- DEPENDENCY LABELS CHANGED --------")
                mapping = {'non-critical': 0, 'translatable': 1, 'critical': 2}
                mapping2 = {
                    0: 'non-critical',
                    1: 'translatable',
                    2: 'critical'
                }
                for a in tmp_dep:
                    tmp_label = 0

                    for i in a:
                        if i not in self.yasir or self.yasir[
                                i].category not in mapping:
                            continue

                        if mapping[self.yasir[i].category] > tmp_label:
                            tmp_label = mapping[self.yasir[i].category]

                    for i in a:
                        if i not in self.yasir or self.yasir[
                                i].category not in mapping:
                            continue

                        if self.yasir[i].category != mapping2[tmp_label]:
                            print("****", i, mapping2[tmp_label],
                                  self.yasir[i].category)

                print("\n\n")
            except RuntimeError:
                pass

        def display_loading_message():
            # Never managed to get this part to display before spinning wheel of death
            self.err_msg.SetForegroundColour((0, 0, 0))
            self.err_msg.SetLabel("Loading page... please wait")
            self.Update()

        def similarity():
            # Print script pairs in self.script_tree with Jaccard similarity > SIMILARITY_THRESHOLD
            names = []
            scripts = []
            for node in PreOrderIter(self.script_tree):
                if node.is_root:
                    continue
                names.append(node.id)
                scripts.append(str(node.content))
            results = similarity_comparison(scripts, SIMILARITY_THRESHOLD)
            if results:
                print("---" * 20)
                print('scripts with similarity > %.2f' % SIMILARITY_THRESHOLD)
            for tup in results:
                print('%s %s %.2f' % (names[tup[0]], names[tup[1]], tup[2]))

        def compare_image_sizes(images):
            # Print difference in original and rendered image sizes for image URLs in images
            for url in images:
                if url[:4] == 'data':
                    # URI rather than URL
                    url = url.partition(';')[-1]
                    body = url.partition(',')[-1]
                    if url[:6] == 'base64':
                        body = base64.b64decode(body)
                else:
                    body = get_resource(url)
                try:
                    stream = BytesIO(body)
                except TypeError:
                    logging.warning("body in %s, not in bytes", type(body))
                    stream = BytesIO(body.encode(ENCODING))
                try:
                    width, height = get_image_size_from_bytesio(
                        stream, DEFAULT_BUFFER_SIZE)
                    self.images[url] = {}
                    self.images[url]['ow'] = width
                    self.images[url]['oh'] = height
                except UnknownImageFormat as error:
                    logging.exception(str(error))
                except struct.error as error:
                    logging.error(str(error))

            for img in self.driver.find_elements_by_tag_name('img'):
                url = img.get_attribute('src')
                if url not in self.images.keys():
                    self.images[url] = {}
                self.images[url]['rw'] = img.size['width']
                self.images[url]['rh'] = img.size['height']

            logging.info("---" * 20)
            logging.info("potential image improvements:")
            for url, dimensions in self.images.items():
                if len(dimensions.keys()) == 4:
                    # Successfully parsed original and rendered dimensions
                    logging.info(url)
                    logging.info("original: %d x %d", dimensions['ow'],
                                 dimensions['oh'])
                    logging.info("rendered: %d x %d", dimensions['rw'],
                                 dimensions['rh'])

        display_loading_message()

        # Reset values
        self.url = self.url_input.GetValue()
        # FIX: bail out on an empty URL before indexing self.url[-1]; the
        # original checked self.url[-1] first, which raises IndexError on "".
        if not self.url:
            return
        if self.url[-1] != "/":
            self.url = self.url + "/"
        reset_display()
        self.script_tree = AnyNode(id=self.url)

        try:
            file_path = PATH + "/reports/" + self.url.split("/")[2]
            if not os.path.exists(file_path):
                os.mkdir(file_path)
            # FIX: the cached tree is exported below as "script_tree.json";
            # the original tried to re-import "script_tree.txt", so the
            # cache was never hit and the tree was rebuilt on every run.
            with open(file_path + "/script_tree.json", 'r') as f:
                logging.debug('importing script tree...')
                importer = JsonImporter()
                self.script_tree = importer.read(f)
            with open(file_path + "/images.json", 'r') as f:
                images = json.load(f)

        except FileNotFoundError:
            logging.debug('script tree does not yet exist, building now')
            # Get original page and parse external scripts
            self.driver.execute_cdp_cmd('Network.setBlockedURLs', {'urls': []})
            epoch_in_milliseconds = time.time() * 1000
            try:
                self.driver.get(self.url)
                self.err_msg.SetLabel("")
            except InvalidArgumentException as exception:
                self.err_msg.SetForegroundColour((255, 0, 0))  # make text red
                self.err_msg.SetLabel(str(exception))
                return
            self.wait_for_load()
            self.script_tree = AnyNode(id=self.url)
            scripts, images = self.parse_log(epoch_in_milliseconds)
            for script in scripts:
                # pylint: disable=undefined-loop-variable
                # pylint: disable=cell-var-from-loop
                parent = anytree.cachedsearch.find(
                    self.script_tree,
                    lambda node: node.id == self.format_src(script['parent']))
                # Check if this node already exists
                node = anytree.cachedsearch.find(
                    self.script_tree,
                    lambda node: node.id == self.format_src(script['url']))
                if node and node.parent == parent:
                    logging.warning('duplicate script! %s',
                                    self.format_src(script['url']))
                    node.count += 1
                else:
                    AnyNode(id=self.format_src(script['url']),
                            parent=parent,
                            content=script['content'],
                            vector=extract_features(script['content']),
                            count=1)

            # Check image differences
            compare_image_sizes(images)

            # Parse inline scripts
            html = get_index_html()
            parse_html(html)
            # self.print_scripts()

            # Export script tree
            logging.debug('exporting script tree...')
            exporter = JsonExporter()
            with open(
                    PATH + "/reports/" + self.url.split("/")[2] +
                    "/script_tree.json", "w") as f:
                exporter.write(self.script_tree, f)
            logging.debug('done')

            # Export images
            with open(
                    PATH + "/reports/" + self.url.split("/")[2] +
                    "/images.json", "w") as f:
                json.dump(images, f)

        # Check similarity
        # similarity()

        # Create buttons
        self.block_all_scripts()
        create_buttons()

        # Print functional dependencies
        # functional_dependency()

        # Get page with all scripts removed
        self.on_apply_press()

        try:
            self.original.get(self.url)
        except InvalidArgumentException as e:
            logging.error(e.what())
示例#19
0
def get_label_tree(label_list_to_do, node):
    """Recursively attach one Node per label in *label_list_to_do* under *node*.

    Returns nothing; *node* is populated in place and is a full tree once
    the call returns.  Each label access is throttled with a 1-second sleep
    because reading `.sublabels` triggers Discogs API requests.
    """
    # An empty list simply skips the loop, so no explicit base case is needed.
    for label in label_list_to_do:
        try:
            time.sleep(1)  # throttle: stay under the Discogs rate limit
            curr_node = Node(label.name, parent=node)
            get_label_tree(label.sublabels, curr_node)
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.  As before, any failure (e.g.
            # an API error) abandons the remaining siblings at this level.
            return

# Discogs client initialization.
# NOTE(review): the user token is hard-coded and committed to source control;
# move it to an environment variable / config file and revoke this one.
d = discogs_client.Client('getalluniversallabels/0.1',
                          user_token='NMEOClFdbylQxiIwvtLKvwIJioGlKIdzQFxDlVzQ')

boss_label = d.label(38404) # 38404 is the Discogs ID for UMG
root = Node("Universal Music Group")
get_label_tree(boss_label.sublabels, root)

# Export the finished tree to tree.json.
exporter = JsonExporter(indent=2, sort_keys=False)
# BUGFIX: the file handle was opened without ever being closed; the
# with-statement guarantees the JSON is flushed and the handle released.
with open("tree.json", "w") as fh:
    exporter.write(root, fh)
 def to_json(self, filename='minmax.json'):
     """Serialize the tree rooted at ``self._root`` to *filename* as JSON."""
     from anytree.exporter import JsonExporter
     with open(filename, 'w') as out:
         JsonExporter(indent=2).write(self._root, out)
import anytree as at
import commons as com
import pandas as pd
from anytree.exporter import JsonExporter

symptom_df =pd.read_csv(com.BASEDIR + "csvs/symptom_tree_MEDCIN/symptom_tree_MEDCIN.csv")


def add_children(parent_node, parent_aui):
    """Recursively build the subtree below *parent_node*.

    Every row of the global ``symptom_df`` whose PAUI equals *parent_aui*
    becomes a child Node (named by its CUI and carrying its display name),
    and that child's own descendants are attached in turn.  Recursion stops
    when a parent AUI has no matching rows.
    """
    print("add children of: ", parent_node)
    matching_rows = symptom_df.index[symptom_df['PAUI'] == parent_aui].tolist()
    for row in matching_rows:
        child_aui = symptom_df.iloc[row, 0]   # AUI is the first column
        child_cui = symptom_df.iloc[row, 2]   # CUI is the third column
        child_name = symptom_df.iloc[row, 3]  # display name of the child
        child = at.Node(child_cui, parent=parent_node, concept_name=child_name)
        add_children(child, child_aui)


# Root of the MEDCIN symptom tree (UMLS concept "symptoms").
root_cui = "C1457887"
root_aui = "A21010092"
root_node = at.Node(root_cui, concept_name="symptoms")

add_children(root_node, root_aui)

# Serialize the finished tree.
# NOTE(review): the output path below is a truncated placeholder
# ("/home/niksart/...") — fill in the real destination before running.
exporter = JsonExporter(indent=2, sort_keys=True)
# BUGFIX: the handle was opened without ever being closed; the
# with-statement guarantees the JSON is flushed and the handle released.
with open("/home/niksart/...", "w") as fh:
    exporter.write(root_node, fh)
Example #22
0
File: SeaCOW.py  Project: rsling/seacow
class DependencyBuilder(Processor):
  """SeaCOW processor that builds dependency trees from concordance lines
  and exports them as JSON, dot/png images, and/or pretty-printed text."""

  def __init__(self):
    # Column indices into the token annotation tuples; must be set by the user.
    self.column_index    = None
    self.column_head     = None
    self.column_relation = None
    self.column_token    = None
    self.fileprefix      = None   # basename for all output files
    self.savejson        = False  # write trees to <fileprefix>.json
    self.saveimage       = None   # others: 'png' or 'dot'
    self.printtrees      = False  # pretty-print each tree to stdout
    self.imagemetaid1    = None   # meta field(s) used to name image files
    self.imagemetaid2    = None

  def prepare(self, query):
    """Validate the configuration and open the JSON writer if requested.

    Raises ProcessorError when image export lacks a naming meta field or
    when any of the four column indices is unset.
    """

    if self.saveimage and not self.imagemetaid1:
      raise ProcessorError('You cannot save to image files without setting at least imagemetaid1.')

    # BUGFIX: the original wrote `not (self.column_token, self.column_index
    # and ...)` — a non-empty tuple, which is always truthy, so the check
    # never fired.  Explicit None tests also keep a column index of 0 valid.
    if None in (self.column_token, self.column_index, self.column_head, self.column_relation):
      raise ProcessorError('You have to set the column indices for the dependency information.')

    self.has_attributes = True if len(query.attributes) > 1 else False
    self.rex            = re.compile('^<.+>$')  # matches structural tags like <s>

    if self.savejson:
      self.exporter = JsonExporter(indent = 2, sort_keys = False)
      self.writer = open(self.fileprefix + '.json', 'w')

  def finalise(self, query):
    """Close the JSON writer (if any) and report success."""
    # BUGFIX: `return True` used to come first, making the close()
    # unreachable and leaking the JSON file handle.
    if self.savejson:
      self.writer.close()
    return True

  def filtre(self, tree, line):
    """Override in subclasses to discard structures; True keeps the match."""
    return True

  def process(self, query, region, meta, match_offset, match_length):
    """Build the dependency tree for one match and export it as configured."""

    # Turn Manatee stuff into usable structure.
    line         = cow_region_to_conc(region, self.has_attributes)

    # Find true tokens via indices (not structs) for separating match from context.
    # Turn everything into nodes already - to be linked into tree in next step.
    indices      = [i for i, s in enumerate(line) if not self.rex.match(s[0])]
    nodes        = [Node("0", token = "TOP", relation = "", head = "", linear = 0, meta = dict(zip(query.references, meta))),] + \
                     [Node(make_token_safe(line[x][self.column_index]),
                     token    = line[x][self.column_token],
                     relation = line[x][self.column_relation],
                     head     = line[x][self.column_head],
                     linear   = int(line[x][self.column_index]),
                     **dict(zip([query.attributes[a] for a in self.attribs], [line[x][a] for a in self.attribs])) ) for x in indices]

    # Build tree from top: each node's head name selects its parent;
    # unmatched heads leave the node parentless (attached to nothing).
    for n in nodes[1:]:
      n.parent = next((x for x in nodes if x.name == n.head), None)

    # If a descendant implements the filter, certain structures can be
    # discarded.
    if not self.filtre(nodes, line):
      return

    # Export as desired. Three independent formats.
    if self.printtrees:
      for pre, _, node in RenderTree(nodes[0]):
        print("%s%s (%s)" % (pre, node.token, node.name))

    if self.savejson:
      self.exporter.write(nodes[0], self.writer)

    if self.saveimage:
      fnam = self.fileprefix + '_' + meta[self.imagemetaid1]
      if self.imagemetaid2:
        fnam = fnam + '_' + meta[self.imagemetaid2]
      # BUGFIX: `is 'dot'` compared object identity of string literals
      # (implementation-dependent); == compares values.
      if self.saveimage == 'dot':
        DotExporter(nodes[0]).to_dotfile(fnam + '.dot')
      elif self.saveimage == 'png':
        DotExporter(nodes[0], edgeattrfunc = edgeattrfunc, nodenamefunc = nodenamefunc).to_picture(fnam + '.png')
Example #23
0
    def toJson(self, root):
        """Dump the trie rooted at *root* to stdout and to trie_disk.json."""
        serializer = JsonExporter(indent=2, sort_keys=False)
        print(serializer.export(root))

        with open('trie_disk.json', 'w') as out:
            serializer.write(root, out)