Пример #1
0
    def compare(self, text_block1, text_block2):
        """
        Render ``text_block2`` into a fresh SVG, highlighting what differs
        from ``text_block1``.

        Identical blocks, or blocks of which at least one is empty, are drawn
        as a single black run. Otherwise the blocks are diffed with
        ``SequenceMatcher``: equal runs are drawn in black, replaced and
        inserted runs (taken from ``text_block2``) in blue, and deleted runs
        are not drawn at all.

        :param text_block1: the reference text
        :param text_block2: the text to display
        :return: tuple ``(svg, width_max, height)``
        """
        self.svg = SVG(insert=(self.pos_x, self.pos_y),
                       font_family=self.font_family,
                       font_size=self.font_size)

        old_size = len(text_block1)
        new_size = len(text_block2)

        # decide whether a diff is needed at all; `plain` is the text to draw
        # as one unhighlighted run, or None when the blocks must be diffed
        if text_block1 == text_block2:
            plain = text_block2
        elif old_size > 0 and new_size > 0:
            plain = None
        elif new_size > 0:
            # only the new block has content
            plain = text_block2
        elif old_size > 0:
            # only the old block has content
            plain = text_block1
        else:
            plain = ""

        if plain is not None:
            self.build_svg_line(plain, rgb(0x00, 0x0, 0x0))
        else:
            opcodes = SequenceMatcher(None, text_block1,
                                      text_block2).get_opcodes()
            for tag, old_from, old_to, new_from, new_to in opcodes:
                if tag in ("replace", "insert"):
                    # changed content is shown as it appears in the new block
                    self.build_svg_line(text_block2[new_from:new_to],
                                        rgb(0x00, 0x80, 0xff))
                elif tag == "equal":
                    self.build_svg_line(text_block1[old_from:old_to],
                                        rgb(0x0, 0x0, 0x0))
                # "delete": nothing to draw

        # flush a text element that is still pending
        # (presumably left behind by build_svg_line - verify there)
        pending = self.svg_text
        if pending is not None:
            pending["x"] = 0
            pending["y"] = self.height
            self.svg.add(pending)

            self.height += self.height_line * 0.3
            self.svg_text = None

        self.svg['height'] = self.height
        self.svg['width'] = self.width_max

        return self.svg, self.width_max, self.height
Пример #2
0
    def __init__(self):
        """
        Set up the drawing defaults and build the drawing right away.

        After initialising cursor position, extents and font settings, a
        drawing is created and one line is added (via ``add_line``) for every
        type yielded by ``base.gen_available_dx_node_types()``. Both the
        container and the drawing are sized to the extents reached.
        """
        self.dwg = None

        # current cursor position and the largest extents reached so far
        self.pos_x = self.pos_y = 0
        self.pos_y_max = self.pos_x_max = 0

        self.unit = 10
        self.font_size = 10
        self.font_family = "Lucida Console"
        self.filepath = None

        # the text renderer has to use the same font settings as the drawing
        render_text.Render.set_font_family(self.font_family)
        render_text.Render.set_font_size(self.font_size)

        self.dwg = svgwrite.Drawing()
        self._move_right()

        container = SVG(insert=(self.pos_x, self.pos_y))
        for node_type in base.gen_available_dx_node_types():
            container.add(self.add_line(node_type))

        # container and drawing share the same final dimensions
        for target in (container, self.dwg):
            target["width"] = self.pos_x_max
            target["height"] = self.pos_y_max

        self.dwg.add(container)
Пример #3
0
 def test_add_svg_as_subelement(self):
     """An SVG added to another SVG is serialized inside its parent."""
     outer = SVG(id='svg')
     outer.add(SVG(id='subsvg'))

     expected = '<svg id="svg"><defs /><svg id="subsvg"><defs /></svg></svg>'
     self.assertEqual(outer.tostring(), expected)
Пример #4
0
    def add_line(self, instance_xtype):
        '''
        Build one SVG row showing the name of a DiffxElement type on top of
        a rectangle in that type's fill colour, then move the cursor down.

        :param instance_xtype: XTypes.DiffxElement; ``name()``, ``fill`` and
                               ``opacity`` are read from it
        :return: the nested SVG holding the text and the rectangle
        '''

        label = instance_xtype.name()
        box_w, text_h = render_text.Render.get_text_size(label)

        # add a quarter of the text height as extra room
        box_h = text_h + text_h * 0.25

        row = SVG(insert=(self.pos_x, self.pos_y), width=box_w, height=box_h)

        caption = Text(label)
        caption_attributes = {
            'x': 0,
            'y': box_h - box_h * 0.25,
            'font-size': self.font_size,
            'font-family': self.font_family,
            'opacity': 1.0,
            'fill': rgb(0, 0, 0),
        }
        for key, value in caption_attributes.items():
            caption[key] = value

        background = Rect()
        background_attributes = {
            'x': 0,
            'y': 0,
            'fill': instance_xtype.fill,
            'opacity': instance_xtype.opacity,
            'height': box_h,
            'width': box_w,
        }
        for key, value in background_attributes.items():
            background[key] = value

        # note the order: the rectangle is added after the text
        row.add(caption)
        row.add(background)

        row.viewbox(0, 0, box_w, box_h)

        # advance the cursor and remember the extents reached
        self.pos_y += box_h
        self.pos_x_max = max(self.pos_x_max, box_w + self.pos_x)
        self.pos_y_max = max(self.pos_y_max, self.pos_y)

        return row
Пример #5
0
    def add_text_box(self, dx_nodes):
        '''
        Simple text box with fixed width.

        The text of ``dx_nodes.node`` is split into lines by
        ``self._lines_callback``, which yields ``(line, width, height)``
        triples. Every line becomes a TSpan placed below the previous one.
        The box is as wide as its widest line and as high as all lines
        together; afterwards the cursor is moved below the box.

        :param dx_nodes: XTypes.DiffxElement
        :return: the nested SVG containing the text
        '''
        _content = self.get_element_text(dx_nodes.node)
        _lines = self._lines_callback(_content)

        _svg = SVG(insert=(self.pos_x, self.pos_y))
        _t = Text('',
                  insert=(0, 0),
                  font_size=self.font_size,
                  font_family=self.font_family)

        _h = 0
        _w = 0
        for _line, _width, _height in _lines:
            # _h is the running height, i.e. the lower edge of this line
            _h = _h + float(_height)
            _w = max(_w, float(_width))

            _t.add(TSpan(_line, fill="black", insert=(0, _h)))

        # advance the cursor and remember the extents reached
        self.pos_y = self.pos_y + _h
        self.pos_y_max = max(self.pos_y_max, self.pos_y)
        self.pos_x_max = max(self.pos_x_max, _w + self.pos_x)

        _svg['height'] = _h
        _svg['width'] = _w
        _svg.viewbox(0, 0, _w, _h)

        _svg.add(_t)

        return _svg
Пример #6
0
    def process(self):
        """
        Plot the nearest neighbours of the query words for every model.

        All ``*.model`` files in the unpacked source archive are loaded. For
        each model the words most similar to the query words are looked up,
        restricted to the vocabulary shared by all models. The vectors of
        these words are reduced to two dimensions with the selected method
        (PCA, t-SNE or TruncatedSVD) and rendered to an SVG file: every model
        gets its own colour and the positions of each query word in the
        successive models are connected by lines.

        The dataset is finished with 0 items if the input is invalid, else
        with the number of query words.
        """
        # parse parameters; surrounding whitespace and empty entries (as in
        # "a, b," ) are dropped so they do not end up as bogus queries
        raw_words = self.parameters.get("words", "") or ""
        input_words = [
            word.strip() for word in raw_words.split(",") if word.strip()
        ]
        if not input_words:
            self.dataset.update_status(
                "No input words provided, cannot look for similar words.",
                is_final=True)
            self.dataset.finish(0)
            return

        try:
            threshold = float(
                self.parameters.get("threshold",
                                    self.options["threshold"]["default"]))
        except (TypeError, ValueError):
            # missing (None) or non-numeric value: fall back to the default
            threshold = float(self.options["threshold"]["default"])

        threshold = max(-1.0, min(1.0, threshold))
        num_words = convert_to_int(self.parameters.get("num-words"),
                                   self.options["num-words"]["default"])
        overlay = self.parameters.get("overlay")
        reduction_method = self.parameters.get("method")
        all_words = self.parameters.get("all-words")

        # load model files and initialise
        self.dataset.update_status("Unpacking word embedding models")
        staging_area = self.unpack_archive_contents(self.source_file)
        common_vocab = None
        vector_size = None
        models = {}

        # find words that are common to all models
        self.dataset.update_status("Determining cross-model common vocabulary")
        for model_file in staging_area.glob("*.model"):
            if self.interrupted:
                shutil.rmtree(staging_area)
                raise ProcessorInterruptedException(
                    "Interrupted while processing word embedding models")

            model = KeyedVectors.load(str(model_file)).wv
            models[model_file.stem] = model
            if vector_size is None:
                vector_size = model.vector_size  # needed later for dimensionality reduction

            if common_vocab is None:
                common_vocab = set(model.vocab.keys())
            else:
                common_vocab &= set(model.vocab.keys())  # intersect

        if not models:
            shutil.rmtree(staging_area)
            self.dataset.update_status(
                "No word embedding models found, cannot look for similar words.",
                is_final=True)
            self.dataset.finish(0)
            return

        # note: common_vocab stays a set. It is only used for membership
        # tests below, which are constant-time for a set

        # initial boundaries of 2D space (to be adjusted later based on the
        # outcome of the dimensionality reduction)
        max_x = 0.0 - sys.float_info.max
        max_y = 0.0 - sys.float_info.max
        min_x = sys.float_info.max
        min_y = sys.float_info.max

        # for each model, find the words that we may want to plot - these are
        # the nearest neighbours for the given query words
        relevant_words = {}

        # now process each model
        for model_name, model in models.items():
            # a set, so each word occurs at most once per model
            relevant_words[model_name] = set()
            self.dataset.update_status("Finding similar words in model '%s'" %
                                       model_name)

            for query in input_words:
                if query not in model.vocab:
                    # clean up the unpacked models, like all other exit paths
                    shutil.rmtree(staging_area)
                    self.dataset.update_status(
                        "Query '%s' was not found in model %s; cannot find nearest neighbours."
                        % (query, model_name),
                        is_final=True)
                    self.dataset.finish(0)
                    return

                if self.interrupted:
                    shutil.rmtree(staging_area)
                    raise ProcessorInterruptedException(
                        "Interrupted while finding similar words")

                # use a larger sample (topn) than required since some of the
                # nearest neighbours may not be in the common vocabulary and
                # will therefore need to be ignored
                neighbours = [
                    neighbour
                    for neighbour, similarity in model.most_similar(query,
                                                                    topn=1000)
                    if neighbour in common_vocab and similarity >= threshold
                ]

                # always include query word
                relevant_words[model_name].add(query)
                relevant_words[model_name].update(neighbours[:num_words])

        # now do another loop to determine which words to plot for each model
        # this is either the same as relevant_words, or a superset which
        # combines all relevant words for all models
        plottable_words = {}
        last_model = max(relevant_words)
        all_relevant_words = set().union(*relevant_words.values())

        # the vectors need to be reduced all at once - but the vectors are
        # grouped by model. To solve this, keep one flat list of vectors,
        # but also keep track of which indexes of this list belong to which
        # model, by storing the index of the first vector for a model
        collected_vectors = []
        vector_offsets = {}

        for model_name, model_words in relevant_words.items():
            plottable_words[model_name] = []
            vector_offsets[model_name] = len(collected_vectors)

            # determine which words to plot for this model. either the nearest
            # neighbours for this model, or all nearest neighbours found across
            # all models
            words_to_include = all_relevant_words if all_words else model_words

            # words_to_include is a set, so each word is seen once per model
            for word in words_to_include:
                if (not overlay and model_name != last_model
                        and word not in input_words):
                    # if 'overlay' is not set, only plot neighbours once
                    # overall (for the most recent model); query words are
                    # always plotted for every model
                    continue

                collected_vectors.append(models[model_name][word])
                plottable_words[model_name].append(word)

        # build the array in one go instead of re-allocating it per word
        vectors = numpy.array(collected_vectors,
                              dtype=numpy.float64).reshape(-1, vector_size)

        del models  # no longer needed
        del collected_vectors

        # reduce the vectors of all words to be plotted to a two-dimensional
        # coordinate with the selected technique. here the two-dimensional
        # vectors are interpreted as cartesian coordinates
        if reduction_method == "PCA":
            pca = PCA(n_components=2, random_state=0)
            vectors = pca.fit_transform(vectors)
        elif reduction_method == "t-SNE":
            # initialise t-sne transformer
            # parameters taken from Hamilton et al.
            # https://github.com/williamleif/histwords/blob/master/viz/common.py
            tsne = TSNE(n_components=2,
                        random_state=0,
                        learning_rate=150,
                        init="pca")
            vectors = tsne.fit_transform(vectors)
        elif reduction_method == "TruncatedSVD":
            # standard sklearn parameters made explicit
            svd = TruncatedSVD(n_components=2,
                               algorithm="randomized",
                               n_iter=5,
                               random_state=0)
            vectors = svd.fit_transform(vectors)
        else:
            shutil.rmtree(staging_area)
            self.dataset.update_status(
                "Invalid dimensionality reduction technique selected",
                is_final=True)
            self.dataset.finish(0)
            return

        # also keep track of the boundaries of our 2D space, so we can plot
        # them properly later
        for position in vectors:
            max_x = max(max_x, position[0])
            max_y = max(max_y, position[1])
            min_x = min(min_x, position[0])
            min_y = min(min_y, position[1])

        # now we know for each model which words should be plotted and at what
        # position
        # with this knowledge, we can normalize the positions, and start
        # plotting them in a graph

        # a palette generated with https://medialab.github.io/iwanthue/
        colours = [
            "#d58eff", "#cf9000", "#3391ff", "#a15700", "#911ca7", "#00ddcb",
            "#cc25a9", "#d5c776", "#6738a8", "#ff9470", "#47c2ff", "#a4122c",
            "#00b0ca", "#9a0f76", "#ff70c8", "#713c88"
        ]

        # make sure all coordinates are positive
        max_x -= min_x
        max_y -= min_y

        # determine graph dimensions and proportions
        width = 1000  # arbitrary
        height = width * (max_y / max_x)  # retain proportions
        scale = width / max_x

        # margin around the plot to give room for labels and to look better
        margin = width * 0.1
        width += 2 * margin
        height += 2 * margin

        # normalize all known positions to fit within the graph
        vectors = [(margin + ((position[0] - min_x) * scale),
                    margin + ((position[1] - min_y) * scale))
                   for position in vectors]

        # now all positions are finalised, we can determine the "journey" of
        # each query - the sequence of positions in the graph it takes, so we
        # can draw lines from position to position later
        journeys = {}
        for query in input_words:
            journeys[query] = [
                vectors[vector_offsets[model_name] + labels.index(query)]
                for model_name, labels in plottable_words.items()
            ]

        # font sizes proportional to width (which is static and thus predictable)
        fontsize_large = width / 50
        fontsize_normal = width / 75
        fontsize_small = width / 100

        # now we have the dimensions, the canvas can be instantiated
        model_type = self.source_dataset.parameters.get(
            "model-type", "word2vec")
        canvas = get_4cat_canvas(
            self.dataset.get_results_path(),
            width,
            height,
            header="%s nearest neighbours (fitting: %s) - '%s'" %
            (model_type, reduction_method, ",".join(input_words)),
            fontsize_normal=fontsize_normal,
            fontsize_large=fontsize_large,
            fontsize_small=fontsize_small)

        # use colour-coded backgrounds to distinguish the query words in the
        # graph, each model (= interval) with a separate colour. The palette
        # is cycled through when there are more models than colours
        for model_index, model_name in enumerate(plottable_words):
            solid = Filter(id="solid-%s" % model_name)
            solid.feFlood(flood_color=colours[model_index % len(colours)])
            solid.feComposite(in_="SourceGraphic")
            canvas.defs.add(solid)

        # now plot each word for each model
        self.dataset.update_status("Plotting graph")
        words = SVG(insert=(0, 0), size=(width, height))
        queries = SVG(insert=(0, 0), size=(width, height))

        for model_index, (model_name,
                          labels) in enumerate(plottable_words.items()):
            # same colour as used for this model's filter above
            model_colour = colours[model_index % len(colours)]
            offset = vector_offsets[model_name]
            positions = vectors[offset:offset + len(labels)]

            for word, position in zip(labels, positions):
                is_query = word in input_words

                label_filter = ("url(#solid-%s)" %
                                model_name) if is_query else "none"
                colour = "#FFF" if is_query else model_colour
                fontsize = fontsize_normal if is_query else fontsize_small

                if is_query:
                    word += " (" + model_name + ")"

                label_container = SVG(insert=position,
                                      size=(1, 1),
                                      overflow="visible")
                label_container.add(
                    Text(insert=("50%", "50%"),
                         text=word,
                         dominant_baseline="middle",
                         text_anchor="middle",
                         style="fill:%s;font-size:%ipx" % (colour, fontsize),
                         filter=label_filter))

                # we make sure the queries are always rendered on top by
                # putting them in a separate SVG container
                if is_query:
                    queries.add(label_container)
                else:
                    words.add(label_container)

        # plot a line between consecutive positions for query words
        lines = SVG(insert=(0, 0), size=(width, height))
        for journey in journeys.values():
            for previous_position, position in zip(journey, journey[1:]):
                lines.add(
                    Line(start=previous_position,
                         end=position,
                         stroke="#CE1B28",
                         stroke_width=2))

        canvas.add(lines)
        canvas.add(words)
        canvas.add(queries)

        canvas.save(pretty=True)
        shutil.rmtree(staging_area)
        self.dataset.finish(len(journeys))
Пример #7
0
    def render(self,
               canvas,
               level,
               x=0,
               y=0,
               origin=None,
               height=None,
               side=1,
               init=True,
               level_index=0):
        """
        Recursively draw a list of sibling nodes, and their subtrees, to canvas

        :param canvas:  SVG object
        :param list level:  List of nodes to render
        :param int x:  X coordinate of top left of level block
        :param int y:  Y coordinate of top left of level block
        :param tuple origin:  Coordinates to draw 'connecting' line to
        :param float height:  Block height budget, divided over the nodes
                              proportionally to their `max_breadth`
        :param int side:  What direction to move into: 1 for rightwards, -1 for leftwards
        :param bool init:  Whether this is the top level of nodes. Only has an
                           effect if side == self.SIDE_LEFT, in which case the
                           top level nodes are not drawn
        :param int level_index:  Depth of this level, used to look up the
                                 highest occurrence count in
                                 `self.max_occurrences`
        :return:  Updated canvas
        """
        if not level:
            return canvas

        leftwards = side == self.SIDE_LEFT

        # the top level of the left side is positioned but not drawn
        skip_drawing = init and leftwards

        # this eliminates a small misalignment where the left side of the
        # graph starts slightly too far to the left
        if skip_drawing:
            x += self.step

        # total amount of vertical slots the nodes in this block need
        slots_total = sum([self.max_breadth(node) for node in level])

        for node in level:
            # this node's share of the available height
            block_height = (self.max_breadth(node) / slots_total) * height

            # text is enlarged depending on how often the node occurs,
            # relative to the most-occurring node of this level
            ratio = node.occurrences / self.max_occurrences[level_index]
            if ratio >= 0.75:
                embiggen = 3
            elif ratio > 0.5:
                embiggen = 2
            elif ratio > 0.25:
                embiggen = 1.75
            elif ratio > 0.15:
                embiggen = 1.5
            else:
                embiggen = 1

            # size of the text block (this is why we use a monospace font)
            text_width = len(node.name) * self.step
            text_width *= (embiggen * 1)

            if self.align == "top":
                text_offset_y = self.fontsize
            else:
                text_offset_y = (block_height) / 2

            # on the left side, text ends at the block position instead of
            # starting there
            block_x, block_y = x, y
            block_offset_x = -(text_width + self.step) if leftwards else 0

            text_left = block_x + block_offset_x
            self.x_min = min(self.x_min, text_left)
            self.x_max = max(self.x_max, text_left + text_width)

            if skip_drawing:
                # the right side of the graph already includes this node;
                # only adjust position to make left side connect to right side
                x += text_width
                block_x = block_x + text_width
            else:
                container = SVG(x=text_left,
                                y=block_y,
                                width=text_width,
                                height=block_height,
                                overflow="visible")
                container.add(
                    Text(text=node.name,
                         insert=(0, text_offset_y),
                         alignment_baseline="middle",
                         style="font-size:" + str(embiggen) + "em"))
                canvas.add(container)

            # connect this node to its parent with a curve
            if origin:
                destination = (x - self.step, y + text_offset_y)

                if side == self.SIDE_RIGHT:
                    curve_start, curve_end = origin, destination
                else:
                    # on the left side the curve runs in the other direction
                    curve_start = (destination[0] + self.step, destination[1])
                    curve_end = (origin[0] - self.step, origin[1])

                # both control points sit halfway between start and end
                control_x = curve_end[0] - ((curve_end[0] - curve_start[0]) /
                                            2)
                control_left = (control_x, curve_start[1])
                control_right = (control_x, curve_end[1])

                flow = Path(stroke="#000", fill_opacity=0, stroke_width=1.5)
                flow.push("M %f %f" % curve_start)
                flow.push("C %f %f %f %f %f %f" %
                          (*control_left, *control_right, *curve_end))
                canvas.add(flow)

            # curves to this node's children start here
            new_origin = (block_x + ((text_width + self.step) * side),
                          block_y + text_offset_y)

            # draw the subtree below this node within this node's block
            canvas = self.render(canvas,
                                 node.children,
                                 x=x + ((text_width + self.gap) * side),
                                 y=y,
                                 origin=new_origin,
                                 height=int(block_height),
                                 side=side,
                                 init=False,
                                 level_index=level_index + 1)

            # the next sibling is drawn below this block
            y += block_height

        return canvas
Пример #8
0
    def process(self):
        """
		This takes a 4CAT results file as input, and outputs a plain text file
		containing all post bodies as one continuous string, sanitized.
		"""

        link_regex = re.compile(r"https?://[^\s]+")
        delete_regex = re.compile(r"[^a-zA-Z)(.,\n -]")

        # settings
        strip_urls = self.parameters.get("strip-urls",
                                         self.options["strip-urls"]["default"])
        strip_symbols = self.parameters.get(
            "strip-symbols", self.options["strip-symbols"]["default"])
        sides = self.parameters.get("sides", self.options["sides"]["default"])
        self.align = self.parameters.get("align",
                                         self.options["align"]["default"])
        window = convert_to_int(
            self.parameters.get("window", self.options["window"]["default"]),
            5) + 1
        query = self.parameters.get("query", self.options["query"]["default"])
        self.limit = convert_to_int(
            self.parameters.get("limit", self.options["limit"]["default"]),
            100)

        left_branches = []
        right_branches = []

        # do some validation
        if not query.strip() or re.sub(r"\s", "", query) != query:
            self.dataset.update_status(
                "Invalid query for word tree generation. Query cannot be empty or contain whitespace."
            )
            self.dataset.finish(0)
            return

        window = min(window, self.options["window"]["max"] + 1)
        window = max(1, window)

        # find matching posts
        processed = 0
        for post in self.iterate_csv_items(self.source_file):
            processed += 1
            if processed % 500 == 0:
                self.dataset.update_status(
                    "Processing and tokenising post %i" % processed)
            body = post["body"]

            if strip_urls:
                body = link_regex.sub("", body)

            if strip_symbols:
                body = delete_regex.sub("", body)

            body = word_tokenize(body)
            positions = [
                i for i, x in enumerate(body) if x.lower() == query.lower()
            ]

            # get lists of tokens for both the left and right side of the tree
            # on the left side, all lists end with the query, on the right side,
            # they start with the query
            for position in positions:
                right_branches.append(body[position:position + window])
                left_branches.append(body[max(0, position - window):position +
                                          1])

        # Some settings for rendering the tree later
        self.step = self.fontsize * 0.6  # approximately the width of a monospace char
        self.gap = (7 * self.step)  # space for lines between nodes
        width = 1  # will be updated later

        # invert the left side of the tree (because that's the way we want the
        # branching to work for that side)
        # we'll visually invert the nodes in the tree again later
        left_branches = [list(reversed(branch)) for branch in left_branches]

        # first create vertical slices of tokens per level
        self.dataset.update_status("Generating token tree from posts")
        levels_right = [{} for i in range(0, window)]
        levels_left = [{} for i in range(0, window)]
        tokens_left = []
        tokens_right = []

        # for each "level" (each branching point representing a level), turn
        # tokens into nodes, record the max amount of occurences for any
        # token in that level, and keep track of what nodes are in which level.
        # The latter is needed because a token may occur multiple times, at
        # different points in the graph. Do this for both the left and right
        # side of the tree.
        for i in range(0, window):
            for branch in right_branches:
                if i >= len(branch):
                    continue

                token = branch[i].lower()
                if token not in levels_right[i]:
                    parent = levels_right[i - 1][branch[
                        i - 1].lower()] if i > 0 else None
                    levels_right[i][token] = Node(token,
                                                  parent=parent,
                                                  occurrences=1,
                                                  is_top_root=(parent is None))
                    tokens_right.append(levels_right[i][token])
                else:
                    levels_right[i][token].occurrences += 1

                occurrences = levels_right[i][token].occurrences
                self.max_occurrences[i] = max(
                    occurrences, self.max_occurrences[i]
                ) if i in self.max_occurrences else occurrences

            for branch in left_branches:
                if i >= len(branch):
                    continue

                token = branch[i].lower()
                if token not in levels_left[i]:
                    parent = levels_left[i - 1][branch[
                        i - 1].lower()] if i > 0 else None
                    levels_left[i][token] = Node(token,
                                                 parent=parent,
                                                 occurrences=1,
                                                 is_top_root=(parent is None))
                    tokens_left.append(levels_left[i][token])
                else:
                    levels_left[i][token].occurrences += 1

                occurrences = levels_left[i][token].occurrences
                self.max_occurrences[i] = max(
                    occurrences, self.max_occurrences[i]
                ) if i in self.max_occurrences else occurrences

        # nodes that have no siblings can be merged with their parents, else
        # the graph becomes unnecessarily large with lots of single-word nodes
        # connected to single-word nodes. additionally, we want the nodes with
        # the most branches to be sorted to the top, and then only retain the
        # most interesting (i.e. most-occurring) branches
        self.dataset.update_status("Merging and sorting tree nodes")
        for token in tokens_left:
            self.merge_upwards(token)
            self.sort_node(token)
            self.limit_subtree(token)

        for token in tokens_right:
            self.merge_upwards(token)
            self.sort_node(token)
            self.limit_subtree(token)

        # somewhat annoyingly, anytree does not simply delete nodes detached
        # from the tree in the previous steps, but makes them root nodes. We
        # don't need these root nodes (we only need the original root), so the
        # next step is to remove all root nodes that are not the main root.
        # We cannot modify a list in-place, so make a new list with the
        # relevant nodes
        level_sizes = {}
        filtered_tokens_right = []
        for token in tokens_right:
            if token.is_root and not token.is_top_root:
                continue

            filtered_tokens_right.append(token)

        filtered_tokens_left = []
        for token in tokens_left:
            if token.is_root and not token.is_top_root:
                continue

            filtered_tokens_left.append(token)

        # now we know which nodes are left, and can therefore determine how
        # large the canvas needs to be - this is based on the max number of
        # branches found on any level of the tree, in other words, the number
        # of "terminal nodes"
        height_left = self.whitespace * self.fontsize * max([
            self.max_breadth(node)
            for node in filtered_tokens_left if node.is_top_root
        ])
        height_right = self.whitespace * self.fontsize * max([
            self.max_breadth(node)
            for node in filtered_tokens_right if node.is_top_root
        ])
        height = max(height_left, height_right)

        canvas = Drawing(str(self.dataset.get_results_path()),
                         size=(width, height),
                         style="font-family:monospace;font-size:%ipx" %
                         self.fontsize)

        # the nodes on the left side of the graph now have the wrong word order,
        # because we reversed them earlier to generate the correct tree
        # hierarchy - now reverse the node labels so they are proper language
        # again
        for token in tokens_left:
            self.invert_node_labels(token)

        wrapper = SVG(overflow="visible")

        self.dataset.update_status("Rendering tree to SVG file")
        if sides != "right":
            wrapper = self.render(wrapper, [
                token for token in filtered_tokens_left
                if token.is_root and token.children
            ],
                                  height=height,
                                  side=self.SIDE_LEFT)

        if sides != "left":
            wrapper = self.render(wrapper, [
                token for token in filtered_tokens_right
                if token.is_root and token.children
            ],
                                  height=height,
                                  side=self.SIDE_RIGHT)

        # things may have been rendered outside the canvas, in which case we
        # need to readjust the SVG properties
        wrapper.update({"x": 0 if self.x_min >= 0 else self.x_min * -1})
        canvas.update({"width": (self.x_max - self.x_min)})

        canvas.add(wrapper)
        canvas.save(pretty=True)

        self.dataset.update_status("Finished")
        self.dataset.finish(len(tokens_left) + len(tokens_right))
Пример #9
0
    def process(self):
        """
        Render per-item values over time as a semi-isometric SVG graphic

        Reads rows with `item`, `date` and `value` fields from the source
        file and sums the values per item and per interval. Only the `top`
        items with the highest summed value are kept, and items can
        optionally be dropped if they lack data for too many intervals. Each
        remaining item is drawn as a filled area graph; the graphs are
        stacked behind each other along a projected Y axis and share a
        projected time (X) axis. The canvas is saved to the dataset's results
        path and the dataset is finished with the number of graphs drawn.

        Parameters read from `self.parameters`:

        - `smooth`: draw bezier curves between data points instead of
          straight line segments
        - `normalise`: scale each graph to its own maximum absolute value
          instead of the maximum over all graphs
        - `complete`: percentage of intervals an item needs data for to be
          retained (0 disables the filter)
        - `label`: header text for the graph
        - `top`: maximum number of items to draw (defaults to 10)
        """
        graphs = {}
        intervals = []

        smooth = self.parameters.get("smooth")
        normalise_values = self.parameters.get("normalise")
        completeness = convert_to_int(self.parameters.get("complete"), 0)
        graph_label = self.parameters.get("label")
        top = convert_to_int(self.parameters.get("top"), 10)

        # first gather graph data: each distinct item gets its own graph and
        # for each graph we have a sequence of intervals, each interval with
        # its own value
        # the initial values are sentinels that compare greater/smaller (as
        # strings) than any zero-padded date, so min()/max() below work
        first_date = "9999-99-99"
        last_date = "0000-00-00"

        for row in self.iterate_items(self.source_file):
            if row["item"] not in graphs:
                graphs[row["item"]] = {}

            # make sure the months and days are zero-padded
            # (four-character parts, i.e. years, are left at four characters)
            interval = row.get("date", "")
            interval = "-".join([
                str(bit).zfill(2 if len(bit) != 4 else 4)
                for bit in interval.split("-")
            ])
            first_date = min(first_date, interval)
            last_date = max(last_date, interval)

            if interval not in intervals:
                intervals.append(interval)

            if interval not in graphs[row["item"]]:
                graphs[row["item"]][interval] = 0

            # values for the same item and interval are summed
            graphs[row["item"]][interval] += float(row.get("value", 0))

        # first make sure we actually have something to render
        intervals = sorted(intervals)
        if len(intervals) <= 1:
            self.dataset.update_status(
                "Not enough data for a side-by-side over-time visualisation.")
            self.dataset.finish(0)
            return

        # only retain most-occurring series - sort by sum of all frequencies
        if len(graphs) > top:
            selected_graphs = {
                graph: graphs[graph]
                for graph in sorted(
                    graphs,
                    key=lambda x: sum(
                        [graphs[x][interval] for interval in graphs[x]]),
                    reverse=True)[0:top]
            }
            graphs = selected_graphs

        # there may be items that do not have values for all intervals
        # this will distort the graph, so the next step is to make sure all
        # graphs consist of the same continuous interval list
        # NOTE(review): pad_interval is defined elsewhere; it is used here as
        # returning (number of missing intervals, padded interval dict) -
        # confirm against its definition
        missing = {graph: 0 for graph in graphs}
        for graph in graphs:
            missing[graph], graphs[graph] = pad_interval(
                graphs[graph],
                first_interval=first_date,
                last_interval=last_date)

        # now that's done, make sure the graph datapoints are in order
        # (the interval list is taken from the first graph, as all graphs
        # were padded to the same first and last interval)
        intervals = sorted(list(graphs[list(graphs)[0]].keys()))

        # delete graphs that do not have the required amount of intervals
        # this is useful to get rid of outliers and items that only occur
        # very few times over the full interval
        if completeness > 0:
            intervals_required = len(intervals) * (completeness / 100)
            disqualified = []
            for graph in graphs:
                if len(intervals) - missing[graph] < intervals_required:
                    disqualified.append(graph)

            graphs = {
                graph: graphs[graph]
                for graph in graphs if graph not in disqualified
            }

        # determine max (absolute) value per item and over all items, so we
        # can normalize them later
        limits = {}
        max_limit = 0
        for graph in graphs:
            for interval in graphs[graph]:
                limits[graph] = max(limits.get(graph, 0),
                                    abs(graphs[graph][interval]))
                max_limit = max(max_limit, abs(graphs[graph][interval]))

        # order graphs by their maximum absolute value, ascending
        limits = {
            limit: limits[limit]
            for limit in sorted(limits, key=lambda l: limits[l])
        }
        graphs = {graph: graphs[graph] for graph in limits}

        if not graphs:
            # maybe nothing is actually there to be graphed
            self.dataset.update_status(
                "No items match the selection criteria - nothing to visualise."
            )
            self.dataset.finish(0)
            return None

        # how many vertical grid lines (and labels) are to be included at most
        # 12 is a sensible default because it allows one label per month for a full
        # year's data
        max_gridlines = 12

        # If True, label is put at the lower left bottom of the graph rather than
        # outside it. Automatically set to True if one of the labels is long, as
        # else the label would fall off the screen
        label_in_graph = max([len(item) for item in graphs]) > 30

        # determine how wide each interval should be
        # the graph has a minimum width - but the graph's width will be
        # extended if at this minimum width each item does not have the
        # minimum per-item width
        min_full_width = 600
        min_item_width = 50
        item_width = max(min_item_width, min_full_width / len(intervals))

        # determine how much space each graph should get
        # same trade-off as for the interval width
        min_full_height = 300
        min_item_height = 100
        item_height = max(min_item_height, min_full_height / len(graphs))

        # margin - this should be enough for the text labels to fit in
        margin_base = 50
        margin_right = margin_base * 4
        margin_top = margin_base * 3

        # this determines the "flatness" of the isometric projection and can be
        # tweaked for different looks - basically corresponds to how far the
        # camera is above the horizon
        plane_angle = 120

        # don't change these
        # (both angles are converted from degrees to radians here)
        plane_obverse = radians((180 - plane_angle) / 2)
        plane_angle = radians(plane_angle)

        # okay, now determine the full graphic size with these dimensions projected
        # semi-isometrically. We can also use these values later for drawing
        # grid lines, et cetera. The axis widths and heights here are the
        # dimensions of the bounding box wrapping the isometrically projected axes.
        x_axis_length = (item_width * (len(intervals) - 1))
        y_axis_length = (item_height * len(graphs))

        x_axis_width = (sin(plane_angle / 2) * x_axis_length)
        y_axis_width = (sin(plane_angle / 2) * y_axis_length)
        canvas_width = x_axis_width + y_axis_width

        # leave room for graph header
        if graph_label:
            margin_top += (2 * (canvas_width / 50))

        x_axis_height = (cos(plane_angle / 2) * x_axis_length)
        y_axis_height = (cos(plane_angle / 2) * y_axis_length)
        canvas_height = x_axis_height + y_axis_height

        # now we have the dimensions, the canvas can be instantiated
        canvas = get_4cat_canvas(
            self.dataset.get_results_path(),
            width=(canvas_width + margin_base + margin_right),
            height=(canvas_height + margin_base + margin_top),
            header=graph_label)

        # draw gridlines - vertical
        # start at the origin of the projected X axis (its lower left end)
        gridline_x = y_axis_width + margin_base
        gridline_y = margin_top + canvas_height

        # canvas offsets of one step along the projected X axis ("horizontal",
        # one interval) and along the projected Y axis ("vertical", one graph)
        step_x_horizontal = sin(plane_angle / 2) * item_width
        step_y_horizontal = cos(plane_angle / 2) * item_width
        step_x_vertical = sin(plane_angle / 2) * item_height
        step_y_vertical = cos(plane_angle / 2) * item_height

        # labels for x axis
        # month and week both follow the same pattern
        # it's not always possible to distinguish between them but we will try
        # by looking for months greater than 12 in which case we are dealing
        # with weeks
        # we need to know this because for months there is an extra row in the
        # label with the full month
        is_week = False
        for i in range(0, len(intervals)):
            if re.match(r"^[0-9]{4}-[0-9]{2}",
                        intervals[i]) and int(intervals[i].split("-")[1]) > 12:
                is_week = True
                break

        # only every `skip`th interval gets a grid line and label, so that
        # roughly max_gridlines of them are drawn
        skip = max(1, int(len(intervals) / max_gridlines))
        for i in range(0, len(intervals)):
            if i % skip == 0:
                canvas.add(
                    Line(start=(gridline_x, gridline_y),
                         end=(gridline_x - y_axis_width,
                              gridline_y - y_axis_height),
                         stroke="grey",
                         stroke_width=0.25))

                # to properly position the rotated and skewed text a container
                # element is needed
                # the first label row is the first four characters of the
                # interval (the year, for zero-padded dates)
                label1 = str(intervals[i])[0:4]
                center = (gridline_x, gridline_y)
                container = SVG(x=center[0] - 25,
                                y=center[1],
                                width="50",
                                height="1.5em",
                                overflow="visible",
                                style="font-size:0.8em;")
                container.add(
                    Text(insert=("25%", "100%"),
                         text=label1,
                         transform="rotate(%f) skewX(%f)" %
                         (-degrees(plane_obverse), degrees(plane_obverse)),
                         text_anchor="middle",
                         baseline_shift="-0.5em",
                         style="font-weight:bold;"))

                # second label row: abbreviated month, plus the day of the
                # month if the interval has a day component
                if re.match(r"^[0-9]{4}-[0-9]{2}",
                            intervals[i]) and not is_week:
                    label2 = month_abbr[int(str(intervals[i])[5:7])]
                    if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}", intervals[i]):
                        label2 += " %i" % int(intervals[i][8:10])

                    container.add(
                        Text(insert=("25%", "150%"),
                             text=label2,
                             transform="rotate(%f) skewX(%f)" %
                             (-degrees(plane_obverse), degrees(plane_obverse)),
                             text_anchor="middle",
                             baseline_shift="-0.5em"))

                canvas.add(container)

            # move one interval along the projected X axis, whether or not a
            # grid line was drawn for this interval
            gridline_x += step_x_horizontal
            gridline_y -= step_y_horizontal

        # draw graphs as filled beziers
        # note that `top` is reused here: from this point on it is the height
        # on the canvas of a full-scale value, not the maximum number of items
        top = step_y_vertical * 1.5
        graph_start_x = y_axis_width + margin_base
        graph_start_y = margin_top + canvas_height

        # draw graphs in reverse order, so the bottom one is most in the
        # foreground (in case of overlap)
        for graph in reversed(list(graphs)):
            self.dataset.update_status("Rendering graph for '%s'" % graph)

            # path starting at lower left corner of graph
            area_graph = Path(fill=self.colours[self.colour_index])
            area_graph.push("M %f %f" % (graph_start_x, graph_start_y))
            previous_value = None

            graph_x = graph_start_x
            graph_y = graph_start_y
            for interval in graphs[graph]:
                # normalise value: scale to a fraction of the limit (keeping
                # the sign), then to canvas height; a limit of 0 yields 0
                value = graphs[graph][interval]
                try:
                    limit = limits[graph] if normalise_values else max_limit
                    value = top * copysign(abs(value) / limit, value)
                except ZeroDivisionError:
                    value = 0

                if previous_value is None:
                    # vertical line upwards to starting value of graph
                    area_graph.push("L %f %f" %
                                    (graph_start_x, graph_start_y - value))
                elif not smooth:
                    area_graph.push("L %f %f" % (graph_x, graph_y - value))
                else:
                    # cubic bezier ("C" path command, two control points)
                    # from previous value to current value; both control
                    # points sit halfway between the two data points
                    control_left = (graph_x - (step_x_horizontal / 2),
                                    graph_y + step_y_horizontal -
                                    previous_value - (step_y_horizontal / 2))
                    control_right = (graph_x - (step_x_horizontal / 2),
                                     graph_y - value + (step_y_horizontal / 2))
                    area_graph.push("C %f %f %f %f %f %f" %
                                    (*control_left, *control_right, graph_x,
                                     graph_y - value))

                previous_value = value
                graph_x += step_x_horizontal
                graph_y -= step_y_horizontal

            # line to the bottom of the graph at the current Y position
            # (graph_x/graph_y were advanced once past the last data point,
            # so step back one interval)
            area_graph.push(
                "L %f %f" %
                (graph_x - step_x_horizontal, graph_y + step_y_horizontal))
            area_graph.push("Z")  # then close the Path
            canvas.add(area_graph)

            # add text labels - skewing is a bit complicated and we need a
            # "center" to translate the origins properly.
            # the label goes at the start of the graph if labels are long,
            # else at its end
            if label_in_graph:
                insert = (graph_start_x + 5, graph_start_y - 10)
            else:
                insert = (graph_x - (step_x_horizontal) + 5,
                          graph_y + step_y_horizontal - 10)

            # we need to take the skewing into account for the translation
            offset_y = tan(plane_obverse) * insert[0]
            canvas.add(
                Text(insert=(0, 0),
                     text=graph,
                     transform="skewY(%f) translate(%f %f)" %
                     (-degrees(plane_obverse), insert[0],
                      insert[1] + offset_y)))

            # cycle colours, back to the beginning if all have been used
            self.colour_index += 1
            if self.colour_index >= len(self.colours):
                self.colour_index = 0

            # the next graph starts one step further along the projected Y axis
            graph_start_x -= step_x_vertical
            graph_start_y -= step_y_vertical

        # draw gridlines - horizontal
        # one line per graph, starting from the far end of the projected Y axis
        gridline_x = margin_base
        gridline_y = margin_top + canvas_height - y_axis_height
        for graph in graphs:
            gridline_x += step_x_vertical
            gridline_y += step_y_vertical
            canvas.add(
                Line(start=(gridline_x, gridline_y),
                     end=(gridline_x + x_axis_width,
                          gridline_y - x_axis_height),
                     stroke="black",
                     stroke_width=1))

        # x axis
        canvas.add(
            Line(start=(margin_base + y_axis_width,
                        margin_top + canvas_height),
                 end=(margin_base + canvas_width,
                      margin_top + canvas_height - x_axis_height),
                 stroke="black",
                 stroke_width=2))

        # and finally save the SVG
        canvas.save(pretty=True)
        self.dataset.finish(len(graphs))
Пример #10
0
	def process(self):
		"""
		Render an SVG histogram/bar chart using a previous frequency analysis
		as input.

		Reads the `date` and `frequency` fields of every row in the source
		file, pads the resulting interval list via `pad_interval`, and draws
		one bar per interval on a fixed-size canvas with axes, ticks, labels,
		an optional header (the `header` parameter) and a footer banner. The
		SVG is saved to the dataset's results path; the dataset is finished
		with the number of intervals drawn, or with 0 if there is at most one
		interval.
		"""
		self.dataset.update_status("Reading source file")
		header = self.parameters.get("header", self.options["header"]["default"])
		max_posts = 0

		# collect post numbers per month
		intervals = {}
		for post in self.iterate_csv_items(self.source_file):
			intervals[post["date"]] = int(post["frequency"])
			max_posts = max(max_posts, int(post["frequency"]))

		if len(intervals) <= 1:
			self.dataset.update_status("Not enough data available for a histogram; need more than one time series.")
			self.dataset.finish(0)
			return

		self.dataset.update_status("Cleaning up data")
		(missing, intervals) = pad_interval(intervals)

		# create histogram
		self.dataset.update_status("Drawing histogram")

		# you may change the following variables to adjust the graph dimensions
		width = 1024
		height = 786
		y_margin = 75
		x_margin = 50
		x_margin_left = x_margin * 2
		tick_width = 5

		fontsize_normal = int(height / 40)
		fontsize_small = int(height / 75)

		# better don't touch the following
		line_width = round(width / 512)
		y_margin_top = 150 if header else 50
		y_height = height - (y_margin + y_margin_top)
		x_width = width - (x_margin + x_margin_left)
		canvas = Drawing(filename=str(self.dataset.get_results_path()), size=(width, height),
						 style="font-family:monospace;font-size:%ipx" % fontsize_normal)

		# normalize the Y axis to a multiple of a power of 10
		magnitude = pow(10, len(str(max_posts)) - 1)  # ew
		# if every frequency is 0, the rounded maximum is 0 as well, which
		# would make the bar height calculation below divide by zero - fall
		# back to a Y axis running from 0 to 1 in that case
		max_neat = (math.ceil(max_posts / magnitude) * magnitude) or 1
		self.dataset.update_status("Max (normalized): %i (%i) (magnitude: %i)" % (max_posts, max_neat, magnitude))

		# draw border
		canvas.add(Rect(
			insert=(0, 0),
			size=(width, height),
			stroke="#000",
			stroke_width=line_width,
			fill="#FFF"
		))

		# draw header on a black background if needed
		if header:
			# truncate long headers so they stay legible
			if len(header) > 40:
				header = header[:37] + "..."

			header_rect_height = (y_margin_top / 1.5)
			# shorter headers get a larger font
			header_fontsize = (width / len(header))

			header_container = SVG(insert=(0, 0), size=(width, header_rect_height))
			header_container.add(Rect(
				insert=(0, 0),
				size=(width, header_rect_height),
				fill="#000"
			))
			header_container.add(Text(
				insert=("50%", "50%"),
				text=header,
				dominant_baseline="middle",
				text_anchor="middle",
				fill="#FFF",
				# CSS lengths need a unit; use px like the canvas style above
				style="font-size:%ipx" % header_fontsize
			))
			canvas.add(header_container)

		# horizontal grid lines
		for i in range(0, 10):
			offset = (y_height / 10) * i
			canvas.add(Line(
				start=(x_margin_left, y_margin_top + offset),
				end=(width - x_margin, y_margin_top + offset),
				stroke="#EEE",
				stroke_width=line_width
			))

		# draw bars
		item_width = (width - (x_margin + x_margin_left)) / len(intervals)
		item_height = (height - y_margin - y_margin_top)
		bar_width = item_width * 0.9
		# x is the left edge of the current bar, centred in its item slot
		x = x_margin_left + (item_width / 2) - (bar_width / 2)

		# bars that are wide enough get rounded top corners
		if bar_width >= 8:
			arc_adjust = max(8, int(item_width / 5)) / 2
		else:
			arc_adjust = 0

		for interval in intervals:
			posts = int(intervals[interval])
			bar_height = ((posts / max_neat) * item_height)
			self.dataset.update_status("%s: %i posts" % (interval, posts))
			bar_top = height - y_margin - bar_height
			bar_bottom = height - y_margin

			# nothing to draw for empty intervals, but keep the slot
			if bar_height == 0:
				x += item_width
				continue

			# outline of the bar, clockwise from the bottom left corner; the
			# corners are only rounded if the bar is taller than the arc
			bar = Path(fill="#000")
			bar.push("M %f %f" % (x, bar_bottom))
			bar.push("L %f %f" % (x, bar_top + (arc_adjust if bar_height > arc_adjust else 0)))
			if bar_height > arc_adjust > 0:
				control = (x, bar_top)
				bar.push("C %f %f %f %f %f %f" % (*control, *control, x + arc_adjust, bar_top))
			bar.push("L %f %f" % (x + bar_width - arc_adjust, height - y_margin - bar_height))
			if bar_height > arc_adjust > 0:
				control = (x + bar_width, bar_top)
				bar.push("C %f %f %f %f %f %f" % (*control, *control, x + bar_width, bar_top + arc_adjust))
			bar.push("L %f %f" % (x + bar_width, height - y_margin))
			bar.push("Z")
			canvas.add(bar)

			x += item_width

		# draw X and Y axis
		canvas.add(Line(
			start=(x_margin_left, height - y_margin),
			end=(width - x_margin, height - y_margin),
			stroke="#000",
			stroke_width=2
		))
		canvas.add(Line(
			start=(x_margin_left, y_margin_top),
			end=(x_margin_left, height - y_margin),
			stroke="#000",
			stroke_width=2
		))

		# draw ticks on Y axis
		for i in range(0, 10):
			offset = (y_height / 10) * i
			canvas.add(Line(
				start=(x_margin_left - tick_width, y_margin_top + offset),
				end=(x_margin_left, y_margin_top + offset),
				stroke="#000",
				stroke_width=line_width
			))

		# draw ticks on X axis, one in the middle of each item slot
		for i in range(0, len(intervals)):
			offset = (x_width / len(intervals)) * (i + 0.5)
			canvas.add(Line(
				start=(x_margin_left + offset, height - y_margin),
				end=(x_margin_left + offset, height - y_margin + tick_width),
				stroke="#000",
				stroke_width=line_width
			))

		# prettify

		# y labels: eleven labels from 0 to max_neat, bottom to top
		origin = (x_margin_left / 2)
		step = y_height / 10
		for i in range(0, 11):
			label = str(int((max_neat / 10) * i))
			labelsize = (len(label) * fontsize_normal * 1.25, fontsize_normal)
			label_x = origin - (tick_width * 2)
			label_y = height - y_margin - (i * step) - (labelsize[1] / 2)
			label_container = SVG(
				insert=(label_x, label_y),
				size=(x_margin_left / 2, x_margin_left / 5)
			)
			label_container.add(Text(
				insert=("100%", "50%"),
				text=label,
				dominant_baseline="middle",
				text_anchor="end"
			))
			canvas.add(label_container)

		# x labels
		label_width = max(fontsize_small * 6, item_width)
		label_x = x_margin_left
		label_y = height - y_margin + (tick_width * 2)
		# X position up to which the previously drawn label extends; a label
		# is only drawn if it starts beyond this, to avoid overlapping labels
		next_free_x = 0
		for interval in intervals:
			if len(interval) == 7:
				# YYYY-MM
				label = month_abbr[int(interval[5:7])] + "\n" + interval[0:4]
			elif len(interval) == 10:
				# YYYY-MM-DD
				label = str(int(interval[8:10])) + month_abbr[int(interval[5:7])] + "\n" + interval[0:4]
			else:
				label = interval.replace("-", "\n")

			if label_x > next_free_x:
				# each line of the label is a separate text element, shifted
				# further down than the previous one
				shift = 0
				for line in label.split("\n"):
					label_container = SVG(
						insert=(label_x + (item_width / 2) - (label_width / 2), label_y + (tick_width * 2)),
						size=(label_width, y_margin), overflow="visible")
					label_container.add(Text(
						insert=("50%", "0%"),
						text=line,
						dominant_baseline="middle",
						text_anchor="middle",
						baseline_shift=-shift
					))
					shift += fontsize_small * 2
					canvas.add(label_container)
					next_free_x = label_x + (label_width * 0.9)
			label_x += item_width

		# 4cat logo
		label = "made with 4cat - 4cat.oilab.nl"
		footersize = (fontsize_small * len(label) * 0.7, fontsize_small * 2)
		footer = SVG(insert=(width - footersize[0], height - footersize[1]), size=footersize)
		footer.add(Rect(insert=(0, 0), size=("100%", "100%"), fill="#000"))
		footer.add(Text(
			insert=("50%", "50%"),
			text=label,
			dominant_baseline="middle",
			text_anchor="middle",
			fill="#FFF",
			# CSS lengths need a unit; use px like the canvas style above
			style="font-size:%ipx" % fontsize_small
		))
		canvas.add(footer)

		canvas.save(pretty=True)

		self.dataset.update_status("Finished")
		self.dataset.finish(len(intervals))
Пример #11
0
def get_4cat_canvas(path,
                    width,
                    height,
                    header=None,
                    footer="made with 4CAT",
                    fontsize_normal=None,
                    fontsize_small=None,
                    fontsize_large=None):
    """
    Create the standard SVG canvas that 4CAT graphs are drawn on

    The canvas has a white background with a black border and a monospace
    base font. If given, the header is drawn as white text on a black bar at
    the top, and the footer as a white-on-black banner in the lower right
    corner.

    :param path:  The path where the SVG graph will be saved
    :param width:  Width of the canvas
    :param height:  Height of the canvas
    :param header:  Header text; no header is drawn if empty
    :param footer:  Footer text; no footer is drawn if empty
    :param fontsize_normal:  Font size of normal text, defaults to 1/75th of
    the canvas width
    :param fontsize_small:  Font size of small text (e.g. footer), defaults
    to 1/100th of the canvas width
    :param fontsize_large:  Font size of large text (e.g. header), defaults
    to 1/50th of the canvas width
    :return Drawing:  svgwrite canvas that can be drawn to
    """
    from svgwrite.container import SVG
    from svgwrite.drawing import Drawing
    from svgwrite.shapes import Rect
    from svgwrite.text import Text

    def banner(insert, size, label, fontsize):
        # a black box with the label centered on it in white
        box = SVG(insert=insert, size=size)
        box.add(Rect(insert=(0, 0), size=("100%", "100%"), fill="#000"))
        box.add(
            Text(insert=("50%", "50%"),
                 text=label,
                 dominant_baseline="middle",
                 text_anchor="middle",
                 fill="#FFF",
                 style="font-size:%ipx" % fontsize))
        return box

    # font sizes scale with the canvas width unless given explicitly
    fontsize_normal = width / 75 if fontsize_normal is None else fontsize_normal
    fontsize_small = width / 100 if fontsize_small is None else fontsize_small
    fontsize_large = width / 50 if fontsize_large is None else fontsize_large

    # the canvas itself, with a bordered white rectangle as background
    base_style = "font-family:monospace;font-size:%ipx" % fontsize_normal
    canvas = Drawing(str(path), size=(width, height), style=base_style)
    background = Rect(insert=(0, 0),
                      size=(width, height),
                      stroke="#000",
                      stroke_width=2,
                      fill="#FFF")
    canvas.add(background)

    # header: full-width bar at the top, twice as high as its font size
    if header:
        canvas.add(
            banner((0, 0), ("100%", fontsize_large * 2), header,
                   fontsize_large))

    # footer (i.e. 4cat banner): sized to fit the text, lower right corner
    if footer:
        footersize = (fontsize_small * len(footer) * 0.7, fontsize_small * 2)
        footer_position = (width - footersize[0], height - footersize[1])
        canvas.add(banner(footer_position, footersize, footer,
                          fontsize_small))

    return canvas
Пример #12
0
 def test_add_svg_as_subelement(self):
     """An SVG container added to another one is serialised inside it."""
     outer = SVG(id='svg')
     inner = SVG(id='subsvg')
     outer.add(inner)
     expected = '<svg id="svg"><defs /><svg id="subsvg"><defs /></svg></svg>'
     self.assertEqual(outer.tostring(), expected)
Пример #13
0
 def test_constructor(self):
     """insert and size end up as x/y and width/height attributes."""
     container = SVG(insert=(10,20), size=(100,200))
     is_symbol = isinstance(container, Symbol)
     self.assertTrue(is_symbol)
     expected = '<svg height="200" width="100" x="10" y="20"><defs /></svg>'
     self.assertEqual(container.tostring(), expected)
Пример #14
0
 def test_constructor(self):
     """insert and size end up as x/y and width/height attributes."""
     position, dimensions = (10, 20), (100, 200)
     element = SVG(insert=position, size=dimensions)
     self.assertTrue(isinstance(element, Symbol))
     rendered = element.tostring()
     self.assertEqual(
         rendered,
         '<svg height="200" width="100" x="10" y="20"><defs /></svg>')
Пример #15
0
def main():
    """
    Plot a transaction trace, read as one line of JSON from stdin, as SVG.

    The trace must be a JSON object with an "opcodes_lst" list. Every opcode
    becomes one row with its name, marker squares for some opcode kinds, one
    square per stack and per memory entry, and a line whose length is the
    opcode's gas cost. Consecutive call-related opcodes are connected with
    curves. Depending on the "stdout_bool" argument the SVG is printed as
    JSON to stdout or saved to a file.
    """
    print("PY_PLUGIN - PLOT_TX_TRACE")

    #----------------------------
    # INPUT
    args_map = parse_args()

    raw_line = sys.stdin.readline()
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>")

    tx_id_str = args_map["tx_id_str"]
    tx_trace_map = json.loads(raw_line)
    assert isinstance(tx_trace_map, dict)

    #----------------------------
    # DRAWING - the plot lives in its own nested SVG, offset from the legend
    plot_y = 50

    dwg = svgwrite.Drawing(filename=f"{modd_str}/test.svg", debug=True)
    plot_svg = SVG((50, plot_y))
    dwg.add(plot_svg)

    plot_width_mm_int = 500

    # column positions (in mm) of the parts of an opcode row
    x_ops_base = 10
    x_stack_base = 30
    x_memory_base = x_stack_base + 18
    x_gas_cost_base = x_memory_base + 10

    #----------------------------
    # LEGEND
    legend_svg = SVG((50, 10))
    dwg.add(legend_svg)

    legend_svg.add(
        dwg.text(f"tx ID - {tx_id_str}", (0 * mm, 4 * mm), font_size=8))

    column_titles = (
        ("stack", x_stack_base),
        ("memory", x_memory_base - 3),
        ("gas cost", x_gas_cost_base),
    )
    for title, title_x in column_titles:
        legend_svg.add(
            dwg.text(title, (title_x * mm, plot_y - 12), font_size=8))

    #----------------------------
    # Y positions of the rows that got a marker, per marker kind
    memory_ops_lst = []
    call_ops_lst = []
    logs_ops_lst = []

    call_op_names = ("CALLDATASIZE", "CALLVALUE", "CALLER")

    def marker(x, y_local, side, colour):
        # small square left of the opcode name
        return dwg.rect(insert=(x * mm, y_local * mm),
                        size=(side * mm, side * mm),
                        fill=colour,
                        stroke='black',
                        stroke_width=0.5)

    def add_cells(row_svg, group_id, x_base, entries, colour):
        # one square per entry, laid out left to right from x_base
        group = row_svg.add(dwg.g(id=group_id, stroke='blue'))
        for position, _ in enumerate(entries):
            cell_x = x_base + position * 2
            group.add(
                dwg.rect(insert=(cell_x * mm, 0.5 * mm),
                         size=(1.5 * mm, 1.5 * mm),
                         fill=colour,
                         stroke='black',
                         stroke_width=0.5))

    for row_index, op_map in enumerate(tx_trace_map["opcodes_lst"]):

        op_str = op_map["op_str"].strip()
        gas_cost_int = int(op_map["gas_cost_uint"])

        stack_lst = op_map["stack_lst"]
        memory_lst = op_map["memory_lst"]

        gas_x_start = x_gas_cost_base
        gas_x_end = gas_x_start + gas_cost_int
        y = row_index * 8

        # every opcode gets its own nested SVG, so that its elements can be
        # positioned relative to the row
        op_svg = SVG((0, y))
        plot_svg.add(op_svg)

        #----------------------------
        # OP_GAS_COST__LINE
        op_svg.add(
            dwg.line(start=(gas_x_start * mm, 1.25 * mm),
                     end=(gas_x_end * mm, 1.25 * mm),
                     stroke="green",
                     stroke_width=3))

        #----------------------------
        # DEBUGGING - alignment line. used to align other elements in line.
        op_svg.add(
            dwg.line(start=((x_stack_base - 1) * mm, 1.25 * mm),
                     end=(gas_x_start * mm, 1.25 * mm),
                     stroke="black",
                     stroke_width=0.5))

        #----------------------------
        # OP__TEXT - text is anchored at its lower left corner, not at the
        #            upper left like everything else, so it is placed a bit
        #            below 0,0
        op_svg.add(dwg.text(op_str, (x_ops_base * mm, 2 * mm), font_size=8))

        # MSTORE
        if op_str == "MSTORE":
            y__local = 0.8
            op_svg.add(marker(x_ops_base - 1.5, y__local, 1, 'blue'))
            memory_ops_lst.append(y + y__local)

        # CALLDATASIZE/CALLVALUE/CALLER
        if op_str in call_op_names:
            y__local = 0.6
            op_svg.add(marker(x_ops_base - 2, y__local, 1.2, 'red'))
            call_ops_lst.append(y + y__local)

        # LOG
        if op_str.startswith("LOG"):
            y__local = 0.6
            op_svg.add(marker(x_ops_base - 2, y__local, 1, 'cyan'))
            logs_ops_lst.append(y + y__local)

        #----------------------------
        # STACK
        add_cells(op_svg, 'op_stack', x_stack_base, stack_lst, 'yellow')

        #----------------------------
        # MEMORY
        add_cells(op_svg, 'op_memory', x_memory_base, memory_lst, 'orange')

    #--------------------------------------------------
    # ARCHS - a quadratic curve between each pair of consecutive call markers,
    #         bulging towards the left edge of the plot
    x_call_rect__global_px = 30
    for y_from, y_to in zip(call_ops_lst, call_ops_lst[1:]):
        start_str = f"{x_call_rect__global_px} {int(y_from+4.4)}"
        end_str = f"{x_call_rect__global_px} {int(y_to+4.4)}"
        control_point_str = f"0 {int(y_from+(y_to-y_from)/2)}"

        plot_svg.add(
            dwg.path(d=f"M {start_str} Q {control_point_str} {end_str}",
                     fill="none",
                     stroke="red",
                     stroke_width=0.5))

    print("done drawing...")

    # FILE_SAVE
    if not args_map["stdout_bool"]:
        dwg.save()
        return

    out_map = {"svg_str": dwg.tostring()}
    print(f"GF_OUT:{json.dumps(out_map)}")
Пример #16
0
class TextBoxCompare:
    """Render the comparison of two text blocks as a nested SVG element.

    ``compare`` diffs two text blocks and emits the second one as SVG text
    lines, colouring inserted/replaced fragments differently from unchanged
    ones. Layout state (current line width, accumulated height, widest line)
    is kept on the instance and is not reset between ``compare`` calls.
    """

    # Entry text in the split result that is treated as a line break request.
    _NEWLINE_MARKER = "%NewLine%"

    def __init__(self):
        """Initialise layout state from the ``render_text.Render`` settings."""
        self.max_line_width = render_text.Render.max_textbox_len
        self.pos_x = 0
        self.pos_y = 0
        # Width already consumed on the line currently being built.
        self.act_line_width = 0
        self.font_family = None
        self.font_size = None
        self.width, self.height_line = render_text.Render.get_text_size("")
        # y position at which the next finished line will be placed.
        self.height = self.height_line
        # Widest line produced so far; becomes the SVG width.
        self.width_max = 0
        # Target SVG container; created by compare().
        self.svg = None
        # Text element of the line currently being built (None = no open line).
        self.svg_text = Text(text="")

    def _emit_current_line(self):
        """Position the open text line at the current height and add it to the SVG."""
        self.svg_text["x"] = 0
        self.svg_text["y"] = self.height
        self.svg.add(self.svg_text)
        self.svg_text = None

    def build_svg_line(self, line_fragment, fill):
        """Append ``line_fragment`` to the output, wrapping lines as needed.

        The fragment is split by ``render_text.Render.split_text_to_lines``
        into ``(text, width, height)`` entries; an entry whose text equals
        ``"%NewLine%"`` in a multi-entry result closes the current line.

        Args:
            line_fragment: Text to append to the current output position.
            fill: Fill colour applied to every span created for the fragment.
        """
        _pieces = render_text.Render.split_text_to_lines(
            line_fragment, self.act_line_width)

        # NOTE(review): the splitter presumably always returns at least one
        # entry; guard anyway so an empty result cannot raise IndexError.
        if not _pieces:
            return

        if self.svg_text is None:
            self.svg_text = Text(text="")

        if len(_pieces) == 1:
            _text, _w, _h = _pieces[0]
            self.svg_text.add(TSpan(text=_text, fill=fill))

            if self.act_line_width < self.max_line_width:
                self.act_line_width += _w
                self.width_max = max(self.act_line_width, self.width_max)
            else:
                # The line is already full: the fragment still lands on it,
                # so account for its width before closing the line.
                self.width_max = max(self.act_line_width + _w, self.width_max)
                self._emit_current_line()
                self.height += _h
                self.act_line_width = 0
            return

        for _text, _w, _h in _pieces:

            if self.svg_text is None:
                self.svg_text = Text(text="")

            if _text == self._NEWLINE_MARKER:
                self._emit_current_line()
                self.height += _h
                self.act_line_width = 0

            elif self.act_line_width < self.max_line_width:
                self.svg_text.add(TSpan(text=_text, fill=fill))
                self.act_line_width += _w
                self.width_max = max(self.act_line_width, self.width_max)

            else:
                # Current line is full: close it and start the next line with
                # this fragment. (Previously the fragment was dropped here
                # although its width was already booked on the new line.)
                self._emit_current_line()
                self.height += _h
                self.svg_text = Text(text="")
                self.svg_text.add(TSpan(text=_text, fill=fill))
                self.act_line_width = _w
                self.width_max = max(self.act_line_width, self.width_max)

    def compare(self, text_block1, text_block2):
        """Diff two text blocks and render the result into a new SVG element.

        Unchanged fragments are drawn in black, fragments that were inserted
        or replaced in ``text_block2`` in blue; deleted fragments are not
        drawn. If exactly one block is empty, the non-empty block is drawn
        in black.

        Args:
            text_block1: Original text.
            text_block2: Text to compare against ``text_block1``.

        Returns:
            Tuple ``(svg, width_max, height)``: the SVG element, the widest
            line width and the accumulated height.
        """
        self.svg = SVG(insert=(self.pos_x, self.pos_y),
                       font_family=self.font_family,
                       font_size=self.font_size)

        _fill_same = rgb(0x00, 0x0, 0x0)
        _fill_changed = rgb(0x00, 0x80, 0xff)

        _len_textblock1 = len(text_block1)
        _len_textblock2 = len(text_block2)

        if text_block1 == text_block2:
            self.build_svg_line(text_block2, _fill_same)

        elif _len_textblock1 > 0 and _len_textblock2 > 0:
            _matcher = SequenceMatcher(None, text_block1, text_block2)

            for tag, _s1, _e1, _s2, _e2 in _matcher.get_opcodes():

                if tag in ("replace", "insert"):
                    # New or changed content always comes from the second block.
                    self.build_svg_line(text_block2[_s2:_e2], _fill_changed)

                elif tag == "equal":
                    self.build_svg_line(text_block1[_s1:_e1], _fill_same)

                # "delete": content exists only in the first block, nothing to draw.

        elif _len_textblock1 == 0 and _len_textblock2 > 0:
            self.build_svg_line(text_block2, _fill_same)

        elif _len_textblock1 > 0 and _len_textblock2 == 0:
            self.build_svg_line(text_block1, _fill_same)

        else:
            self.build_svg_line("", _fill_same)

        # Flush a line that is still open and leave some space below it.
        if self.svg_text is not None:
            self._emit_current_line()
            self.height += self.height_line * 0.3

        self.svg['height'] = self.height
        self.svg['width'] = self.width_max

        return self.svg, self.width_max, self.height