def draw(self, svg_parent, tree_layout, parent, child):
    """Draw the edge from ``parent`` to ``child``.

    When the child sits exactly one level below the parent (or higher), the
    work is delegated to ``EdgeStyle.draw``. When one or more levels are
    skipped, the edge is drawn as two ``Line`` elements: a slanted segment
    from the parent down to the height an empty node on the next level would
    have, followed by a vertical segment down to the child.

    :param svg_parent: SVG container the ``Line`` elements are added to
    :param tree_layout: layout object providing ``y_distance`` and
        ``label_y_dodge``
    :param parent: node the edge starts from
    :param child: node the edge ends at
    """
    from svgwrite.shapes import Line

    if child.depth <= parent.depth + 1:
        # No level is skipped: the base edge style handles this case.
        EdgeStyle.draw(self, svg_parent, tree_layout, parent, child)
        return

    edge_top = parent.y + parent.height
    if parent.height > 0:
        edge_top += 0.2  # extra space for descenders

    child_y = em(tree_layout.y_distance(parent.depth, child.depth) + child.y)
    child_x = perc(child.x + child.width / 2)

    # We are skipping level(s). Find the y position that an empty node on
    # the next level would have; the edge bends there.
    bend_y = em(
        tree_layout.label_y_dodge(level=parent.depth + 1, height=0)[0]
        + tree_layout.y_distance(parent.depth, parent.depth + 1))

    # TODO: do as Path?
    segments = (
        ((u"50%", em(edge_top)), (child_x, bend_y)),
        ((child_x, bend_y), (child_x, child_y)),
    )
    for seg_start, seg_end in segments:
        svg_parent.add(Line(start=seg_start, end=seg_end, **self.svg_opts()))
def _create_rect_arrow(scene: Drawing, start: tuple, point1: tuple,
                       point2: tuple, end: tuple, color: tuple):
    """Draw a three-segment path through the given points.

    The path starts at ``start``, goes to ``point1``, then ``point2`` and
    finally to ``end``. Each segment is added to ``scene`` as its own
    ``Line`` element with a stroke width of 1.

    Args:
        scene (Drawing): The scene the segments are added to.
        start: The first point.
        point1: The second point.
        point2: The third point.
        end: The last point.
        color: The arrow's color; unpacked into ``rgb(*color)``.

    Returns:
        None. The scene is modified in place.
    """
    waypoints = (start, point1, point2, end)
    # Pair every waypoint with its successor: (start, point1), (point1, point2), ...
    for seg_from, seg_to in zip(waypoints, waypoints[1:]):
        scene.add(
            Line(seg_from,
                 seg_to,
                 shape_rendering='inherit',
                 stroke=rgb(*color),
                 stroke_width=1))
def get_runner_end(self, line, is_out):
    """Return the marker drawn at the end of a runner's line.

    :param line: line whose end point is looked up via ``get_line_end``
    :param is_out: if true, an X-shaped ``Group`` with class ``'out'`` is
        returned; otherwise a ``Circle`` of radius ``CIRCLE_R``
    :return: the marker element, centred on the line end
    """
    x, y = self.get_line_end(line)
    if not is_out:
        return Circle((x, y), CIRCLE_R)

    left, right = x - X_SIZE, x + X_SIZE
    upper, lower = y - X_SIZE, y + X_SIZE

    cross = Group()
    cross['class'] = 'out'
    # Two diagonals forming an X around the end point.
    cross.add(Line((left, upper), (right, lower)))
    cross.add(Line((left, lower), (right, upper)))
    return cross
def get_team_box(self, id, ht):
    """Build the outlined box for one team, with two vertical dividers.

    :param id: value stored as the group's ``id`` attribute
    :param ht: height of the box
    :return: a ``Group`` with class ``'team-box'`` containing the outer
        ``Rect`` and one divider after the name column and one after the
        score column
    """
    team_group = Group()
    team_group['id'] = id
    team_group['class'] = 'team-box'
    team_group.add(Rect((ORIGIN_X, ORIGIN_Y), (ATBAT_W, ht)))

    bottom = ORIGIN_Y + ht
    for divider_x in (ORIGIN_X + NAME_W, ORIGIN_X + NAME_W + SCORE_W):
        team_group.add(Line((divider_x, ORIGIN_Y), (divider_x, bottom)))
    return team_group
def _put_cross(self, cell, x, y, fill, stone_size):
    """Add an X-shaped mark centred on board position ``(x, y)`` to ``cell``.

    Each arm reaches a quarter of ``stone_size`` from the centre. Coordinates
    are scaled by ``self.unit`` and both strokes are three times
    ``self._line_width`` wide.

    :param cell: container the two ``Line`` elements are added to
    :param x: horizontal position, in board units
    :param y: vertical position, in board units
    :param fill: colour used for both ``fill`` and ``stroke``
    :param stone_size: stone size the cross is proportioned to
    """
    reach = stone_size / 4
    left = (x - reach) * self.unit
    right = (x + reach) * self.unit
    upper = (y - reach) * self.unit
    lower = (y + reach) * self.unit

    diagonals = (
        ((left, upper), (right, lower)),
        ((left, lower), (right, upper)),
    )
    for seg_start, seg_end in diagonals:
        cell.add(
            Line(start=seg_start,
                 end=seg_end,
                 fill=fill,
                 stroke=fill,
                 stroke_width=self._line_width * 3))
def draw_inning_separators(self):
    """Draw the horizontal lines that separate consecutive innings.

    For every inning except the last, a separator is drawn across the first
    (left) at-bat column. A matching separator is drawn across the second
    column as well, except after the second-to-last inning when
    ``is_no_final_bottom()`` is true.
    """
    y = ORIGIN_Y
    for index, inning in enumerate(self.game.innings[:-1]):
        y += self.get_inning_height(inning)

        self.dwg.add(
            Line((ORIGIN_X, y), (ORIGIN_X + ATBAT_W, y), class_='team-box'))

        is_final_separator = index == len(self.game.innings) - 2
        if is_final_separator and self.is_no_final_bottom():
            continue

        self.dwg.add(
            Line((ORIGIN_X + ATBAT_W + SEPARATION, y),
                 (ORIGIN_X + 2 * ATBAT_W + SEPARATION, y),
                 class_='team-box'))
def draw_arrow_w_text_middle(self, scene: Drawing, start: tuple,
                             point1: tuple, point2: tuple, end: tuple,
                             height: int, arrowsize: int, is_curved: bool,
                             text: str, font_size: int, font_family: str,
                             over: bool, color: tuple):
    """Draw an arrow through four points and centre a text label on it.

    The shaft is drawn by ``_create_curve_arrow`` or ``_create_rect_arrow``
    depending on ``is_curved``. The arrow head consists of two strokes that
    meet at ``end``, each starting ``arrowsize`` above it. The label is
    centred horizontally between ``start`` and ``point2`` and placed one
    ``font_size`` above (``over`` true) or below ``height``.

    Args:
        scene: The drawing all elements are added to.
        start: First point of the shaft.
        point1: Second point of the shaft.
        point2: Third point of the shaft.
        end: Last point of the shaft; tip of the arrow head.
        height: Reference y coordinate for the label.
        arrowsize: Horizontal and vertical extent of each arrow-head stroke.
        is_curved: Selects the curved instead of the rectangular shaft.
        text: Label text.
        font_size: Label font size, also used as the label's y offset.
        font_family: Label font family.
        over: Place the label above ``height`` instead of below it.
        color: Colour components, unpacked into ``rgb(*color)``.
    """
    draw_shaft = (self._create_curve_arrow
                  if is_curved else self._create_rect_arrow)
    draw_shaft(scene, start, point1, point2, end, color)

    # Draw the arrow head: one stroke from each side, meeting at the tip.
    tip = (end[0], end[1])
    for barb_x in (end[0] - arrowsize, end[0] + arrowsize):
        scene.add(
            Line((barb_x, end[1] - arrowsize),
                 tip,
                 shape_rendering='inherit',
                 stroke=rgb(*color),
                 stroke_width=1))

    direction = -1 if over else 1

    # Write label in the middle, above or below the reference height
    labelx = min(start[0], point2[0]) + abs(start[0] - point2[0]) // 2
    labely = height + direction * font_size  # TODO: Should be font height!

    # TODO: alignment_baseline should be hanging or baseline!
    label = Text(text,
                 insert=(labelx, labely),
                 fill=rgb(*color),
                 font_family=font_family,
                 font_size=font_size,
                 text_rendering='inherit',
                 alignment_baseline='central',
                 text_anchor='middle')
    scene.add(label)
def create_doors(doors, walls):
    """Create one red ``Line`` per door from the wall points that belong to it.

    Door ids are taken from the part before the ``:`` in the first element of
    each entry of ``doors``. Wall points whose ``name`` has the form
    ``"<id>:<index>"`` and whose id is a known door id are grouped per door;
    each door must end up with exactly two points, which become the start and
    end of its line.

    :param doors: iterable of sequences whose first element is an
        ``"<id>:<index>"`` string
    :param walls: iterable of points with ``name``, ``x`` and ``y`` attributes
    :return: list of ``Line`` elements, one per door that has wall points
    :raises AssertionError: if a door does not have exactly two points
    :raises ValueError: if a name contains more than one ``:``
    """
    door_ids = set()
    for door_point in doors:
        door_id, _ = door_point[0].split(':')
        door_ids.add(door_id)

    door_coords = defaultdict(list)
    for point in walls:
        name = point.name
        if name is None or ':' not in name:
            continue
        owner_id, _ = name.split(':')
        if owner_id in door_ids:
            door_coords[owner_id].append(point)

    line_style = {'stroke': 'red', 'stroke-width': 3}
    door_lines = []
    for door_id, endpoints in door_coords.items():
        assert len(endpoints) == 2
        print('Create door {}'.format(door_id))
        first, second = endpoints[0], endpoints[1]
        door_lines.append(
            Line((first.x, first.y), (second.x, second.y), **line_style))
    return door_lines
def get_parallax_lines():
    """Build three groups of random horizontal line segments.

    Rows are generated from ``anim['verticalInterval']`` downwards until
    ``anim['height']`` is reached, each row holding 1-9 segments of random
    length at random x positions in ``[0, 2 * anim['width'])``. Segments are
    distributed over three layers by their index within the row. For each
    layer a second group receives a subset of the same segments and is
    translated by the right-most extent of that layer.

    :return: list of three ``Group`` containers, each holding the layer
        group, its translated duplicate group and the result of
        ``animate(extent, duration)`` with durations 2, 2.5 and 3
    """
    groups = [Group() for _ in range(3)]
    duplicated_groups = [Group() for _ in range(3)]
    longest = [0] * 3  # right-most x reached by any segment, per layer

    current_y = anim['verticalInterval']
    while current_y < anim['height']:
        line_quantity = randint(1, 9)
        lengths = [randint(5, 30) for _ in range(line_quantity)]
        positions = sample(range(anim['width'] * 2), line_quantity)

        for i, (length, pos) in enumerate(zip(lengths, positions)):
            # Compare by value: `is 0` only worked through CPython's small-int
            # cache and raises a SyntaxWarning on modern Python.
            if i % 3 == 0:
                n = 2
            elif i % 2 == 0:
                n = 1
            else:
                n = 0

            longest[n] = max(longest[n], pos + length)

            line = Line((pos, current_y), (pos + length, current_y))
            # NOTE(review): an x position is compared against the *height*
            # here; possibly anim['width'] was intended - confirm.
            if pos < anim['height']:
                duplicated_groups[n].add(line)
            groups[n].add(line)

        current_y += randint(1, 5) * anim['verticalInterval']

    dur = 2
    containers = [Group(**anim['style']) for _ in range(3)]
    for container, group, duplicate, extent in zip(containers, groups,
                                                   duplicated_groups, longest):
        container.add(group)
        duplicate['transform'] = 'translate({}, 0)'.format(extent)
        container.add(duplicate)
        container.add(animate(extent, dur))
        dur += 0.5
    return containers
def _draw_wirelesslink(self, url, labels):
    """
    Draw a line with labels representing a WirelessLink, advancing
    ``self.cursor`` past it.

    :param url: Hyperlink URL, appended to ``self.base_url``
    :param labels: Sized iterable of text labels; the first one is rendered
        with the ``bold`` class
    :return: SVG group (class ``connector``) holding the line and the
        hyperlinked labels
    """
    connector = Group(class_='connector')

    # The line spans padding, one row per label, and padding again
    top = (OFFSET + self.center, self.cursor)
    span = PADDING * 2 + LINE_HEIGHT * len(labels) + PADDING * 2
    bottom = (top[0], top[1] + span)
    connector.add(Line(start=top, end=bottom, class_='wireless-link'))
    self.cursor += PADDING * 2

    # All labels share a single hyperlink
    link = Hyperlink(href=f'{self.base_url}{url}', target='_blank')
    for index, label in enumerate(labels):
        self.cursor += LINE_HEIGHT
        position = (self.center + PADDING * 2,
                    self.cursor - LINE_HEIGHT / 2)
        css_class = [] if index else 'bold'
        link.add(Text(label, insert=position, class_=css_class))
    connector.add(link)
    self.cursor += PADDING * 2

    return connector
def get_mid_pa_runner_line(self, runner, i, num_events):
    """Return the line for a runner advancing in the middle of a plate appearance.

    The upper half of the at-bat row is divided into ``num_events`` equal
    vertical steps; the line occupies step ``i``.

    :param runner: object with ``start`` and ``end`` base numbers
    :param i: zero-based index of this event within the plate appearance
    :param num_events: number of mid-plate-appearance events sharing the row
    :return: ``Line`` from the runner's start base to their end base
    """
    bases_x = ORIGIN_X + NAME_W + SCORE_W
    step = ATBAT_HT / 2 / num_events
    top = self.y - ATBAT_HT + i * step

    origin = (bases_x + BASE_L * runner.start, top)
    target = (bases_x + BASE_L * runner.end, top + step)
    return Line(origin, target)
def get_baserunner_line(self, runner):
    """Return the line showing a baserunner's advance for the current at-bat.

    By default the line runs from the runner's start base at the top of the
    at-bat row to their end base at ``self.y``. If
    ``get_mid_pa_runner_to_use(runner.start)`` yields an earlier advance, the
    line instead starts where that advance ended, halfway down the row.

    :param runner: object with ``start`` and ``end`` base numbers
    :return: ``Line`` ending at the runner's end base on ``self.y``
    """
    bases_x = ORIGIN_X + NAME_W + SCORE_W

    earlier_advance = self.get_mid_pa_runner_to_use(runner.start)
    if earlier_advance:
        origin = (bases_x + BASE_L * earlier_advance.end,
                  self.y - ATBAT_HT / 2)
    else:
        origin = (bases_x + BASE_L * runner.start, self.y - ATBAT_HT)

    return Line(origin, (bases_x + BASE_L * runner.end, self.y))
def draw_hash(self, inning):
    """Draw a short horizontal hash mark at the current ``self.y``.

    When the home team is batting the mark is passed through ``flip`` and
    tagged ``'away-pitcher-hash'``; otherwise it is tagged
    ``'home-pitcher-hash'``. The y position is recorded in
    ``home_hash_ys`` or ``away_hash_ys`` respectively.

    :param inning: inning passed to ``is_home_team_batting``
    """
    left = ORIGIN_X + ATBAT_W + HASH_SEP
    mark = Line((left, self.y), (left + HASH_LEN, self.y))

    if self.is_home_team_batting(inning):
        flip(mark)
        self.home_hash_ys.append(self.y)
        css_class = 'away-pitcher-hash'
    else:
        self.away_hash_ys.append(self.y)
        css_class = 'home-pitcher-hash'
    mark['class'] = css_class

    self.dwg.add(mark)
def background_lines(self, max_data, log_max_value, log_min_value):
    """Draw a light grey horizontal grid line at every power of ten.

    Lines are drawn for 1, 10, 100, ... while the value stays below
    ``max_data``, each spanning the plottable width.

    :param max_data: exclusive upper bound for the grid values
    :param log_max_value: passed through to ``scale_y``
    :param log_min_value: passed through to ``scale_y``
    """
    decade = 1
    while decade < max_data:
        scaled = self.scale_y(log10(decade), log_max_value, log_min_value)
        grid_y = self.margin_top + self.plottable_y - scaled

        left_end = (self.margin_left, grid_y)
        right_end = (self.margin_left + self.plottable_x, grid_y)
        self.plot.add(
            Line(start=left_end,
                 end=right_end,
                 stroke_width=1,
                 stroke="lightgrey"))
        decade *= 10
def _draw_cable(self, color, url, labels):
    """
    Return an SVG group containing a line element and text labels
    representing a Cable, advancing ``self.cursor`` past it.

    :param color: Cable (line) color, inserted after ``#`` in the stroke style
    :param url: Hyperlink URL, appended to ``self.base_url``
    :param labels: Sized iterable of text labels; the first one is rendered
        with the ``bold`` class
    """
    connector = Group(class_='connector')

    top = (OFFSET + self.center, self.cursor)
    span = PADDING * 2 + LINE_HEIGHT * len(labels) + PADDING * 2
    bottom = (top[0], top[1] + span)

    # A "shadow" line underneath gives the cable a border
    connector.add(Line(start=top, end=bottom, class_='cable-shadow'))
    # The cable itself, on top of the shadow
    connector.add(Line(start=top, end=bottom, style=f'stroke: #{color}'))

    self.cursor += PADDING * 2

    # All labels share a single hyperlink
    link = Hyperlink(href=f'{self.base_url}{url}', target='_blank')
    for index, label in enumerate(labels):
        self.cursor += LINE_HEIGHT
        position = (self.center + PADDING * 2,
                    self.cursor - LINE_HEIGHT / 2)
        css_class = [] if index else 'bold'
        link.add(Text(label, insert=position, class_=css_class))
    connector.add(link)
    self.cursor += PADDING * 2

    return connector
def corner_ruler(scale, length, major_interval, minor_per_major):
    """Build a corner piece made of a horizontal and a vertical ruler.

    The horizontal ruler extends to the left of the origin (``reverse=True``)
    and the vertical ruler extends downwards from it. A small white circle
    marks the origin and a bold ``1:<scale>`` label is centred above the
    horizontal ruler.

    :param scale: scale denominator; also shown in the label
    :param length: ruler length before scaling (presumably metres, given the
        factor of 1000 to millimetres - confirm against callers)
    :param major_interval: passed through to ``ruler``
    :param minor_per_major: passed through to ``ruler``
    :return: ``Group`` containing both rulers, the origin marker and the label
    """
    ruler_length_mm = (length / scale) * 1000
    ruler_args = dict(scale=scale,
                      length=length,
                      major_interval=major_interval,
                      minor_per_major=minor_per_major)

    corner = Group(stroke="black", stroke_width=0.4)

    # Horizontal ruler: base line overshoots the origin by 3 mm
    corner.add(Line((-ruler_length_mm * mm, 0 * mm), ((3) * mm, 0 * mm)))
    corner.add(ruler(reverse=True, vertical=False, **ruler_args))

    # Vertical ruler: base line starts 3 mm before the origin
    corner.add(Line((0 * mm, -3 * mm), (0 * mm, (ruler_length_mm) * mm)))
    corner.add(ruler(reverse=False, vertical=True, **ruler_args))

    corner.add(Circle((0, 0), r=0.5 * mm, fill="white"))

    # Scale label
    scale_label = Text(
        f"1:{scale}",
        x=[(-ruler_length_mm / 2) * mm],
        y=[-1 * mm],
        text_anchor="middle",
        font_size=2 * mm,
        font_weight="bold",
    )
    corner.add(scale_label)

    return corner
def draw_line(scene: Drawing, start: tuple, ctrl1: tuple, ctrl2: tuple,
              end: tuple, is_curved: bool, edge_color: tuple):
    """Add an edge from ``start`` to ``end`` to ``scene``.

    Args:
        scene: The drawing the edge is added to.
        start: Start point of the edge.
        ctrl1: First control point; only used when ``is_curved`` is true.
        ctrl2: Second control point; only used when ``is_curved`` is true.
        end: End point of the edge.
        is_curved: Draw a cubic Bezier ``Path`` instead of a straight ``Line``.
        edge_color: Colour components, unpacked into ``rgb(*edge_color)``.
    """
    if not is_curved:
        scene.add(
            Line(start,
                 end,
                 shape_rendering='inherit',
                 stroke=rgb(*edge_color),
                 stroke_width=1))
        return

    # cubic Bezier curve
    curve = Path(d=['M', start, 'C', ctrl1, ctrl2, end],
                 stroke=rgb(*edge_color),
                 stroke_width=1,
                 fill='none')
    scene.add(curve)
def _draw_attachment(self):
    """
    Return an SVG group containing the vertical line element that
    represents an attachment, and advance ``self.cursor`` by four paddings.
    """
    connector = Group(class_='connector')

    # The line is offset from the cursor both horizontally and vertically
    x = OFFSET + self.center
    top = OFFSET + self.cursor
    span = PADDING * 2 + LINE_HEIGHT + PADDING * 2
    connector.add(
        Line(start=(x, top), end=(x, top + span), class_='attachment'))

    self.cursor += PADDING * 4

    return connector
def get_lines():
    """Build a group of short random horizontal dashes arranged in rows.

    ``img['verticalLines']`` rows are generated, starting at y = 6 and
    spaced ``img['verticalInterval']`` apart. Within a row, dashes of length
    2-6 are placed left to right with random gaps of 50-150 until
    ``img['width']`` is reached; each row starts at a random x in
    ``[-10, 150]``.

    :return: ``Group`` styled with ``img['style']`` containing all dashes
    """
    dashes = Group(**img['style'])
    row_y = 6
    cursor_x = randint(-10, 150)

    for _ in range(0, img['verticalLines']):
        while cursor_x < img['width']:
            dash_end = randint(2, 6) + cursor_x
            dashes.add(Line((cursor_x, row_y), (dash_end, row_y)))
            cursor_x = dash_end + randint(50, 150)

        # Start the next row at a fresh random x position
        cursor_x = randint(-10, 150)
        row_y = round(row_y + img['verticalInterval'], 1)

    return dashes
def add_zero_based_regression(self, slope):
    """Place a regression line through the plot origin, with a slope caption.

    The line runs from the lower-left corner of the plottable area to the
    point at ``x_max``; if that point would lie above ``y_max`` the line is
    clipped at ``y_max`` instead. A "Slope = ..." caption (rounded to four
    decimals) is added near the top right.

    :param slope: slope of the regression line
    """
    self.max_min()

    end_x = self.x_max
    end_y = slope * end_x
    if end_y > self.y_max:
        # Clip at the top edge and solve for the matching x
        end_y = self.y_max
        end_x = end_y / slope

    origin = (self.margin_left, self.margin_top + self.plottable_y)
    tip = (self.x_to_printx(end_x), self.y_to_printy(end_y))
    self.plot.add(
        Line(start=origin, end=tip, stroke_width=1, stroke=self.graph_colour))

    caption_at = (self.plottable_x + self.margin_left - 200,
                  self.margin_top + 15)
    self.plot.add(
        Text(f"Slope = {round(slope, 4)}",
             insert=caption_at,
             fill=self.graph_colour,
             font_size="15"))
def get_batter_runner_line(self, runner):
    """Return the horizontal line for a batter who reached base.

    The line lies on ``self.y``, starting right after the name column and
    ending at the base given by ``runner.end``.

    :param runner: object with an ``end`` base number
    :return: horizontal ``Line``
    """
    left = ORIGIN_X + NAME_W
    right = left + SCORE_W + BASE_L * runner.end
    return Line((left, self.y), (right, self.y))
def test_numbers(self):
    """Plain numeric coordinates are written out as attribute values."""
    rendered = Line(start=(0, 0), end=(10, 20)).tostring()
    expected = '<line x1="0" x2="10" y1="0" y2="20" />'
    self.assertEqual(rendered, expected)
def line(start: tuple, end: tuple) -> Line:
    """Return a black, 2px wide ``Line`` with round caps from ``start`` to ``end``."""
    return Line(
        start=start,
        end=end,
        stroke='black',
        style='stroke-linecap:round;stroke-width:2px;',
    )
def process(self):
    """
    Render a set of per-item time series as stacked area graphs on a
    semi-isometric plane and save the result as an SVG file.

    Rows are read from ``self.source_file``; each row contributes its
    ``value`` to the series named by ``item`` at the interval given by
    ``date``. The series are optionally limited to the ``top`` largest,
    padded to a common interval list, filtered by ``complete`` (minimum
    percentage of intervals with data) and then drawn back to front.

    Parameters read from ``self.parameters``: ``smooth``, ``normalise``,
    ``complete``, ``label`` and ``top``.

    The dataset is finished with 0 items when there are fewer than two
    intervals or when no series is left after filtering.
    """
    graphs = {}
    intervals = []

    smooth = self.parameters.get("smooth")
    normalise_values = self.parameters.get("normalise")
    completeness = convert_to_int(self.parameters.get("complete"), 0)
    graph_label = self.parameters.get("label")
    top = convert_to_int(self.parameters.get("top"), 10)

    # first gather graph data: each distinct item gets its own graph and
    # for each graph we have a sequence of intervals, each interval with
    # its own value
    # the sentinels compare as strings, so any real date replaces them
    first_date = "9999-99-99"
    last_date = "0000-00-00"

    for row in self.iterate_items(self.source_file):
        if row["item"] not in graphs:
            graphs[row["item"]] = {}

        # make sure the months and days are zero-padded
        # (four-character parts, i.e. years, are left at four characters)
        interval = row.get("date", "")
        interval = "-".join([
            str(bit).zfill(2 if len(bit) != 4 else 4)
            for bit in interval.split("-")
        ])
        first_date = min(first_date, interval)
        last_date = max(last_date, interval)

        if interval not in intervals:
            intervals.append(interval)

        if interval not in graphs[row["item"]]:
            graphs[row["item"]][interval] = 0

        graphs[row["item"]][interval] += float(row.get("value", 0))

    # first make sure we actually have something to render
    intervals = sorted(intervals)
    if len(intervals) <= 1:
        self.dataset.update_status(
            "Not enough data for a side-by-side over-time visualisation.")
        self.dataset.finish(0)
        return

    # only retain most-occurring series - sort by sum of all frequencies
    if len(graphs) > top:
        selected_graphs = {
            graph: graphs[graph]
            for graph in sorted(
                graphs,
                key=lambda x: sum(
                    [graphs[x][interval] for interval in graphs[x]]),
                reverse=True)[0:top]
        }
        graphs = selected_graphs

    # there may be items that do not have values for all intervals
    # this will distort the graph, so the next step is to make sure all
    # graphs consist of the same continuous interval list
    missing = {graph: 0 for graph in graphs}
    for graph in graphs:
        missing[graph], graphs[graph] = pad_interval(
            graphs[graph], first_interval=first_date, last_interval=last_date)

    # now that's done, make sure the graph datapoints are in order
    # NOTE(review): this indexes the first graph, so it raises IndexError
    # if `graphs` is empty at this point (e.g. top == 0) - confirm whether
    # that can happen
    intervals = sorted(list(graphs[list(graphs)[0]].keys()))

    # delete graphs that do not have the required amount of intervals
    # this is useful to get rid of outliers and items that only occur
    # very few times over the full interval
    if completeness > 0:
        intervals_required = len(intervals) * (completeness / 100)
        disqualified = []
        for graph in graphs:
            if len(intervals) - missing[graph] < intervals_required:
                disqualified.append(graph)

        graphs = {
            graph: graphs[graph]
            for graph in graphs if graph not in disqualified
        }

    # determine max value per item, so we can normalize them later
    # limits[graph] is the per-graph maximum, max_limit the overall maximum
    limits = {}
    max_limit = 0
    for graph in graphs:
        for interval in graphs[graph]:
            limits[graph] = max(limits.get(graph, 0),
                                abs(graphs[graph][interval]))
            max_limit = max(max_limit, abs(graphs[graph][interval]))

    # order graphs by their maximum absolute value, ascending
    limits = {
        limit: limits[limit]
        for limit in sorted(limits, key=lambda l: limits[l])
    }
    graphs = {graph: graphs[graph] for graph in limits}

    if not graphs:
        # maybe nothing is actually there to be graphed
        self.dataset.update_status(
            "No items match the selection criteria - nothing to visualise."
        )
        self.dataset.finish(0)
        return None

    # how many vertical grid lines (and labels) are to be included at most
    # 12 is a sensible default because it allows one label per month for a full
    # year's data
    max_gridlines = 12

    # If True, label is put at the lower left bottom of the graph rather than
    # outside it. Automatically set to True if one of the labels is long, as
    # else the label would fall off the screen
    label_in_graph = max([len(item) for item in graphs]) > 30

    # determine how wide each interval should be
    # the graph has a minimum width - but the graph's width will be
    # extended if at this minimum width each item does not have the
    # minimum per-item width
    min_full_width = 600
    min_item_width = 50
    item_width = max(min_item_width, min_full_width / len(intervals))

    # determine how much space each graph should get
    # same trade-off as for the interval width
    min_full_height = 300
    min_item_height = 100
    item_height = max(min_item_height, min_full_height / len(graphs))

    # margin - this should be enough for the text labels to fit in
    margin_base = 50
    margin_right = margin_base * 4
    margin_top = margin_base * 3

    # this determines the "flatness" of the isometric projection and can be
    # tweaked for different looks - basically corresponds to how far the
    # camera is above the horizon
    plane_angle = 120

    # don't change these
    # (from here on both angles are in radians)
    plane_obverse = radians((180 - plane_angle) / 2)
    plane_angle = radians(plane_angle)

    # okay, now determine the full graphic size with these dimensions projected
    # semi-isometrically. We can also use these values later for drawing
    # grid lines, et cetera. The axis widths and heights here are the
    # dimensions of the bounding box wrapping the isometrically projected axes.
    x_axis_length = (item_width * (len(intervals) - 1))
    y_axis_length = (item_height * len(graphs))

    x_axis_width = (sin(plane_angle / 2) * x_axis_length)
    y_axis_width = (sin(plane_angle / 2) * y_axis_length)
    canvas_width = x_axis_width + y_axis_width

    # leave room for graph header
    if graph_label:
        margin_top += (2 * (canvas_width / 50))

    x_axis_height = (cos(plane_angle / 2) * x_axis_length)
    y_axis_height = (cos(plane_angle / 2) * y_axis_length)
    canvas_height = x_axis_height + y_axis_height

    # now we have the dimensions, the canvas can be instantiated
    canvas = get_4cat_canvas(
        self.dataset.get_results_path(),
        width=(canvas_width + margin_base + margin_right),
        height=(canvas_height + margin_base + margin_top),
        header=graph_label)

    # draw gridlines - vertical
    # start at the front corner of the plane and walk along the x axis
    gridline_x = y_axis_width + margin_base
    gridline_y = margin_top + canvas_height

    # on-screen offsets of one interval step (horizontal) and one graph
    # step (vertical) after projection
    step_x_horizontal = sin(plane_angle / 2) * item_width
    step_y_horizontal = cos(plane_angle / 2) * item_width
    step_x_vertical = sin(plane_angle / 2) * item_height
    step_y_vertical = cos(plane_angle / 2) * item_height

    # labels for x axis
    # month and week both follow the same pattern
    # it's not always possible to distinguish between them but we will try
    # by looking for months greater than 12 in which case we are dealing
    # with weeks
    # we need to know this because for months there is an extra row in the
    # label with the full month
    is_week = False
    for i in range(0, len(intervals)):
        if re.match(r"^[0-9]{4}-[0-9]{2}",
                    intervals[i]) and int(intervals[i].split("-")[1]) > 12:
            is_week = True
            break

    # only every `skip`-th interval gets a gridline and label
    skip = max(1, int(len(intervals) / max_gridlines))
    for i in range(0, len(intervals)):
        if i % skip == 0:
            canvas.add(
                Line(start=(gridline_x, gridline_y),
                     end=(gridline_x - y_axis_width,
                          gridline_y - y_axis_height),
                     stroke="grey",
                     stroke_width=0.25))

            # to properly position the rotated and skewed text a container
            # element is needed
            # first label row: the first four characters of the interval
            label1 = str(intervals[i])[0:4]
            center = (gridline_x, gridline_y)
            container = SVG(x=center[0] - 25,
                            y=center[1],
                            width="50",
                            height="1.5em",
                            overflow="visible",
                            style="font-size:0.8em;")
            container.add(
                Text(insert=("25%", "100%"),
                     text=label1,
                     transform="rotate(%f) skewX(%f)" %
                     (-degrees(plane_obverse), degrees(plane_obverse)),
                     text_anchor="middle",
                     baseline_shift="-0.5em",
                     style="font-weight:bold;"))

            # second label row: month abbreviation, plus the day of the
            # month when the interval is a full date
            if re.match(r"^[0-9]{4}-[0-9]{2}",
                        intervals[i]) and not is_week:
                label2 = month_abbr[int(str(intervals[i])[5:7])]
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}", intervals[i]):
                    label2 += " %i" % int(intervals[i][8:10])

                container.add(
                    Text(insert=("25%", "150%"),
                         text=label2,
                         transform="rotate(%f) skewX(%f)" %
                         (-degrees(plane_obverse), degrees(plane_obverse)),
                         text_anchor="middle",
                         baseline_shift="-0.5em"))

            canvas.add(container)

        # advance along the x axis for every interval, labelled or not
        gridline_x += step_x_horizontal
        gridline_y -= step_y_horizontal

    # draw graphs as filled beziers
    # note: `top` is reused here as the maximum drawn height of a graph
    top = step_y_vertical * 1.5
    graph_start_x = y_axis_width + margin_base
    graph_start_y = margin_top + canvas_height

    # draw graphs in reverse order, so the bottom one is most in the
    # foreground (in case of overlap)
    for graph in reversed(list(graphs)):
        self.dataset.update_status("Rendering graph for '%s'" % graph)

        # path starting at lower left corner of graph
        area_graph = Path(fill=self.colours[self.colour_index])
        area_graph.push("M %f %f" % (graph_start_x, graph_start_y))
        previous_value = None

        graph_x = graph_start_x
        graph_y = graph_start_y
        for interval in graphs[graph]:
            # normalise value
            # (relative to the graph's own maximum or the overall maximum,
            # keeping the sign; a zero maximum yields a flat line)
            value = graphs[graph][interval]
            try:
                limit = limits[graph] if normalise_values else max_limit
                value = top * copysign(abs(value) / limit, value)
            except ZeroDivisionError:
                value = 0

            if previous_value is None:
                # vertical line upwards to starting value of graph
                area_graph.push("L %f %f" %
                                (graph_start_x, graph_start_y - value))
            elif not smooth:
                area_graph.push("L %f %f" % (graph_x, graph_y - value))
            else:
                # cubic bezier (SVG "C" command) from previous value to
                # current value, with both control points halfway between
                # the two data points horizontally
                control_left = (graph_x - (step_x_horizontal / 2),
                                graph_y + step_y_horizontal -
                                previous_value - (step_y_horizontal / 2))
                control_right = (graph_x - (step_x_horizontal / 2),
                                 graph_y - value + (step_y_horizontal / 2))
                area_graph.push("C %f %f %f %f %f %f" %
                                (*control_left, *control_right, graph_x,
                                 graph_y - value))

            previous_value = value
            graph_x += step_x_horizontal
            graph_y -= step_y_horizontal

        # line to the bottom of the graph at the current Y position
        # (graph_x/graph_y are one step past the last point, so step back)
        area_graph.push(
            "L %f %f" %
            (graph_x - step_x_horizontal, graph_y + step_y_horizontal))
        area_graph.push("Z")  # then close the Path
        canvas.add(area_graph)

        # add text labels - skewing is a bit complicated and we need a
        # "center" to translate the origins properly.
        if label_in_graph:
            insert = (graph_start_x + 5, graph_start_y - 10)
        else:
            insert = (graph_x - (step_x_horizontal) + 5,
                      graph_y + step_y_horizontal - 10)

        # we need to take the skewing into account for the translation
        offset_y = tan(plane_obverse) * insert[0]
        canvas.add(
            Text(insert=(0, 0),
                 text=graph,
                 transform="skewY(%f) translate(%f %f)" %
                 (-degrees(plane_obverse), insert[0],
                  insert[1] + offset_y)))

        # cycle colours, back to the beginning if all have been used
        self.colour_index += 1
        if self.colour_index >= len(self.colours):
            self.colour_index = 0

        # the next graph is drawn one step further back on the plane
        graph_start_x -= step_x_vertical
        graph_start_y -= step_y_vertical

    # draw gridlines - horizontal
    # one baseline per graph, drawn on top of the filled areas
    gridline_x = margin_base
    gridline_y = margin_top + canvas_height - y_axis_height
    for graph in graphs:
        gridline_x += step_x_vertical
        gridline_y += step_y_vertical
        canvas.add(
            Line(start=(gridline_x, gridline_y),
                 end=(gridline_x + x_axis_width,
                      gridline_y - x_axis_height),
                 stroke="black",
                 stroke_width=1))

    # x axis
    canvas.add(
        Line(start=(margin_base + y_axis_width, margin_top + canvas_height),
             end=(margin_base + canvas_width,
                  margin_top + canvas_height - x_axis_height),
             stroke="black",
             stroke_width=2))

    # and finally save the SVG
    canvas.save(pretty=True)
    self.dataset.finish(len(graphs))
def process(self):
    """
    Render an SVG histogram/bar chart using a previous frequency analysis
    as input.

    Rows are read from ``self.source_file``; each row's ``date`` becomes
    an interval and its ``frequency`` the bar value. The intervals are
    padded with ``pad_interval`` and drawn as black bars on a fixed-size
    canvas, with an optional header, a Y axis normalised to a round
    maximum, tick marks, axis labels and a footer. The drawing is saved
    to the dataset's results path.

    The dataset is finished with 0 items when there are fewer than two
    intervals in the source data.
    """
    self.dataset.update_status("Reading source file")
    header = self.parameters.get("header",
                                 self.options["header"]["default"])
    max_posts = 0

    # collect post numbers per interval
    intervals = {}
    for post in self.iterate_csv_items(self.source_file):
        intervals[post["date"]] = int(post["frequency"])
        max_posts = max(max_posts, int(post["frequency"]))

    if len(intervals) <= 1:
        self.dataset.update_status(
            "Not enough data available for a histogram; need more than one time series."
        )
        self.dataset.finish(0)
        return

    self.dataset.update_status("Cleaning up data")
    # `missing` is not used further below
    (missing, intervals) = pad_interval(intervals)

    # create histogram
    self.dataset.update_status("Drawing histogram")

    # you may change the following variables to adjust the graph dimensions
    width = 1024
    height = 786
    y_margin = 75
    x_margin = 50
    x_margin_left = x_margin * 2
    tick_width = 5

    fontsize_normal = int(height / 40)
    fontsize_small = int(height / 75)

    # better don't touch the following
    line_width = round(width / 512)
    y_margin_top = 150 if header else 50
    y_height = height - (y_margin + y_margin_top)
    x_width = width - (x_margin + x_margin_left)
    canvas = Drawing(filename=str(self.dataset.get_results_path()),
                     size=(width, height),
                     style="font-family:monospace;font-size:%ipx" %
                     fontsize_normal)

    # normalize the Y axis to a multiple of a power of 10
    # (magnitude is 10 ** (number of digits of max_posts - 1))
    magnitude = pow(10, len(str(max_posts)) - 1)  # ew
    max_neat = math.ceil(max_posts / magnitude) * magnitude
    self.dataset.update_status(
        "Max (normalized): %i (%i) (magnitude: %i)" %
        (max_posts, max_neat, magnitude))

    # draw border
    canvas.add(Rect(
        insert=(0, 0),
        size=(width, height),
        stroke="#000",
        stroke_width=line_width,
        fill="#FFF"
    ))

    # draw header on a black background if needed
    if header:
        # headers longer than 40 characters are truncated with an ellipsis
        if len(header) > 40:
            header = header[:37] + "..."

        header_rect_height = (y_margin_top / 1.5)
        # font size shrinks as the header gets longer
        header_fontsize = (width / len(header))

        header_container = SVG(insert=(0, 0),
                               size=(width, header_rect_height))
        header_container.add(Rect(
            insert=(0, 0),
            size=(width, header_rect_height),
            fill="#000"
        ))
        header_container.add(Text(
            insert=("50%", "50%"),
            text=header,
            dominant_baseline="middle",
            text_anchor="middle",
            fill="#FFF",
            style="font-size:%i" % header_fontsize
        ))
        canvas.add(header_container)

    # horizontal grid lines
    for i in range(0, 10):
        offset = (y_height / 10) * i
        canvas.add(Line(
            start=(x_margin_left, y_margin_top + offset),
            end=(width - x_margin, y_margin_top + offset),
            stroke="#EEE",
            stroke_width=line_width
        ))

    # draw bars
    item_width = (width - (x_margin + x_margin_left)) / len(intervals)
    item_height = (height - y_margin - y_margin_top)
    bar_width = item_width * 0.9
    # x is the left edge of the current bar, centred within its slot
    x = x_margin_left + (item_width / 2) - (bar_width / 2)

    # bars at least 8 wide get rounded top corners of this radius
    if bar_width >= 8:
        arc_adjust = max(8, int(item_width / 5)) / 2
    else:
        arc_adjust = 0

    for interval in intervals:
        posts = int(intervals[interval])
        bar_height = ((posts / max_neat) * item_height)
        self.dataset.update_status("%s: %i posts" % (interval, posts))
        bar_top = height - y_margin - bar_height
        bar_bottom = height - y_margin

        # nothing to draw for empty intervals, but keep the slot
        if bar_height == 0:
            x += item_width
            continue

        # outline: bottom left, up the left side, (rounded) top left
        # corner, across the top, (rounded) top right corner, down the
        # right side, close
        bar = Path(fill="#000")
        bar.push("M %f %f" % (x, bar_bottom))
        bar.push("L %f %f" %
                 (x, bar_top +
                  (arc_adjust if bar_height > arc_adjust else 0)))
        if bar_height > arc_adjust > 0:
            control = (x, bar_top)
            bar.push("C %f %f %f %f %f %f" %
                     (*control, *control, x + arc_adjust, bar_top))
        bar.push("L %f %f" %
                 (x + bar_width - arc_adjust,
                  height - y_margin - bar_height))
        if bar_height > arc_adjust > 0:
            control = (x + bar_width, bar_top)
            bar.push("C %f %f %f %f %f %f" %
                     (*control, *control, x + bar_width,
                      bar_top + arc_adjust))
        bar.push("L %f %f" % (x + bar_width, height - y_margin))
        bar.push("Z")
        canvas.add(bar)

        x += item_width

    # draw X and Y axis
    canvas.add(Line(
        start=(x_margin_left, height - y_margin),
        end=(width - x_margin, height - y_margin),
        stroke="#000",
        stroke_width=2
    ))
    canvas.add(Line(
        start=(x_margin_left, y_margin_top),
        end=(x_margin_left, height - y_margin),
        stroke="#000",
        stroke_width=2
    ))

    # draw ticks on Y axis
    for i in range(0, 10):
        offset = (y_height / 10) * i
        canvas.add(Line(
            start=(x_margin_left - tick_width, y_margin_top + offset),
            end=(x_margin_left, y_margin_top + offset),
            stroke="#000",
            stroke_width=line_width
        ))

    # draw ticks on X axis, one in the middle of each interval's slot
    for i in range(0, len(intervals)):
        offset = (x_width / len(intervals)) * (i + 0.5)
        canvas.add(Line(
            start=(x_margin_left + offset, height - y_margin),
            end=(x_margin_left + offset, height - y_margin + tick_width),
            stroke="#000",
            stroke_width=line_width
        ))

    # prettify

    # y labels: eleven values from 0 to max_neat, bottom to top
    origin = (x_margin_left / 2)
    step = y_height / 10
    for i in range(0, 11):
        label = str(int((max_neat / 10) * i))
        # only the second component (the label height) is used below
        labelsize = (len(label) * fontsize_normal * 1.25, fontsize_normal)
        label_x = origin - (tick_width * 2)
        label_y = height - y_margin - (i * step) - (labelsize[1] / 2)
        label_container = SVG(
            insert=(label_x, label_y),
            size=(x_margin_left / 2, x_margin_left / 5)
        )
        label_container.add(Text(
            insert=("100%", "50%"),
            text=label,
            dominant_baseline="middle",
            text_anchor="end"
        ))
        canvas.add(label_container)

    # x labels
    label_width = max(fontsize_small * 6, item_width)
    label_x = x_margin_left
    label_y = height - y_margin + (tick_width * 2)
    # x position up to which the previously drawn label extends; labels
    # that would start before it are skipped (note: shadows the builtin)
    next = 0
    for interval in intervals:
        # 7 characters: year-month, 10 characters: year-month-day;
        # anything else is split on its dashes
        if len(interval) == 7:
            label = month_abbr[int(interval[5:7])] + "\n" + interval[0:4]
        elif len(interval) == 10:
            label = str(int(interval[8:10])) + month_abbr[int(
                interval[5:7])] + "\n" + interval[0:4]
        else:
            label = interval.replace("-", "\n")

        if label_x > next:
            shift = 0
            # each line of the label gets its own container, shifted
            # further down via baseline_shift
            for line in label.split("\n"):
                label_container = SVG(
                    insert=(label_x + (item_width / 2) - (label_width / 2),
                            label_y + (tick_width * 2)),
                    size=(label_width, y_margin),
                    overflow="visible")
                label_container.add(Text(
                    insert=("50%", "0%"),
                    text=line,
                    dominant_baseline="middle",
                    text_anchor="middle",
                    baseline_shift=-shift
                ))
                shift += fontsize_small * 2
                canvas.add(label_container)
                next = label_x + (label_width * 0.9)
        label_x += item_width

    # 4cat logo, as a black footer box in the lower right corner
    label = "made with 4cat - 4cat.oilab.nl"
    footersize = (fontsize_small * len(label) * 0.7, fontsize_small * 2)
    footer = SVG(insert=(width - footersize[0], height - footersize[1]),
                 size=footersize)
    footer.add(Rect(insert=(0, 0), size=("100%", "100%"), fill="#000"))
    footer.add(Text(
        insert=("50%", "50%"),
        text=label,
        dominant_baseline="middle",
        text_anchor="middle",
        fill="#FFF",
        style="font-size:%i" % fontsize_small
    ))
    canvas.add(footer)

    canvas.save(pretty=True)

    self.dataset.update_status("Finished")
    self.dataset.finish(len(intervals))
def make_svg(self, data):
    """Render every form in ``self.forms`` as an SVG string and store it in ``data``.

    For each form, every secondary structure listed in ``data["layers"]`` is
    drawn as a shape selected by its ``type`` (``"H"``: circle, ``"E"``:
    triangle, anything else: cross) at its scaled ``shift_x``/``shift_z``
    position. Consecutive structures of the form (the ``_``-separated parts
    of ``f.id``) are connected by black lines; a dark blue line leads into
    the first structure and a dark red line leads out of the last. Links are
    split alternately into a layer below and a layer above the shapes.

    The serialised drawing is written to the ``"svg"`` key of every entry of
    ``data["forms"]`` whose ``"id"`` equals the form's id.

    :param data: dict with a ``"layers"`` list (lists of structure dicts with
        ``id``, ``type``, ``shift_x``, ``shift_z`` and optionally ``ref``)
        and a ``"forms"`` list of dicts with an ``"id"`` key
    """
    layers = data["layers"]
    _PROPORTION = 10
    _SHIFT_PROP = _PROPORTION / 1.5
    radius = 2.3 * _PROPORTION

    for f in self.forms:
        ids = f.id.split("_")
        last = len(ids) - 1
        structures = [None] * len(ids)
        linkers = []
        centers = {}
        order = {x: cx for cx, x in enumerate(ids)}
        maxW, maxH = 0, 0

        for l in layers:
            for s in l:
                name = f.id + "__" + s["id"]
                pos_x = s["shift_x"] * _SHIFT_PROP
                pos_z = s["shift_z"] * _SHIFT_PROP
                # Structures carrying a "ref" key get a paler colour.
                if s["type"] == "H":
                    color = "cornflowerblue" if "ref" not in s else "gainsboro"
                    shape = Circle(center=(pos_x, pos_z), r=radius, id=name,
                                   fill=color, stroke="black",
                                   stroke_width="2")
                elif s["type"] == "E":
                    color = "indianred" if "ref" not in s else "salmon"
                    rotate = f.get_ss_by_id(s["id"]).struc.goes_down()
                    shape = Triangle(center=(pos_x, pos_z), rc=radius,
                                     id=name, fill=color, stroke="black",
                                     stroke_width="2", rotate=rotate)
                else:
                    color = "darkgreen" if "ref" not in s else "lightgreen"
                    shape = Cross(center=(pos_x, pos_z), r=radius,
                                  fill=color, stroke="black",
                                  stroke_width="2", id=name)
                structures[order[s["id"]]] = shape
                centers[s["id"]] = [pos_x, pos_z]
                if pos_x > maxW:
                    maxW = pos_x
                if pos_z > maxH:
                    maxH = pos_z

        for cx, x in enumerate(ids):
            if cx == 0 or cx == last:
                # Terminal stub: pushed away from the structure, towards the
                # side of the layout the structure sits on. The first
                # character of the id selects the layer (ord - 65 is used as
                # the layer index), the number its position within the layer.
                init = [0, 0]
                if (ord(x[0]) - 64) <= len(layers) / 2:
                    init[1] = centers[x][1] - (radius * 2)
                else:
                    init[1] = centers[x][1] + (radius * 2)
                # Raw string: "\d" in a plain literal is an invalid escape.
                position = int(re.search(r"(\d+)", x).group(1))
                if position <= len(layers[ord(x[0]) - 65]) / 2:
                    init[0] = centers[x][0] - (radius * 2)
                else:
                    init[0] = centers[x][0] + (radius * 2)
                if cx == 0:
                    shape = Line(init, centers[x], stroke="darkblue",
                                 stroke_width="4")
                else:
                    shape = Line(centers[x], init, stroke="darkred",
                                 stroke_width="4")
                linkers.append(shape)
            if cx < last:
                # Link to the next structure in the form.
                linkers.append(Line(centers[x], centers[ids[cx + 1]],
                                    stroke="black", stroke_width="4"))

        # Intercalate: alternate links between the layer below and the
        # layer above the shapes, starting according to the first structure.
        if f.sslist[0].struc.goes_up():
            dowlink = linkers[0::2]
            toplink = linkers[1::2]
        else:
            dowlink = linkers[1::2]
            toplink = linkers[0::2]

        g = Group()
        for element in dowlink:
            g.add(element)
        for element in structures:
            g.add(element)
        for element in toplink:
            g.add(element)
        g.translate(radius * 3, radius * 3)

        d = Drawing(size=(maxW + ((radius * 3) * 2),
                          maxH + ((radius * 3) * 2)),
                    id=f.id + "__image")
        d.add(g)
        for form in data["forms"]:
            if form["id"] == f.id:
                form["svg"] = d.tostring()
def plot_data(self):
    """Draw one circle per categorised sample along a horizontal score axis.

    Sets ``self.max_value`` to the largest score in
    ``self.annotated_scores`` and, when ``self.min_value`` is falsy, sets
    it to the smallest score. When ``self.show_legend`` is false an axis
    line and the two end labels are added to ``self.plot``. Samples whose
    ``"category"`` is missing or is not one of ``Fibroblasts``,
    ``Cardiomyocytes`` or ``Experimental`` are skipped.
    """
    if not self.show_legend:
        self.plot.add(
            Line(start=(self.margin_left, self.margin_top),
                 end=(self.width - self.margin_right, self.margin_top),
                 stroke_width=1,
                 stroke="black"))

    scores = [details['score'] for details in self.annotated_scores.values()]
    self.max_value = max(scores)
    # NOTE(review): a min_value of 0 is also treated as "unset" here.
    if not self.min_value:
        self.min_value = min(scores)

    if not self.show_legend:
        self.plot.add(
            Text("Fibroblasts",
                 insert=(self.margin_left, self.margin_top - 5),
                 fill="black",
                 font_size="15"))
        self.plot.add(
            Text("Cardiomyocytes",
                 insert=(self.width - self.margin_right - 100,
                         self.margin_top - 5),
                 fill="black",
                 font_size="15"))

    delta = self.max_value - self.min_value
    plottable = self.width - (self.margin_left + self.margin_right)
    # Vertical offset of the next circle for each category, so that
    # circles of the same category are stacked instead of overlapping.
    spacing = {
        "Fibroblasts": INIT_GAP,
        "Cardiomyocytes": INIT_GAP,
        "Experimental": INIT_GAP,
    }

    for sample, sample_details in self.annotated_scores.items():
        if "category" not in sample_details:
            continue
        sample_type = sample_details["category"]
        if sample_type == "Fibroblasts":
            colour = "green"
        elif sample_type == "Cardiomyocytes":
            colour = "blue"
        elif sample_type == "Experimental":
            colour = "red"
        else:
            continue

        # When every score is identical delta is 0; put the samples at the
        # left margin instead of dividing by zero.
        if delta:
            offset = ((sample_details['score'] - self.min_value)
                      / delta * plottable)
        else:
            offset = 0
        position = self.margin_left + offset

        circle_obj = Circle(
            center=(position, self.margin_top + spacing[sample_type] + 5),
            r=3,
            stroke_width=0.1,
            stroke_linecap='round',
            stroke_opacity=1,
            fill=colour,
            fill_opacity=0.6)  # set to 0.2 if you want to show clear.
        circle_obj.set_desc('{} - {} - {}'.format(
            sample, sample_type,
            sample_details["score"] if "description" not in sample_details
            else sample_details["description"]))
        self.plot.add(circle_obj)
        spacing[sample_type] += 6
def test_coordinates(self):
    """A Line built from unit-suffixed coordinates keeps them verbatim."""
    expected_markup = '<line x1="10cm" x2="20cm" y1="11cm" y2="30cm" />'
    segment = Line(start=('10cm', '11cm'), end=('20cm', '30cm'))
    rendered = segment.tostring()
    self.assertEqual(rendered, expected_markup)
def ruler(scale=1000,
          length=100,
          major_interval=10,
          minor_per_major=10,
          reverse=False,
          vertical=False):
    """Build a ruler as an SVG ``Group`` of tick lines and labels.

    Major ticks are drawn every ``major_interval`` units up to ``length``,
    with ``minor_per_major - 1`` shorter ticks between them; the tick that
    falls exactly halfway gets an intermediate length. Each major tick is
    labelled with its value, and the last label carries an ``m`` suffix.
    For a vertical ruler a label is only drawn when its offset leaves room
    for a horizontal ruler at the origin.

    Args:
        scale: Divisor applied to ruler values before they are multiplied
            by 1000 and used as millimetre offsets (presumably a map scale
            of 1:``scale`` with values in metres -- confirm with callers).
        length: Value at which the ruler stops.
        major_interval: Distance between major ticks; must be positive.
        minor_per_major: Number of subdivisions per major interval.
        reverse: Draw towards negative offsets instead of positive ones.
        vertical: Lay ticks along the y axis instead of the x axis.

    Returns:
        The ``Group`` containing all tick lines and labels.

    Raises:
        ValueError: If ``major_interval`` is not positive (the tick loop
            could otherwise never reach ``length``).
    """
    if major_interval <= 0:
        raise ValueError("major_interval must be positive")

    major_len_mm = 3
    mid_len_mm = 2
    minor_len_mm = 1
    font_size_mm = 2
    sign = -1 if reverse else 1

    g = Group()

    def add_tick(offset_mm, tick_len_mm):
        # One tick perpendicular to the ruler axis at the given offset.
        if vertical:
            g.add(
                Line(((0) * mm, (offset_mm) * mm),
                     ((-tick_len_mm) * mm, (offset_mm) * mm)))
        else:
            g.add(
                Line(((offset_mm) * mm, (0) * mm),
                     ((offset_mm) * mm, (tick_len_mm) * mm)))

    mj = 0
    while mj < length:
        # Minor tick marks between this major position and the next one
        for i in range(1, minor_per_major):
            offset_mm = sign * (
                (mj + i * major_interval / minor_per_major) / scale) * 1000
            is_half_mark = i == minor_per_major / 2
            add_tick(offset_mm, mid_len_mm if is_half_mark else minor_len_mm)

        mj += major_interval

        # The final major tick also shows the unit
        if mj >= length:
            label = f"{mj}m"
        else:
            label = f"{mj}"

        # Major line
        offset_mm = sign * (mj / scale) * 1000
        add_tick(offset_mm, major_len_mm)
        if vertical:
            if offset_mm > (major_len_mm + font_size_mm * 1.5):
                # only draw label if it wont collide w horizontal ruler
                g.add(
                    Text(
                        label,
                        x=[(-major_len_mm - 1) * mm],
                        y=[(offset_mm + font_size_mm / 2.5) * mm],
                        text_anchor="end",
                        font_size=font_size_mm * mm,
                    ))
        else:
            g.add(
                Text(
                    label,
                    x=[(offset_mm) * mm],
                    y=[(major_len_mm + font_size_mm) * mm],
                    text_anchor="middle",
                    font_size=font_size_mm * mm,
                ))

    return g
def get_line(self, value, color, **options):
    """Return the SVG markup of a dashed line from the centre to a slice.

    The line starts at ``(self.x0, self.y0)`` and ends at the coordinate
    returned by ``self.calc_slice_coord(value, self.r)``. Extra keyword
    ``options`` are accepted but not used.
    """
    tip = self.calc_slice_coord(value, self.r)
    origin = (self.x0, self.y0)
    return Line(origin, tip, stroke=color,
                stroke_dasharray="4 4").tostring()
def process(self):
    """Plot the nearest neighbours of the query words for each model.

    Loads every ``*.model`` file from the unpacked source archive, finds
    for each query word its nearest neighbours per model (restricted to
    the vocabulary shared by all models and to the similarity threshold),
    reduces all selected vectors to two dimensions with the chosen method
    (``PCA``, ``t-SNE`` or ``TruncatedSVD``) and renders the result as an
    SVG: one colour per model, query words highlighted, and a line
    tracing each query word's position from model to model.

    The staging directory is removed on every exit path. The dataset is
    finished with 0 items when the input is unusable, otherwise with the
    number of query words.

    Raises:
        ProcessorInterruptedException: If ``self.interrupted`` is set while
            loading models or searching for similar words.
    """
    # parse parameters
    input_words = self.parameters.get("words", "")
    if not input_words:
        self.dataset.update_status(
            "No input words provided, cannot look for similar words.",
            is_final=True)
        self.dataset.finish(0)
        return

    input_words = input_words.split(",")

    try:
        threshold = float(
            self.parameters.get("threshold",
                                self.options["threshold"]["default"]))
    except ValueError:
        threshold = float(self.options["threshold"]["default"])

    threshold = max(-1.0, min(1.0, threshold))
    num_words = convert_to_int(self.parameters.get("num-words"),
                               self.options["num-words"]["default"])
    overlay = self.parameters.get("overlay")
    reduction_method = self.parameters.get("method")
    all_words = self.parameters.get("all-words")

    # load model files and initialise
    self.dataset.update_status("Unpacking word embedding models")
    staging_area = self.unpack_archive_contents(self.source_file)
    common_vocab = None
    vector_size = None
    models = {}

    # find words that are common to all models
    self.dataset.update_status("Determining cross-model common vocabulary")
    for model_file in staging_area.glob("*.model"):
        if self.interrupted:
            shutil.rmtree(staging_area)
            raise ProcessorInterruptedException(
                "Interrupted while processing word embedding models")

        model = KeyedVectors.load(str(model_file)).wv
        models[model_file.stem] = model
        if vector_size is None:
            # needed later for dimensionality reduction
            vector_size = model.vector_size

        if common_vocab is None:
            common_vocab = set(model.vocab.keys())
        else:
            common_vocab &= set(model.vocab.keys())  # intersect

    if not models:
        # without any model there is no vocabulary and nothing to plot
        shutil.rmtree(staging_area)
        self.dataset.update_status(
            "No word embedding models found, cannot look for similar words.",
            is_final=True)
        self.dataset.finish(0)
        return

    # sort common vocabulary by combined frequency across all models
    # this should make filtering for common words a bit faster further down
    self.dataset.update_status("Sorting vocabulary")
    common_vocab = list(common_vocab)
    common_vocab.sort(key=lambda w: sum(
        [model.vocab[w].count for model in models.values()]),
                      reverse=True)

    # initial boundaries of 2D space (to be adjusted later based on t-sne
    # outcome)
    max_x = 0.0 - sys.float_info.max
    max_y = 0.0 - sys.float_info.max
    min_x = sys.float_info.max
    min_y = sys.float_info.max

    # for each model, find the words that we may want to plot - these are
    # the nearest neighbours for the given query words
    relevant_words = {}

    # the vectors need to be reduced all at once - but the vectors are
    # grouped by model. To solve this, keep one numpy array of vectors,
    # but also keep track of which indexes of this array belong to which
    # model, by storing the index of the first vector for a model
    vectors = numpy.empty((0, vector_size))
    vector_offsets = {}

    # now process each model
    for model_name, model in models.items():
        # a set: each word is only needed once per model
        relevant_words[model_name] = set()
        self.dataset.update_status("Finding similar words in model '%s'" %
                                   model_name)

        for query in input_words:
            if query not in model.vocab:
                # clean up the unpacked models before bailing out
                shutil.rmtree(staging_area)
                self.dataset.update_status(
                    "Query '%s' was not found in model %s; cannot find nearest neighbours."
                    % (query, model_name),
                    is_final=True)
                self.dataset.finish(0)
                return

            if self.interrupted:
                shutil.rmtree(staging_area)
                raise ProcessorInterruptedException(
                    "Interrupted while finding similar words")

            # use a larger sample (topn) than required since some of the
            # nearest neighbours may not be in the common vocabulary and
            # will therefore need to be ignored
            context = set([
                word[0] for word in model.most_similar(query, topn=1000)
                if word[0] in common_vocab and word[1] >= threshold
            ][:num_words])

            # always include query word
            relevant_words[model_name] |= {query} | context

    # now do another loop to determine which words to plot for each model
    # this is either the same as relevant_words, or a superset which
    # combines all relevant words for all models
    plottable_words = {}
    last_model = max(relevant_words.keys())
    all_relevant_words = set().union(*relevant_words.values())

    for model_name, words in relevant_words.items():
        plottable_words[model_name] = []
        vector_offsets[model_name] = len(vectors)

        # determine which words to plot for this model. either the nearest
        # neighbours for this model, or all nearest neighbours found across
        # all models
        words_to_include = all_relevant_words if all_words else relevant_words[
            model_name]

        for word in words_to_include:
            if word in plottable_words[model_name] or (
                    not overlay and model_name != last_model
                    and word not in input_words):
                # only plot each word once per model, or if 'overlay'
                # is not set, only once overall (for the most recent
                # model)
                continue

            vector = models[model_name][word]
            plottable_words[model_name].append(word)
            vectors = numpy.append(vectors, [vector], axis=0)

    del models  # no longer needed

    # reduce the vectors of all words to be plotted to a two-dimensional
    # coordinate. here the two-dimensional vectors are interpreted as
    # cartesian coordinates
    if reduction_method == "PCA":
        pca = PCA(n_components=2, random_state=0)
        vectors = pca.fit_transform(vectors)
    elif reduction_method == "t-SNE":
        # initialise t-sne transformer
        # parameters taken from Hamilton et al.
        # https://github.com/williamleif/histwords/blob/master/viz/common.py
        tsne = TSNE(n_components=2,
                    random_state=0,
                    learning_rate=150,
                    init="pca")
        vectors = tsne.fit_transform(vectors)
    elif reduction_method == "TruncatedSVD":
        # standard sklearn parameters made explicit
        svd = TruncatedSVD(n_components=2,
                           algorithm="randomized",
                           n_iter=5,
                           random_state=0)
        vectors = svd.fit_transform(vectors)
    else:
        shutil.rmtree(staging_area)
        self.dataset.update_status(
            "Invalid dimensionality reduction technique selected",
            is_final=True)
        self.dataset.finish(0)
        return

    # also keep track of the boundaries of our 2D space, so we can plot
    # them properly later
    for position in vectors:
        max_x = max(max_x, position[0])
        max_y = max(max_y, position[1])
        min_x = min(min_x, position[0])
        min_y = min(min_y, position[1])

    # now we know for each model which words should be plotted and at what
    # position
    # with this knowledge, we can normalize the positions, and start
    # plotting them in a graph

    # a palette generated with https://medialab.github.io/iwanthue/
    colours = [
        "#d58eff", "#cf9000", "#3391ff", "#a15700", "#911ca7", "#00ddcb",
        "#cc25a9", "#d5c776", "#6738a8", "#ff9470", "#47c2ff", "#a4122c",
        "#00b0ca", "#9a0f76", "#ff70c8", "#713c88"
    ]

    # make sure all coordinates are positive
    max_x -= min_x
    max_y -= min_y

    # determine graph dimensions and proportions
    width = 1000  # arbitrary
    height = width * (max_y / max_x)  # retain proportions
    scale = width / max_x

    # margin around the plot to give room for labels and to look better
    margin = width * 0.1
    width += 2 * margin
    height += 2 * margin

    # normalize all known positions to fit within the graph
    vectors = [(margin + ((position[0] - min_x) * scale),
                margin + ((position[1] - min_y) * scale))
               for position in vectors]

    # now all positions are finalised, we can determine the "journey" of
    # each query - the sequence of positions in the graph it takes, so we
    # can draw lines from position to position later
    journeys = {}
    for query in input_words:
        journeys[query] = []
        for model_name, words in plottable_words.items():
            index = words.index(query)
            journeys[query].append(vectors[vector_offsets[model_name] +
                                           index])

    # font sizes proportional to width (which is static and thus predictable)
    fontsize_large = width / 50
    fontsize_normal = width / 75
    fontsize_small = width / 100

    # now we have the dimensions, the canvas can be instantiated
    model_type = self.source_dataset.parameters.get(
        "model-type", "word2vec")
    canvas = get_4cat_canvas(
        self.dataset.get_results_path(),
        width,
        height,
        header="%s nearest neighbours (fitting: %s) - '%s'" %
        (model_type, reduction_method, ",".join(input_words)),
        fontsize_normal=fontsize_normal,
        fontsize_large=fontsize_large,
        fontsize_small=fontsize_small)

    # use colour-coded backgrounds to distinguish the query words in the
    # graph, each model (= interval) with a separate colour. the palette
    # is reused from the start when there are more models than colours
    for colour_index, model_name in enumerate(plottable_words):
        solid = Filter(id="solid-%s" % model_name)
        solid.feFlood(flood_color=colours[colour_index % len(colours)])
        solid.feComposite(in_="SourceGraphic")
        canvas.defs.add(solid)

    # now plot each word for each model
    self.dataset.update_status("Plotting graph")
    words = SVG(insert=(0, 0), size=(width, height))
    queries = SVG(insert=(0, 0), size=(width, height))
    colour_index = 0

    for model_name, labels in plottable_words.items():
        positions = vectors[
            vector_offsets[model_name]:vector_offsets[model_name] +
            len(labels)]

        for word, position in zip(labels, positions):
            is_query = word in input_words

            label_filter = ("url(#solid-%s)" %
                            model_name) if is_query else "none"
            colour = "#FFF" if is_query else colours[colour_index]
            fontsize = fontsize_normal if is_query else fontsize_small

            if is_query:
                word += " (" + model_name + ")"

            label_container = SVG(insert=position,
                                  size=(1, 1),
                                  overflow="visible")
            label_container.add(
                Text(insert=("50%", "50%"),
                     text=word,
                     dominant_baseline="middle",
                     text_anchor="middle",
                     style="fill:%s;font-size:%ipx" % (colour, fontsize),
                     filter=label_filter))

            # we make sure the queries are always rendered on top by
            # putting them in a separate SVG container
            if is_query:
                queries.add(label_container)
            else:
                words.add(label_container)

        # wrap around so the index always stays inside the palette; it is
        # kept in step with the filter colours assigned above
        colour_index = (colour_index + 1) % len(colours)

    # plot a line between positions for query words
    lines = SVG(insert=(0, 0), size=(width, height))
    for journey in journeys.values():
        for previous_position, position in zip(journey, journey[1:]):
            lines.add(
                Line(start=previous_position,
                     end=position,
                     stroke="#CE1B28",
                     stroke_width=2))

    canvas.add(lines)
    canvas.add(words)
    canvas.add(queries)

    canvas.save(pretty=True)
    shutil.rmtree(staging_area)
    self.dataset.finish(len(journeys))
def test_coordinates(self):
    """Start/end tuples with units are serialized as x1/y1/x2/y2 attributes."""
    start_point = ('10cm', '11cm')
    end_point = ('20cm', '30cm')
    markup = Line(start=start_point, end=end_point).tostring()
    self.assertEqual(
        markup, '<line x1="10cm" x2="20cm" y1="11cm" y2="30cm" />')