def index_narrative_elements(self, meta):
    "Stores acts and key scenes in redis"
    for noun in ('act', 'key_scene'):
        # Sort by element['range'][0] before adding to redis
        narrative_elements = meta.get('%ss' % noun, [])
        narrative_elements_sorted = sorted(
            narrative_elements,
            key=lambda element: element['range'][0])
        for i, data in enumerate(narrative_elements_sorted):
            key = "%s:%s:%i" % (noun, self.mission_name, i)
            self.redis_conn.rpush(
                "%ss:%s" % (noun, self.mission_name),
                "%s:%i" % (self.mission_name, i),
            )
            data['start'], data['end'] = map(mission_time_to_timestamp, data['range'])
            del data['range']
            self.redis_conn.hmset(key, data)

    # If there are no acts at all, make one that includes everything from
    # before Vostok 1 until after now. Do this before we link key scenes,
    # so we can have them without having to specify acts.
    if len(list(Act.Query(self.redis_conn, self.mission_name))) == 0:
        key = "act:%s:0" % (self.mission_name, )
        title = meta.get('copy', {}).get('title', None)
        if title is None:
            title = meta.get('name', u'The Mission')
        else:
            title = json.loads(title)
        data = {
            'title': title,
            'description': '',
            'start': -300000000,  # Vostok 1 launch was -275248380
            'end': int(time.time()) + 86400 * 365,  # so we can have acts ending up to a year in the future
        }
        self.redis_conn.rpush(
            "acts:%s" % (self.mission_name, ),
            "%s:0" % (self.mission_name, ),
        )
        self.redis_conn.hmset(key, data)

    # Link key scenes and acts
    for act in Act.Query(self.redis_conn, self.mission_name):
        for key_scene in KeyScene.Query(self.redis_conn, self.mission_name):
            if act.includes(key_scene.start):
                self.redis_conn.rpush(
                    'act:%s:%s:key_scenes' % (self.mission_name, act.number),
                    '%s:%s' % (self.mission_name, key_scene.number),
                )
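# A minimal sketch (not part of the indexer above) of how the keys it writes
# could be read back with redis-py. The bare redis.Redis() connection and the
# mission name "example_mission" are assumptions for illustration only.
import redis

redis_conn = redis.Redis()
mission_name = "example_mission"  # hypothetical mission already indexed

# acts:<mission> is a list of "<mission>:<number>" entries in range order;
# each act's fields live in a hash keyed act:<mission>:<number>.
for entry in redis_conn.lrange("acts:%s" % mission_name, 0, -1):
    number = entry.split(":")[-1]
    act_data = redis_conn.hgetall("act:%s:%s" % (mission_name, number))
    print "%s: %s -> %s" % (act_data.get("title"), act_data.get("start"), act_data.get("end"))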
def get_context_data(self):
    acts = [
        (x + 1, act)
        for x, act in enumerate(Act.Query(self.request.redis_conn, self.request.mission.name))
    ]
    return {
        "acts": acts,
        "quote": self.get_quote(),
    }
def get_context_data(self, phase_number='1'):
    try:
        selected_act = Act(self.request.redis_conn, self.request.mission.name, int(phase_number) - 1)
    except KeyError:
        raise Http404('Phase %s not found' % phase_number)
    return {
        'acts': list(self.act_query()),
        'act': selected_act,
    }
def get_context_data(self):
    if self.request.mission.memorial:
        people, more_people = mission_people(self.request)
        return {
            'people': [group for group in people if group['view'] == 'full'],
        }
    acts = [
        (x + 1, act)
        for x, act in enumerate(Act.Query(self.request.redis_conn, self.request.mission.name))
    ]
    return {
        "acts": acts,
        "quote": self.get_quote(),
    }
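# An illustrative note (assumption, not project code): the views above pair
# each Act with a 1-based number for display, while the phase view with
# get_context_data(phase_number=...) converts the URL's 1-based phase back to
# the 0-based Act index. A hypothetical helper making that mapping explicit:
def phase_number_to_act_index(phase_number):
    # URL captures arrive as strings, e.g. '3' -> Act index 2
    return int(phase_number) - 1

assert phase_number_to_act_index('1') == 0
assert phase_number_to_act_index('3') == 2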
def index(self):
    current_labels = {}
    current_transcript_page = None
    current_page = 1
    current_page_lines = 0
    current_lang = None
    last_act = None
    previous_log_line_id = None
    previous_timestamp = None
    launch_time = int(self.redis_conn.hget(
        "mission:%s" % self.mission_name, "utc_launch_time"))
    acts = list(Act.Query(self.redis_conn, self.mission_name))
    key_scenes = list(KeyScene.Query(self.redis_conn, self.mission_name))
    glossary_items = dict([
        (item.identifier.lower(), item)
        for item in Glossary.Query(self.redis_conn, self.mission_name)
    ])
    for chunk in self.parser.get_chunks():
        timestamp = chunk['timestamp']
        log_line_id = "%s:%i" % (self.transcript_name, timestamp)
        if timestamp <= previous_timestamp:
            raise Exception("%s should be after %s" % (
                seconds_to_timestamp(timestamp),
                seconds_to_timestamp(previous_timestamp)))

        # See if there's transcript page info, and update it if so
        if chunk['meta'].get('_page', 0):
            current_transcript_page = int(chunk['meta']['_page'])
        if chunk['meta'].get('_lang', None):
            current_lang = chunk['meta']['_lang']
        if current_transcript_page:
            self.redis_conn.set("log_line:%s:page" % log_line_id, current_transcript_page)

        # Look up the act
        for act in acts:
            if act.includes(timestamp):
                break
        else:
            print "Error: No act for timestamp %s" % seconds_to_timestamp(timestamp)
            continue

        # If we've filled up the current page, go to a new one
        if current_page_lines >= self.LINES_PER_PAGE or (
                last_act is not None and last_act != act):
            current_page += 1
            current_page_lines = 0
        last_act = act

        # First, create a record with some useful information
        info_key = "log_line:%s:info" % log_line_id
        info_record = {
            "offset": chunk['offset'],
            "page": current_page,
            "act": act.number,
            "utc_time": launch_time + timestamp,
        }
        if current_transcript_page:
            info_record["transcript_page"] = current_transcript_page
        if current_lang:
            info_record["lang"] = current_lang
        # And an editorial note if present
        if '_note' in chunk['meta']:
            info_record["note"] = chunk['meta']['_note']
        self.redis_conn.hmset(
            info_key,
            info_record,
        )

        # Look up the key scene
        for key_scene in key_scenes:
            if key_scene.includes(timestamp):
                self.redis_conn.hset(info_key, 'key_scene', key_scene.number)
                break

        # Create the doubly-linked list structure
        if previous_log_line_id:
            self.redis_conn.hset(
                info_key,
                "previous",
                previous_log_line_id,
            )
            self.redis_conn.hset(
                "log_line:%s:info" % previous_log_line_id,
                "next",
                log_line_id,
            )
        previous_log_line_id = log_line_id
        previous_timestamp = timestamp

        # Also store the text
        text = u""
        for line in chunk['lines']:
            self.redis_conn.rpush(
                "log_line:%s:lines" % log_line_id,
                u"%(speaker)s: %(text)s" % line,
            )
            text += "%s %s" % (line['speaker'], line['text'])

        # Store any images
        for i, image in enumerate(chunk['meta'].get("_images", [])):
            # Make the image id
            image_id = "%s:%s" % (log_line_id, i)
            # Push it onto the images list
            self.redis_conn.rpush(
                "log_line:%s:images" % log_line_id,
                image_id,
            )
            # Store the image data
            self.redis_conn.hmset(
                "image:%s" % image_id,
                image,
            )

        # Add that log line ID for the people involved
        speakers = set([line['speaker'] for line in chunk['lines']])
        for speaker in speakers:
            self.redis_conn.sadd("speaker:%s" % speaker, log_line_id)

        # Add it to the index for this page
        self.redis_conn.rpush(
            "page:%s:%i" % (self.transcript_name, current_page),
            log_line_id)
        # Add it to the index for this transcript page
        self.redis_conn.rpush(
            "transcript_page:%s:%s" % (self.transcript_name, current_transcript_page),
            log_line_id)

        # Add it into the transcript and everything sets
        self.redis_conn.zadd("log_lines:%s" % self.mission_name, log_line_id, chunk['timestamp'])
        self.redis_conn.zadd("transcript:%s" % self.transcript_name, log_line_id, chunk['timestamp'])

        # Read the new labels into current_labels
        has_labels = False
        if '_labels' in chunk['meta']:
            for label, endpoint in chunk['meta']['_labels'].items():
                if endpoint is not None and label not in current_labels:
                    current_labels[label] = endpoint
                elif label in current_labels:
                    current_labels[label] = max(current_labels[label], endpoint)
                elif endpoint is None:
                    self.redis_conn.sadd("label:%s" % label, log_line_id)
                    has_labels = True
        # Expire any old labels
        for label, endpoint in current_labels.items():
            if endpoint < chunk['timestamp']:
                del current_labels[label]
        # Apply any surviving labels
        for label in current_labels:
            self.redis_conn.sadd("label:%s" % label, log_line_id)
            has_labels = True

        # And add this log line to the search index
        if has_labels:
            print "weight = 3 for %s" % log_line_id
            weight = 3.0  # magic!
        else:
            weight = 1.0
        self.add_to_search_index(
            mission=self.mission_name,
            id=log_line_id,
            chunk=chunk,
            weight=weight,
            timestamp=timestamp,
        )

        # For any mentioned glossary terms, bump their mention counts.
        for word in text.split():
            word = word.strip(",;-:'\"").lower()
            if word in glossary_items:
                glossary_item = glossary_items[word]
                self.redis_conn.hincrby(
                    "glossary:%s" % glossary_item.id,
                    "times_mentioned",
                    1,
                )

        # Increment the number of log lines we've done
        current_page_lines += len(chunk['lines'])

    pages_set = self.redis_conn.hexists("pages:%s" % self.mission_name, self.transcript_name)
    if not pages_set and current_transcript_page:
        print "%s original pages: %d" % (self.transcript_name, current_transcript_page)
        self.redis_conn.hset("pages:%s" % self.mission_name, self.transcript_name, current_transcript_page)
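# A minimal sketch (assumptions: a bare redis-py connection and an already
# indexed transcript named "example_mission/TEC") of how the structures
# written by index() could be read back: each page list holds log line IDs in
# order, and every ID has an :info hash and a :lines list alongside it.
import redis

redis_conn = redis.Redis()
transcript_name = "example_mission/TEC"  # hypothetical transcript name

for log_line_id in redis_conn.lrange("page:%s:1" % transcript_name, 0, -1):
    info = redis_conn.hgetall("log_line:%s:info" % log_line_id)
    lines = redis_conn.lrange("log_line:%s:lines" % log_line_id, 0, -1)
    print info.get("utc_time"), lines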
def build_mission(self, mission):
    print "Building data visualisations for %s..." % mission.name
    for act in list(Act.Query(self.redis_conn, mission.name)):
        print ' ... %s' % act.title
        # Split the act into sections, one for each bar on the graph
        act_duration = act.end - act.start
        section_duration = act_duration // 92

        # Count the number of log lines in each segment
        # and find the maximum number of log lines in a segment
        t = act.start
        segment_line_counts = []
        max_line_count = 0
        real_output_path = self.image_output_path % mission.name
        while t < act.end:
            # Load log lines for this segment
            query = LogLine.Query(self.redis_conn, mission.name).transcript(
                mission.main_transcript).range(t, t + section_duration)
            line_count = len(list(query))
            # Store segment stats
            max_line_count = max(line_count, max_line_count)
            segment_line_counts.append((t, t + section_duration, line_count))
            t += section_duration

        # Make sure we have an output directory and work out where to
        # write the image
        try:
            os.makedirs(real_output_path)
        except OSError:
            pass
        graph_file = 'graph_%s_%s.png' % (mission.name, act.number)
        output_path = '%s/%s' % (real_output_path, graph_file)

        # Add initial draw command
        draw_commands = [
            'convert',
            '-size', '%dx%d' % (self.width, self.height),
            'xc:transparent',
            '-fill', self.end_marker_colour,
            '-draw', "path 'M 1,1 L 10,1 L 5,8 L 1,1'",
            '-draw', "path 'M 890,1 L 900,1 L 895,8 L 890,1'",
            '-fill', self.graph_bar_colour,
        ]

        # Add initial image map tags
        image_map_id = '%s_%s_frequency_graph' % (mission.name, act.number)
        image_map = [
            '<map id="%s" name="%s">' % (image_map_id, image_map_id)
        ]

        # Iterate over the segments and add them to the draw commands and image map
        for i, line in enumerate(segment_line_counts):
            start, end, count = line
            height = int(round(
                count / float(max(max_line_count, 1)) * self.max_bar_height))
            bar_width = 6
            bar_spacing = 4
            top_left_x = i * (bar_width + bar_spacing) + 2
            top_left_y = self.max_bar_height - height + 14
            bottom_right_x = top_left_x + bar_width
            bottom_right_y = self.max_bar_height + 14
            draw_commands.append('-draw')
            draw_commands.append(
                'rectangle %s,%s,%s,%s' % (
                    top_left_x, top_left_y, bottom_right_x, bottom_right_y))
            if height > 0:
                image_map.append(
                    '<area shape="rect" coords="%(coords)s" href="%(url)s" alt="%(alt)s">' % {
                        "url": '/%s/%s/#show-selection' % (
                            seconds_to_timestamp(start), seconds_to_timestamp(end)),
                        "alt": '%d lines between %s and %s' % (
                            count, seconds_to_timestamp(start), seconds_to_timestamp(end)),
                        "coords": '%s,%s,%s,%s' % (
                            top_left_x, top_left_y, bottom_right_x, bottom_right_y),
                    })

        # Output the basic graph image
        draw_commands.append(output_path)
        subprocess.call(draw_commands)

        # Iterate over the key scenes adding them to the graph and image map
        for i, key_scene in enumerate(act.key_scenes()):
            print ' - %s' % key_scene.title
            top_left_x = int(
                (self.graph_background_width / float(act_duration)) *
                (key_scene.start - act.start)) + 2
            top_left_y = self.max_bar_height + 5 + 14
            bottom_right_x = top_left_x + 20
            bottom_right_y = top_left_y + 20
            marker_image = self.key_scene_marker_files % (i + 1)
            subprocess.call([
                'composite',
                '-geometry', '+%s+%s' % (top_left_x, top_left_y),
                marker_image,
                output_path,
                output_path,
            ])
            image_map.append(
                '<area shape="rect" coords="%(coords)s" href="%(url)s" alt="%(alt)s">' % {
                    "url": '/%s/%s/#show-selection' % (
                        seconds_to_timestamp(key_scene.start),
                        seconds_to_timestamp(key_scene.end)),
                    "alt": key_scene.title.decode('utf-8'),
                    "coords": '%s,%s,%s,%s' % (
                        top_left_x, top_left_y, bottom_right_x, bottom_right_y),
                })

        # Finalise the image map
        image_map.append('</map>')
        self.redis_conn.hmset(
            'act:%s:%s:stats' % (mission.name, act.number),
            {
                "image_map": "\n".join(image_map),
                "image_map_id": image_map_id,
            })
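# A minimal sketch (assumption, not project code) of how the per-act stats
# written above might be read back for rendering. redis_conn, mission_name and
# act_number are placeholders for whatever the rendering code has to hand.
def get_act_image_map(redis_conn, mission_name, act_number):
    stats = redis_conn.hgetall('act:%s:%s:stats' % (mission_name, act_number))
    # image_map is ready-to-embed <map> markup; image_map_id matches the
    # usemap="#<id>" attribute the corresponding <img> tag would need.
    return stats.get('image_map', ''), stats.get('image_map_id', '')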
def act_query(self):
    return Act.Query(self.request.redis_conn, self.request.mission.name)