def transform(self, corpus: Corpus):
    '''
    records, for every user, the utterances they made in each conversation,
    and annotates the user with summary statistics.

    utterances rejected by ``self.utterance_filter`` are ignored. the rest are
    grouped by ``utterance.user.name`` and then by ``utterance.root``
    (presumably the conversation id -- confirm against the Utterance class).
    each user seen this way receives a ``'conversations'`` metadata entry: a
    dict keyed by root whose values hold

    * ``'utterance_ids'``: the utterance ids, ordered by timestamp
    * ``'start_time'``: the smallest timestamp in the group
    * ``'n_utterances'``: the number of utterances in the group

    afterwards every user whose meta contains ``'conversations'`` also gets
    ``'n_convos'``, ``'start_time'`` (the earliest conversation start time)
    and, inside each conversation record, ``'idx'``: the position of that
    conversation when the user's conversations are ordered by start time.

    :param corpus: the Corpus to transform.
    :type corpus: Corpus
    :return: the corpus that was passed in.
    '''
    def summarize(entries):
        # entries are (utterance id, timestamp) pairs; order them by timestamp.
        ordered = sorted(entries, key=lambda entry: entry[1])
        return {
            'utterance_ids': [utt_id for utt_id, _ in ordered],
            'start_time': ordered[0][1],
            'n_utterances': len(ordered)
        }

    # user name -> root -> list of (utterance id, timestamp)
    grouped = defaultdict(lambda: defaultdict(list))
    for utt in corpus.iter_utterances():
        if self.utterance_filter(utt):
            grouped[utt.user.name][utt.root].append((utt.id, utt.timestamp))

    for user_name, per_convo in grouped.items():
        history = {}
        for root, entries in per_convo.items():
            history[root] = summarize(entries)
        corpus.get_user(user_name).add_meta('conversations', history)

    for user in corpus.iter_users():
        if 'conversations' in user.meta:
            user.add_meta('n_convos', len(user.meta['conversations']))
            by_start = sorted(
                user.meta['conversations'].items(),
                key=lambda item: item[1]['start_time'],
            )
            # the first conversation in start-time order fixes the user's start time.
            user.add_meta('start_time', by_start[0][1]['start_time'])
            for position, item in enumerate(by_start):
                user.meta['conversations'][item[0]]['idx'] = position
    return corpus
def transform(self, corpus: Corpus):
    '''
    computes one aggregate value per (user, conversation) pair.

    for each user whose meta contains ``'conversations'``, and for each
    conversation record in it, the ``self.attr_name`` metadata value of every
    utterance listed under ``'utterance_ids'`` is collected (in listed order)
    and passed to ``self.agg_fn``. the result is stored in the conversation
    record under the key ``self.attr_name``. users without
    ``'conversations'`` meta are left untouched.

    :param corpus: the Corpus to transform.
    :type corpus: Corpus
    :return: the corpus that was passed in.
    '''
    for user in corpus.iter_users():
        if 'conversations' in user.meta:
            for convo_id, record in user.meta['conversations'].items():
                collected = []
                for utt_id in record['utterance_ids']:
                    utt = corpus.get_utterance(utt_id)
                    collected.append(utt.meta[self.attr_name])
                aggregate = self.agg_fn(collected)
                user.meta['conversations'][convo_id][self.attr_name] = aggregate
    return corpus
def transform(self, corpus: Corpus):
    '''
    computes one aggregate value per (user, conversation) pair and stores it
    through the corpus.

    for each user whose meta contains ``'conversations'``, and for each
    conversation record in it, the ``self.attr_name`` metadata value of every
    utterance listed under ``'utterance_ids'`` is collected (in listed order),
    passed to ``self.agg_fn``, and written with
    ``corpus.set_user_convo_info`` under ``self.output_field``.

    when ``self.recompute`` is falsy, a pair is skipped if
    ``corpus.get_user_convo_info`` already returns something other than
    ``None`` for ``self.output_field``; when it is truthy, the existing value
    is not looked up and is always overwritten.

    :param corpus: the Corpus to transform.
    :type corpus: Corpus
    :return: the corpus that was passed in.
    '''
    for user in corpus.iter_users():
        if 'conversations' in user.meta:
            for convo_id, record in user.meta['conversations'].items():
                if not self.recompute:
                    # only consult the stored value when recomputation is off.
                    stored = corpus.get_user_convo_info(
                        user.name, convo_id, self.output_field)
                    if stored is not None:
                        continue
                collected = []
                for utt_id in record['utterance_ids']:
                    utt = corpus.get_utterance(utt_id)
                    collected.append(utt.meta[self.attr_name])
                corpus.set_user_convo_info(
                    user.name, convo_id, self.output_field,
                    self.agg_fn(collected))
    return corpus