Exemplo n.º 1
0
    def add_articles_chain(self, graph, records, serialize=True):
        '''Wire the article-modelling nodes into the bonobo graph.

        The 'record' value is extracted from whatever `records.output`
        emits and handed to `ModelArticle`. Three further chains branch
        off the modelled articles, extracting the values stored under
        '_people', '_activities' and '_groups'.

        When `serialize` is true, the articles, their '_places', groups,
        activities and people are additionally connected to
        serialization chains.

        Returns the chain that ends in `ModelArticle`.
        '''
        articles = graph.add_chain(
            ExtractKeyedValue(key='record'),
            ModelArticle(helper=self.helper),
            _input=records.output,
        )

        # One extraction branch per related-data key, all fed by the articles.
        branches = {}
        for branch_key in ('_people', '_activities', '_groups'):
            branches[branch_key] = graph.add_chain(
                ExtractKeyedValues(key=branch_key),
                _input=articles.output,
            )

        if not serialize:
            return articles

        # write ARTICLES data
        self.add_places_chain(graph, articles, key='_places', serialize=True)
        serialized = (
            (articles, 'LinguisticObject'),
            (branches['_groups'], 'Group'),
            (branches['_activities'], 'Activity'),
        )
        for chain, model_name in serialized:
            self.add_serialization_chain(
                graph,
                chain.output,
                model=self.models[model_name],
            )
        # The returned people chain is not needed here.
        self.add_person_or_group_chain(graph, branches['_people'], serialize=True)
        return articles
Exemplo n.º 2
0
	def add_series_chain(self, graph, records, serialize=True):
		'''Wire the series-modelling nodes into the graph.

		The 'record' value is extracted from whatever `records.output`
		emits and handed to `ModelSeries`. Two chains branch off the
		modelled series, extracting the '_activities' and '_texts' values.

		When `serialize` is true, the activities, the texts and the series
		themselves are each connected to a serialization chain.

		Returns the chain that ends in `ModelSeries`.
		'''
		extract_record = ExtractKeyedValue(key='record')
		model_series = ModelSeries(helper=self.helper)
		series = graph.add_chain(extract_record, model_series, _input=records.output)

		activities = graph.add_chain(
			ExtractKeyedValues(key='_activities'),
			_input=series.output
		)
		texts = graph.add_chain(
			ExtractKeyedValues(key='_texts'),
			_input=series.output
		)

		if not serialize:
			return series

		# Both the extracted texts and the series use the LinguisticObject model.
		for chain, model_name in (
			(activities, 'Activity'),
			(texts, 'LinguisticObject'),
			(series, 'LinguisticObject'),
		):
			self.add_serialization_chain(graph, chain.output, model=self.models[model_name])
		return series
Exemplo n.º 3
0
    def add_person_or_group_chain(self, graph, input, key=None, serialize=True):
        '''Branch a chain into person and group streams, optionally serialized.

        If `key` is truthy, the values stored under that key are first
        extracted from `input.output`; otherwise `input` itself is used as
        the source chain. Two chains are then attached to the source, one
        passing through `OnlyRecordsOfType(type=model.Person)` and one
        through `OnlyRecordsOfType(type=model.Group)`.

        When `serialize` is true, both chains are connected to
        serialization chains using the 'Person' and 'Group' models.

        Returns the person chain only; the group chain is not returned.
        '''
        source = input
        if key:
            source = graph.add_chain(
                ExtractKeyedValues(key=key),
                _input=input.output,
            )

        person_filter = OnlyRecordsOfType(type=model.Person)
        people = graph.add_chain(person_filter, _input=source.output)
        group_filter = OnlyRecordsOfType(type=model.Group)
        groups = graph.add_chain(group_filter, _input=source.output)

        if serialize:
            # write OBJECTS data
            for chain, model_name in ((people, 'Person'), (groups, 'Group')):
                self.add_serialization_chain(
                    graph,
                    chain.output,
                    model=self.models[model_name],
                )
        return people
Exemplo n.º 4
0
	def add_geog_chain(self, graph, records, serialize=True):
		'''Wire the place-modelling nodes into the graph.

		The 'record' value is extracted from whatever `records.output`
		emits and handed to `ModelPlace`. A second chain extracts the
		'_activities' values from the modelled places.

		When `serialize` is true, the activities are serialized, the places
		are passed to `add_places_chain` with no extraction key, and the
		places themselves are serialized with the 'Place' model.

		Returns the chain that ends in `ModelPlace`.
		'''
		extract_record = ExtractKeyedValue(key='record')
		model_place = ModelPlace(helper=self.helper)
		places = graph.add_chain(extract_record, model_place, _input=records.output)

		activities = graph.add_chain(
			ExtractKeyedValues(key='_activities'),
			_input=places.output
		)

		if not serialize:
			return places

		write = self.add_serialization_chain
		write(graph, activities.output, model=self.models['Activity'])
		# key=None: the places chain is used directly, with no keyed extraction first.
		self.add_places_chain(graph, places, key=None, serialize=True)
		write(graph, places.output, model=self.models['Place'])
		return places
Exemplo n.º 5
0
	def add_corp_chain(self, graph, records, serialize=True):
		'''Wire the corporate-body modelling nodes into the graph.

		The 'record' value is extracted from whatever `records.output`
		emits and handed to `ModelCorp`. A second chain extracts the
		'_activities' values from the modelled records.

		When `serialize` is true, the '_places' of each record are routed
		through `add_places_chain`, and the activities and the records
		themselves are serialized with the 'Activity' and 'Group' models.

		Returns the chain that ends in `ModelCorp`.
		'''
		extract_record = ExtractKeyedValue(key='record')
		model_corp = ModelCorp(helper=self.helper)
		corps = graph.add_chain(extract_record, model_corp, _input=records.output)

		activities = graph.add_chain(
			ExtractKeyedValues(key='_activities'),
			_input=corps.output
		)

		if not serialize:
			return corps

		self.add_places_chain(graph, corps, key='_places', serialize=True)
		for chain, model_name in ((activities, 'Activity'), (corps, 'Group')):
			self.add_serialization_chain(graph, chain.output, model=self.models[model_name])
		return corps
Exemplo n.º 6
0
 def add_places_chain(self, graph, auction_events, key='_locations', serialize=True):
     '''Attach a place-extraction chain to `auction_events`, optionally serialized.

     The chain always ends with `RecursiveExtractKeyedValue(key='part_of')`.
     If `key` is truthy, an `ExtractKeyedValues(key=key)` node is placed in
     front of it; otherwise the recursive extractor is the only node.

     When `serialize` is true, the chain's output is connected to a
     serialization chain using the 'Place' model.

     Returns the newly added places chain.
     '''
     # The keyed extractor is optional; a falsy key skips it entirely.
     steps = [ExtractKeyedValues(key=key)] if key else []
     steps.append(RecursiveExtractKeyedValue(key='part_of'))

     places = graph.add_chain(*steps, _input=auction_events.output)
     if not serialize:
         return places

     # write OBJECTS data
     self.add_serialization_chain(graph, places.output, model=self.models['Place'])
     return places