# NOTE(review): `channel`, `engines`, `mux_streams` and `geonames_aggregate`
# are defined outside this fragment.  The append to `mux_streams` suggests the
# statements down to that append may belong to a per-channel loop body --
# confirm the indentation against the full file.

# Select over the channel that passes the integer 'oid' attribute through
# unchanged (identity function).
select = Select(
    channel,
    UniversalSelect(
        channel.schema(),
        {
            'oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v,
            },
        },
    ),
)
engines.append(select)

# Group the selected records on 'oid', comparing values by equality.
counties_grouper = Group(
    select.output(),
    {'oid': lambda a, b: a == b},
)
engines.append(counties_grouper)

# Join the grouped stream with the geonames aggregate output.
joiner = Join(
    counties_grouper.output(),
    geonames_aggregate.output(),
)
engines.append(joiner)

# The join output is one of the inputs of the multiplexer built below.
mux_streams.append(joiner.output())
# mux_streams.append(counties_select.output())

# Combine every collected stream into a single Mux.
mux = Mux(*mux_streams)
engines.append(mux)

# Feed the multiplexed stream into a ResultFile created with 'results.txt'.
result_stack = ResultFile('results.txt', mux.output())
engines.append(result_stack)

# result_stack = ResultStack(
#     mux.output(),
'type': Geometry, 'args': ['counties.the_geom'], 'function': lambda v: v, }, })) engines.append(counties_oid_select) # Group states by OID states_group = Group(states_select.output(), { 'states.oid': lambda a, b: a == b }) engines.append(states_group) # Join counties and states states_counties_join = Join( states_group.output(), counties_oid_select.output(), ) engines.append(states_counties_join) # De-multiplex the joined stream across multiple tracks for better CPU core # utilization. demux = Demux(states_counties_join.output()) mux_streams = [] for i in range(tracks): channel = demux.channel() # To query the locations in the geonames layer, trim the counties to # the state and query boundary. counties_select = Select( channel, UniversalSelect(
# Select over the aggregate output that derives a single str attribute,
# 'name_age', from the 'name' and 'age' attributes.
aselect = Select(
    aggregate.output(),
    UniversalSelect(
        aggregate.output().schema(),
        {
            'name_age': {
                'type': str,
                'args': ['name', 'age'],
                # Renders e.g. ('Bob', 42) as 'Bob --> 42'.
                'function': lambda name, age: '%s --> %d' % (name, age),
            },
        },
    ),
)

# Join the query-side select (defined outside this fragment) with the
# formatted aggregate stream.
joiner = Join(qselect.output(), aselect.output())

# The commented-out entries are alternative streams to inspect instead of the
# join output.
result_stack = ResultStack(
    # aggregate.output(),
    joiner.output(),
    # query_streamer.output(),
    # query_grouper.output(),
    # select.output(),
)

info_queue = Queue()


def manage(task):
    """Print a 'Running: <task>' line and then run *task*.

    task -- any object that supports str() and provides a run() method.
    """
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and valid syntax for the Python 3 print function.
    print('Running: ' + str(task))
    task.run()
# Create a data accessor from the query stream, the data source and FindRange
# (query_streamer, data_source and FindRange are defined outside this
# fragment).
data_accessor = DataAccessor(
    query_streamer.output(),
    data_source,
    FindRange
)

# Run the accessor output through a Select driven by NameAgeCombiner.
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

# Group the query stream on 'age'.  Values are compared with ==, not `is`:
# identity only coincides with equality for objects the interpreter happens
# to share (e.g. small cached ints), so `is` could mis-group equal ages.
query_grouper = Group(
    query_streamer.output(),
    {'age': lambda a, b: a == b}
)

# Join the grouped queries with the selected records.
joiner = Join(query_grouper.output(), select.output())

# NOTE: this name shadows the builtin filter(); it is kept because code
# outside this fragment may refer to it.
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

# The commented-out entries are alternative streams to inspect instead of the
# filter output.
result_stack = ResultStack(
    filter.output(),
    # joiner.output(),
    # query_streamer.output(),
    # query_grouper.output(),
    # select.output(),
)

info_queue = Queue()


def manage(task):
    """Print a 'Running: <task>' line and then run *task*.

    task -- any object that supports str() and provides a run() method.
    """
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and valid syntax for the Python 3 print function.
    print('Running: ' + str(task))
    task.run()
'function': lambda v: v } })) engines.append(family_id_select) # Data source for the genera. genus_source = DBTable(input_file, 'genus', genus_schema) # Data accessor for the genera data source. genus_accessor = DataAccessor(family_id_select.output(), genus_source, FindIdentities) engines.append(genus_accessor) # A join mini-engine to associate families with genera. family_genus_joiner = Join( family_id_grouper.output(), genus_accessor.output(), ) engines.append(family_genus_joiner) # A group mini-engine to split the (family, genus) IDs into groups. family_genus_id_grouper = Group( family_genus_joiner.output(), { 'family.id': lambda a, b: a == b, 'genus.id': lambda a, b: a == b }, ) engines.append(family_genus_id_grouper) # Select only the genus ID for querying species. genus_id_select = Select(
# Schema definition of the data stream: 'name' (str), 'age' (int) and
# 'rowid' (int).
data_schema = Schema()
data_schema.append(Attribute('name', str))
data_schema.append(Attribute('age', int))
# NOTE(review): the meaning of the third argument (True) is defined by
# Attribute, which is not visible here -- confirm.
data_schema.append(Attribute('rowid', int, True))
data_source = DBTable('test.db', 'person', data_schema)

# Create a data accessor from the query stream, the data source and FindRange
# (query_streamer and FindRange are defined outside this fragment).
data_accessor = DataAccessor(query_streamer.output(), data_source, FindRange)

# Run the accessor output through a Select driven by NameAgeCombiner.
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

# Group the query stream on 'age'.  'age' is declared as int above, so the
# values are compared with ==, not `is`: identity only coincides with equality
# for ints the interpreter happens to cache, and could mis-group equal ages.
query_grouper = Group(query_streamer.output(), {'age': lambda a, b: a == b})

# Join the grouped queries with the selected records.
joiner = Join(query_grouper.output(), select.output())

# NOTE: this name shadows the builtin filter(); it is kept because code
# outside this fragment may refer to it.
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

# The commented-out entries are alternative streams to inspect instead of the
# filter output.
result_stack = ResultStack(
    filter.output(),
    # joiner.output(),
    # query_streamer.output(),
    # query_grouper.output(),
    # select.output(),
)

info_queue = Queue()


def manage(task):
    """Print a 'Running: <task>' line for *task*.

    task -- any object that supports str().
    """
    # NOTE(review): this only announces the task; it does not call
    # task.run() -- confirm that is intended.
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and valid syntax for the Python 3 print function.
    print('Running: ' + str(task))
), ) engines.append(geonames_select) geonames_aggregate = Aggregate(geonames_select.output(), SumAggregator(geonames_select.output().schema(), "count")) engines.append(geonames_aggregate) select = Select( channel, UniversalSelect(channel.schema(), {"oid": {"type": int, "args": ["oid"], "function": lambda v: v}}) ) engines.append(select) counties_grouper = Group(select.output(), {"oid": lambda a, b: a == b}) engines.append(counties_grouper) joiner = Join(counties_grouper.output(), geonames_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) # mux_streams.append(counties_select.output()) mux = Mux(*mux_streams) engines.append(mux) result_stack = ResultFile("results.txt", mux.output()) engines.append(result_stack) # result_stack = ResultStack( # mux.output(), # ) # engines.append(result_stack)
'type': int, 'args': ['oid'], 'function': lambda v: v, }, 'states.geom': { 'type': Geometry, 'args': ['states.geom'], 'function': lambda v: v, } } ) ) engines.append(states_select) states_join = Join( query_streamer.output(), states_select.output() ) engines.append(states_join) states_trim = Select( states_join.output(), UniversalSelect( states_join.output().schema(), { 'states.oid': { 'type': int, 'args': ['states.oid'], 'function': lambda v: v, }, 'states.geom': { 'type': Geometry,
# Genus table: opened from the input file under the name 'genus' with the
# genus schema (input_file and genus_schema are defined outside this
# fragment).
genus_source = DBTable(input_file, 'genus', genus_schema)

# Accessor over the genus source, fed by the family-ID select output and
# constructed with FindIdentities.
genus_accessor = DataAccessor(
    family_id_select.output(),
    genus_source,
    FindIdentities,
)
engines.append(genus_accessor)

# Join mini-engine pairing the grouped family IDs with the accessor output,
# i.e. families with genera.
family_genus_joiner = Join(
    family_id_grouper.output(),
    genus_accessor.output(),
)
engines.append(family_genus_joiner)

# Group mini-engine that splits the joined stream on both 'family.id' and
# 'genus.id', each compared by equality.
family_genus_id_grouper = Group(
    family_genus_joiner.output(),
    {
        'family.id': lambda a, b: a == b,
        'genus.id': lambda a, b: a == b,
    },
)
engines.append(family_genus_id_grouper)
'type': int, 'args': ['plants.height'], 'function': lambda v: v } })) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema())) engines.append(plants_height_aggregate) species_id_grouper = Group(channel, {'species.id': lambda a, b: a == b}) engines.append(species_id_grouper) joiner = Join(species_id_grouper.output(), plants_height_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) result_stack = ResultFile( 'results.txt', mux.output(), ) info_queue = Queue() def manage(task): print 'Running: ' + str(task)
}, } ) ) engines.append(counties_oid_select) # Group states by OID states_group = Group( states_select.output(), {'states.oid': lambda a, b: a == b} ) engines.append(states_group) # Join counties and states states_counties_join = Join( states_group.output(), counties_oid_select.output(), ) engines.append(states_counties_join) # De-multiplex the joined stream across multiple tracks for better CPU core # utilization. demux = Demux(states_counties_join.output()) mux_streams = [] for i in range(tracks): channel = demux.channel() # To query the locations in the geonames layer, trim the counties to # the state and query boundary. counties_select = Select( channel, UniversalSelect(
UniversalSelect( states_accessor.output().schema(), { 'states.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, }, 'states.geom': { 'type': Geometry, 'args': ['states.geom'], 'function': lambda v: v, } })) engines.append(states_select) states_join = Join(query_streamer.output(), states_select.output()) engines.append(states_join) states_trim = Select( states_join.output(), UniversalSelect( states_join.output().schema(), { 'states.oid': { 'type': int, 'args': ['states.oid'], 'function': lambda v: v, }, 'states.geom': { 'type': Geometry, 'args': ['queries.geom', 'states.geom'], 'function': lambda a, b: intersection(a, b),