Example #1
        geonames_select.output(),
        SumAggregator(geonames_select.output().schema(), 'count'))
    engines.append(geonames_aggregate)

    select = Select(
        channel,
        UniversalSelect(channel.schema(), {
            'oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v
            },
        }))
    engines.append(select)

    counties_grouper = Group(select.output(), {'oid': lambda a, b: a == b})
    engines.append(counties_grouper)

    joiner = Join(counties_grouper.output(), geonames_aggregate.output())
    engines.append(joiner)
    mux_streams.append(joiner.output())
    # mux_streams.append(counties_select.output())

mux = Mux(*mux_streams)
engines.append(mux)

result_stack = ResultFile(
    'results.txt',
    mux.output(),
)
engines.append(result_stack)
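
Each per-track joiner output is collected in mux_streams and merged by a single Mux, which the ResultFile writer then drains into results.txt. A minimal plain-Python sketch of that fan-in step, assuming simple tuple records (illustrative only, not the library's Mux implementation):

def mux(*streams):
    # Drain each track's stream in turn; cross-track ordering is not preserved.
    for stream in streams:
        for record in stream:
            yield record

track_a = iter([(1, 42), (2, 17)])
track_b = iter([(3, 99)])

with open('results.txt', 'w') as results:
    for record in mux(track_a, track_b):
        results.write('%s\n' % (record,))
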
Example #2
            'counties.oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v,
            },
            'counties.the_geom': {
                'type': Geometry,
                'args': ['counties.the_geom'],
                'function': lambda v: v,
            },
        }))
engines.append(counties_oid_select)

# Group states by OID
states_group = Group(states_select.output(), {
    'states.oid': lambda a, b: a == b
})
engines.append(states_group)

# Join counties and states
states_counties_join = Join(
    states_group.output(),
    counties_oid_select.output(),
)
engines.append(states_counties_join)

# De-multiplex the joined stream across multiple tracks for better CPU core
# utilization.
demux = Demux(states_counties_join.output())
mux_streams = []
for i in range(tracks):
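
The example is cut off at the start of the per-track loop. The Demux splits the joined stream across `tracks` parallel pipelines so more CPU cores can be kept busy, and the per-track results are merged again further down. A rough plain-Python sketch of the fan-out idea, assuming a simple round-robin policy (the library's actual Demux behaviour is not shown here):

def demux(records, tracks):
    # Distribute records over the tracks round-robin; each bucket would be
    # processed by its own copy of the downstream pipeline.
    buckets = [[] for _ in range(tracks)]
    for i, record in enumerate(records):
        buckets[i % tracks].append(record)
    return buckets

for track_records in demux(range(10), tracks=3):
    print(track_records)
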
Example #3
data_schema = Schema()
data_schema.append(Attribute('name', str))
data_schema.append(Attribute('age', int))

data_schema.append(Attribute('rowid', int, True))
data_source = DBTable('test.db', 'person', data_schema)

# create a data accessor
data_accessor = DataAccessor(
    query_streamer.output(), 
    data_source,
    FindRange
)

query_grouper = Group(
    query_streamer.output(), 
    {'age': lambda a, b: a == b}
)

qselect = Select(
    query_grouper.output(), 
    AttributeRename(
        query_grouper.output().schema(),
        { 'age': 'age_range' }
    )
)

aggregate = Aggregate(
    data_accessor.output(),
    SumAgeAggregator(data_accessor.output().schema())
)
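
DBTable('test.db', 'person', data_schema) reads from an existing SQLite file. A minimal setup sketch for that table, assuming plain sqlite3 and the two columns declared in the schema above (the sample rows are made up):

import sqlite3

# Create the 'person' table that DBTable('test.db', 'person', ...) expects;
# 'rowid' is SQLite's implicit integer key, matching the rowid attribute above.
connection = sqlite3.connect('test.db')
connection.execute('CREATE TABLE IF NOT EXISTS person (name TEXT, age INTEGER)')
connection.executemany(
    'INSERT INTO person (name, age) VALUES (?, ?)',
    [('alice', 34), ('bob', 27)],
)
connection.commit()
connection.close()
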
Example #4
data_schema.append(Attribute('age', int))

data_schema.append(Attribute('rowid', int, True))
data_source = DBTable('test.db', 'person', data_schema)

# create a data accessor
data_accessor = DataAccessor(
    query_streamer.output(), 
    data_source,
    FindRange
)
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

query_grouper = Group(
    query_streamer.output(), 
    {'age': lambda a, b: a == b}
)

joiner = Join(query_grouper.output(), select.output())
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

result_stack = ResultStack(
    filter.output(),
#    joiner.output(),
#    query_streamer.output(),
#    query_grouper.output(),
#    select.output(),
)

info_queue = Queue()
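
The Group mini-engine partitions its input with per-attribute comparison functions, so {'age': lambda a, b: a == b} puts consecutive records with equal ages into one group. Conceptually this is close to itertools.groupby; a standalone sketch of the idea (not the library's Group implementation):

from itertools import groupby

records = [('alice', 27), ('bob', 27), ('carol', 34)]

# Group consecutive records whose age field compares equal, mirroring the
# {'age': lambda a, b: a == b} predicate used above.
for age, members in groupby(records, key=lambda record: record[1]):
    print(age, list(members))
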
Example #5
# The query stream contains only a single query.
query_streamer = ArrayStreamer(query_schema, [
    (IntInterval(0, int(1E10)), ),
])
engines.append(query_streamer)

# Create a family data source: a table in the input database.
family_source = DBTable(input_file, 'family', family_schema)
# Data accessor for the species data source.
family_accessor = DataAccessor(query_streamer.output(), family_source,
                               FindRange)
engines.append(family_accessor)

# A group mini-engine to split the family IDs into groups.
family_id_grouper = Group(family_accessor.output(), {
    'family.id': lambda a, b: a == b
})
engines.append(family_id_grouper)

# Select only the family ID for querying genera.
family_id_select = Select(
    family_accessor.output(),
    UniversalSelect(
        family_accessor.output().schema(), {
            'genus.family_id': {
                'type': int,
                'args': ['family.id'],
                'function': lambda v: v
            }
        }))
engines.append(family_id_select)
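
Each entry in the UniversalSelect specification names an output attribute and gives its type, the input attributes it is computed from ('args') and the combining function; here the mapping simply forwards family.id under the name genus.family_id. A plain-Python sketch of applying such a specification to dictionary records (illustrative only, not the library's implementation):

mapping = {
    'genus.family_id': {
        'type': int,
        'args': ['family.id'],
        'function': lambda v: v,
    },
}

def apply_mapping(record, mapping):
    # Compute each output attribute from the named input attributes.
    return {
        name: spec['function'](*[record[arg] for arg in spec['args']])
        for name, spec in mapping.items()
    }

print(apply_mapping({'family.id': 7, 'family.name': 'Rosaceae'}, mapping))
# {'genus.family_id': 7}
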
Example #6
])

# schema definition of the data stream
data_schema = Schema()
data_schema.append(Attribute('name', str))
data_schema.append(Attribute('age', int))

data_schema.append(Attribute('rowid', int, True))
data_source = DBTable('test.db', 'person', data_schema)

# create a data accessor
data_accessor = DataAccessor(query_streamer.output(), data_source, FindRange)
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

query_grouper = Group(query_streamer.output(), {'age': lambda a, b: a == b})

joiner = Join(query_grouper.output(), select.output())
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

result_stack = ResultStack(
    filter.output(),
    #    joiner.output(),
    #    query_streamer.output(),
    #    query_grouper.output(),
    #    select.output(),
)

info_queue = Queue()

Example #7
        UniversalSelect(
            geonames_accessor.output().schema(),
            {"count": {"type": int, "args": ["geonames.location"], "function": lambda v: 1}},
        ),
    )
    engines.append(geonames_select)

    geonames_aggregate = Aggregate(geonames_select.output(), SumAggregator(geonames_select.output().schema(), "count"))
    engines.append(geonames_aggregate)

    select = Select(
        channel, UniversalSelect(channel.schema(), {"oid": {"type": int, "args": ["oid"], "function": lambda v: v}})
    )
    engines.append(select)

    counties_grouper = Group(select.output(), {"oid": lambda a, b: a == b})
    engines.append(counties_grouper)

    joiner = Join(counties_grouper.output(), geonames_aggregate.output())
    engines.append(joiner)
    mux_streams.append(joiner.output())
    # mux_streams.append(counties_select.output())

mux = Mux(*mux_streams)
engines.append(mux)

result_stack = ResultFile("results.txt", mux.output())
engines.append(result_stack)

# result_stack = ResultStack(
#    mux.output(),
Example #8
])
engines.append(query_streamer)

# Create a family data source: a table in the input database.
family_source = DBTable(input_file, 'family', family_schema)
# Data accessor for the species data source.
family_accessor = DataAccessor(
    query_streamer.output(), 
    family_source,
    FindRange
)
engines.append(family_accessor)

# A group mini-engine to split the family IDs into groups.
family_id_grouper = Group(
    family_accessor.output(), 
    {'family.id': lambda a, b: a == b}
)
engines.append(family_id_grouper)

# Select only the family ID for querying genera.
family_id_select = Select(
    family_accessor.output(),
    UniversalSelect(
        family_accessor.output().schema(),
        {
            'genus.family_id': {
                'type': int,
                'args': ['family.id'],
                'function': lambda v: v
            }
        }
Example #9
                'args': ['states.oid'],
                'function': lambda v: v,
            },
            'states.geom': {
                'type': Geometry,
                'args': ['queries.geom', 'states.geom'],
                'function': lambda a, b: intersection(a, b),
            }
        }
    )
)
engines.append(states_trim)

states_group = Group(
    states_trim.output(),
    {
        'states.oid': lambda a, b: a == b,
    }
)
engines.append(states_group)

#############################################################
#
# Counties
#
#############################################################

counties_query = Select(
    states_trim.output(),
    UniversalSelect(
        states_trim.output().schema(),
        {
Example #10
    zip_accessor = DataAccessor(select.output(), zip_source, FindRange)
    sub_schema2 = SubSchema(zip_accessor.output().schema(), {'oid': 'zip'})
    zip_select = Select(zip_accessor.output(), sub_schema2)
    zip_selects.append(zip_select)
    sort = Sort(
        zip_select.output(),
        {
            'zip': lambda a, b: (a // 100) - (b // 100)  # cmp-style: negative/zero/positive
        },
        # {'zip': None },
        # True
    )
    sorts.append(sort)
    group = Group(sort.output(), {
        'zip': lambda a, b: (a // 1000) == (b // 1000)
    }
                  # {'zip': None }
                  )
    groups.append(group)
    data_accessors.append(zip_accessor)

mux = Mux(*[s.output() for s in groups])

result_stack = ResultStack(
    #    query_streamer.output(),
    mux.output(),
    #    data_accessor.output(),
)

tasks = []
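
The Sort mini-engine is given cmp-style comparators that return a negative, zero or positive value; here the ZIP codes are ordered by their hundreds bucket. On Python 3 the same comparator shape can be exercised with functools.cmp_to_key; a standalone sketch with plain lists (not the library's Sort):

from functools import cmp_to_key

zips = [94110, 10001, 94103, 10018]

# Order by the hundreds bucket (zip // 100), as in the comparator above.
ordered = sorted(zips, key=cmp_to_key(lambda a, b: (a // 100) - (b // 100)))
print(ordered)  # [10001, 10018, 94110, 94103]
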
Example #11
        UniversalSelect(
            plants_filter.output().schema(), {
                'plants.height': {
                    'type': int,
                    'args': ['plants.height'],
                    'function': lambda v: v
                }
            }))
    engines.append(plants_height_select)

    plants_height_aggregate = Aggregate(
        plants_height_select.output(),
        MaxHeightAggregator(plants_height_select.output().schema()))
    engines.append(plants_height_aggregate)

    species_id_grouper = Group(channel, {'species.id': lambda a, b: a == b})
    engines.append(species_id_grouper)

    joiner = Join(species_id_grouper.output(),
                  plants_height_aggregate.output())
    engines.append(joiner)
    mux_streams.append(joiner.output())

mux = Mux(*mux_streams)

result_stack = ResultFile(
    'results.txt',
    mux.output(),
)

info_queue = Queue()
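
MaxHeightAggregator reduces each group of the selected stream to a single record carrying the maximum plants.height, which the joiner then attaches to the matching species group. The reduction itself is a simple fold over the grouped values; a plain-Python sketch with made-up values (not the library's Aggregate):

# Maximum height per species: the reduction MaxHeightAggregator applies per group.
heights_by_species = {
    'species-1': [12, 30, 25],
    'species-2': [7, 9],
}
max_heights = {species: max(heights) for species, heights in heights_by_species.items()}
print(max_heights)  # {'species-1': 30, 'species-2': 9}
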
Example #12
                'args': ['oid'],
                'function': lambda v: v,
            },
            'counties.the_geom': {
                'type': Geometry,
                'args': ['counties.the_geom'],
                'function': lambda v: v,
            },
        }
    )
)
engines.append(counties_oid_select)

# Group states by OID
states_group = Group(
    states_select.output(), 
    {'states.oid': lambda a, b: a == b}
)
engines.append(states_group)

# Join counties and states
states_counties_join = Join(
    states_group.output(),
    counties_oid_select.output(),
)
engines.append(states_counties_join)

# De-multiplex the joined stream across multiple tracks for better CPU core
# utilization.
demux = Demux(states_counties_join.output())
mux_streams = []
for i in range(tracks):
Example #13
        states_join.output().schema(), {
            'states.oid': {
                'type': int,
                'args': ['states.oid'],
                'function': lambda v: v,
            },
            'states.geom': {
                'type': Geometry,
                'args': ['queries.geom', 'states.geom'],
                'function': lambda a, b: intersection(a, b),
            }
        }))
engines.append(states_trim)

states_group = Group(states_trim.output(), {
    'states.oid': lambda a, b: a == b,
})
engines.append(states_group)

#############################################################
#
# Counties
#
#############################################################

counties_query = Select(
    states_trim.output(),
    UniversalSelect(
        states_trim.output().schema(), {
            'counties.geom': {
                'type': Geometry,
Example #14
                    'args': ['plants.height'],
                    'function': lambda v: v
                }
            }
        )
    )
    engines.append(plants_height_select)

    plants_height_aggregate = Aggregate(
        plants_height_select.output(),
        MaxHeightAggregator(plants_height_select.output().schema())
    )
    engines.append(plants_height_aggregate)

    species_id_grouper = Group(
        channel, 
        {'species.id': lambda a, b: a == b}
    )
    engines.append(species_id_grouper)

    joiner = Join(species_id_grouper.output(), plants_height_aggregate.output())
    engines.append(joiner)
    mux_streams.append(joiner.output())

mux = Mux(*mux_streams)

result_stack = ResultFile(
    'results.txt',
    mux.output(),
)

info_queue = Queue()