Пример #1
0
    def test_merge(self):
        """Run a MergeNode over the distinct sample plus a second pipe.

        The second pipe is attached as input 1 with the join specification
        ``(1, "type", "type2")``.  One field is dropped from each input, and
        the test expects five output fields and as many output rows as there
        were rows in ``self.input`` before the node ran.
        """
        merge_node = brewery.nodes.MergeNode()
        self.create_distinct_sample()

        # Second input: rows of (type2, name).
        detail_pipe = brewery.streams.SimpleDataPipe()
        detail_pipe.fields = brewery.fieldlist(["type2", "name"])
        detail_rows = (
            ["a", "apple"],
            ["b", "bananna"],
            ["c", "curry"],
            ["d", "dynamite"],
        )
        for row in detail_rows:
            detail_pipe.put(row)

        # Remember the input size before the node consumes anything.
        expected_row_count = len(self.input.buffer)

        merge_node.inputs = [self.input, detail_pipe]
        merge_node.outputs = [self.output]
        merge_node.joins = [(1, "type", "type2")]
        merge_node.maps = {
            0: brewery.FieldMap(drop=["id2"]),
            1: brewery.FieldMap(drop=["type2"]),
        }
        self.initialize_node(merge_node)

        self.assertEqual(5, len(merge_node.output_fields))

        merge_node.run()
        merge_node.finalize()

        first_output_row = self.output.buffer[0]
        self.assertEqual(5, len(first_output_row))
        self.assertEqual(expected_row_count, len(self.output.buffer))
Пример #2
0
 def initialize(self):
     """Build the field map and derive output fields and the row filter.

     Creates a ``brewery.FieldMap`` from ``self.mapped_fields`` (rename),
     ``self.dropped_fields`` (drop) and ``self.kept_fields`` (keep), stores
     it in ``self.map``, and then applies it to the input's fields to set
     ``self._output_fields`` and ``self.filter``.
     """
     field_map = brewery.FieldMap(
         rename=self.mapped_fields,
         drop=self.dropped_fields,
         keep=self.kept_fields,
     )
     self.map = field_map

     # Both derived attributes are computed from the input's field list.
     mapped_fields = self.map.map(self.input.fields)
     self._output_fields = mapped_fields

     row_filter = self.map.row_filter(self.input.fields)
     self.filter = row_filter
Пример #3
0
    def initialize(self):
        """Validate the join specification and prepare merge bookkeeping.

        Reads ``self.inputs``, ``self.master``, ``self.joins`` and
        ``self.maps`` and sets the following attributes:

        * ``master_input`` -- the pipe at ``self.inputs[self.master]``.
        * ``detail_inputs`` -- list of ``(tag, pipe)`` for every input that
          is not the master pipe; ``tag`` is the position in ``self.inputs``.
        * ``_keys`` -- ``{detail_tag: (detail_key, master_key)}`` with both
          keys normalized to a list or tuple.
        * ``_kindexes`` -- ``{detail_tag: (detail_indexes, master_indexes)}``
          as returned by ``fields.indexes()`` of the respective inputs.
        * ``_input_rows`` -- an empty dictionary for every input tag.
        * ``_maps`` / ``_filters`` -- per-tag field map and the row filter
          created from it, for every tag present in ``self.maps``.
        * ``_output_fields`` -- a ``brewery.FieldList`` made by concatenating
          the (optionally mapped) fields of all inputs in tag order.

        Each join is ``(detail_tag, master_key, detail_key)`` or
        ``(detail_tag, master_key)``; in the two-element form the master key
        is also used as the detail key.  A key may be a single value or a
        list/tuple of values.

        Raises:
            Exception: if a join does not have two or three elements, if a
                join refers to the master input as its detail, or if an
                entry of ``self.maps`` is neither a dictionary nor a
                ``brewery.FieldMap``.
        """
        # Check joins and normalize them first
        self._keys = {}
        self._kindexes = {}

        self.master_input = self.inputs[self.master]
        self.detail_inputs = []
        for (tag, pipe) in enumerate(self.inputs):
            if pipe is not self.master_input:
                self.detail_inputs.append((tag, pipe))

        for join in self.joins:
            joinlen = len(join)
            if joinlen == 3:
                (detail_tag, master_key, detail_key) = join
            elif joinlen == 2:
                # We use same key names for detail as master if no detail key
                # is specified
                (detail_tag, master_key) = join
                detail_key = master_key
            else:
                raise Exception("Join specification should be a tuple/list of two or three elements.")

            # Wrap a single key in a tuple (as expected later).  isinstance
            # keeps list/tuple subclasses intact instead of wrapping them as
            # if they were one key.
            if not isinstance(detail_key, (list, tuple)):
                detail_key = (detail_key, )
            if not isinstance(master_key, (list, tuple)):
                master_key = (master_key, )

            if detail_tag == self.master:
                raise Exception("Can not join master to itself.")

            self._keys[detail_tag] = (detail_key, master_key)

            detail_input = self.inputs[detail_tag]

            # Get field indexes
            detail_indexes = detail_input.fields.indexes(detail_key)
            master_indexes = self.master_input.fields.indexes(master_key)
            self._kindexes[detail_tag] = (detail_indexes, master_indexes)

        # Prepare storage for input data
        self._input_rows = {}
        for (tag, _) in enumerate(self.inputs):
            self._input_rows[tag] = {}

        # Create map filters
        self._filters = {}
        self._maps = {}
        if self.maps:
            for (tag, fmap) in self.maps.items():
                if isinstance(fmap, dict):
                    # Plain dictionary form: build the FieldMap from its
                    # "rename", "drop" and "keep" entries (missing -> None).
                    fmap = brewery.FieldMap(rename=fmap.get("rename"),
                                            drop=fmap.get("drop"),
                                            keep=fmap.get("keep"))
                elif not isinstance(fmap, brewery.FieldMap):
                    raise Exception("Unknown field map type: %s" % type(fmap))
                self._maps[tag] = fmap
                self._filters[tag] = fmap.row_filter(self.inputs[tag].fields)

        # Construct output fields
        fields = []
        for (tag, pipe) in enumerate(self.inputs):
            fmap = self._maps.get(tag, None)
            if fmap:
                fields += fmap.map(pipe.fields)
            else:
                fields += pipe.fields

        self._output_fields = brewery.FieldList(fields)