def _send_sinks(self):
    """Send the description of every registered sink through the collector.

    For each entry in ``self._sinks`` the identifier and the ID of the
    entry's parent are collected first, followed by the fields that belong
    to the matching sink type:

    * ``SINK_CSV``: path, field delimiter, line delimiter, write mode
    * ``SINK_TEXT``: path, write mode
    * ``SINK_PRINT``: the to-err flag

    An identifier that matches none of these branches gets no type-specific
    fields collected by this method.
    """
    # Bound once: the collector method does not change between sinks.
    collect = self._collector.collect
    for sink in self._sinks:
        identifier = sink[_Fields.IDENTIFIER]
        collect(identifier)
        collect(sink[_Fields.PARENT][_Fields.ID])
        # Each branch leaves the Switch loop with ``break`` once its fields
        # have been collected; the order of the collect calls is preserved.
        for case in Switch(identifier):
            if case(_Identifier.SINK_CSV):
                collect(sink[_Fields.PATH])
                collect(sink[_Fields.DELIMITER_FIELD])
                collect(sink[_Fields.DELIMITER_LINE])
                collect(sink[_Fields.WRITE_MODE])
                break
            if case(_Identifier.SINK_TEXT):
                collect(sink[_Fields.PATH])
                collect(sink[_Fields.WRITE_MODE])
                break
            if case(_Identifier.SINK_PRINT):
                collect(sink[_Fields.TO_ERR])
                break
def _send_sinks(self):
    """Send the description of every registered sink through the collector.

    For each entry in ``self._sinks`` the identifier and the ID of the
    entry's parent are collected first, followed by the attributes that
    belong to the matching sink type:

    * ``SINK_CSV``: path, field delimiter, line delimiter, write mode
    * ``SINK_TEXT``: path, write mode
    * ``SINK_PRINT``: the to-err flag

    An identifier that matches none of these branches gets no type-specific
    attributes collected by this method.
    """
    # Bound once: the collector method does not change between sinks.
    collect = self._collector.collect
    for sink in self._sinks:
        identifier = sink.identifier
        collect(identifier)
        collect(sink.parent.id)
        # Each branch leaves the Switch loop with ``break`` once its fields
        # have been collected; the order of the collect calls is preserved.
        for case in Switch(identifier):
            if case(_Identifier.SINK_CSV):
                collect(sink.path)
                collect(sink.delimiter_field)
                collect(sink.delimiter_line)
                collect(sink.write_mode)
                break
            if case(_Identifier.SINK_TEXT):
                collect(sink.path)
                collect(sink.write_mode)
                break
            if case(_Identifier.SINK_PRINT):
                collect(sink.to_err)
                break
def _send_sources(self):
    """Send the description of every registered source through the collector.

    For each entry in ``self._sources`` the identifier and the entry's own
    ID are collected first, followed by the attributes that belong to the
    matching source type:

    * ``SOURCE_CSV``: path, field delimiter, line delimiter, types
    * ``SOURCE_TEXT``: path
    * ``SOURCE_VALUE``: the number of values, then each value in order

    An identifier that matches none of these branches gets no type-specific
    attributes collected by this method.
    """
    # Bound once: the collector method does not change between sources.
    collect = self._collector.collect
    for source in self._sources:
        identifier = source.identifier
        collect(identifier)
        collect(source.id)
        # Each branch leaves the Switch loop with ``break`` once its fields
        # have been collected; the order of the collect calls is preserved.
        for case in Switch(identifier):
            if case(_Identifier.SOURCE_CSV):
                collect(source.path)
                collect(source.delimiter_field)
                collect(source.delimiter_line)
                collect(source.types)
                break
            if case(_Identifier.SOURCE_TEXT):
                collect(source.path)
                break
            if case(_Identifier.SOURCE_VALUE):
                # The count goes first, then the individual values.
                values = source.values
                collect(len(values))
                for value in values:
                    collect(value)
                break
def _send_sources(self):
    """Send the description of every registered source through the collector.

    For each entry in ``self._sources`` the identifier and the entry's own
    ID are collected first, followed by the fields that belong to the
    matching source type:

    * ``SOURCE_CSV``: path, field delimiter, line delimiter, types
    * ``SOURCE_TEXT``: path
    * ``SOURCE_VALUE``: the number of values, then each value in order

    An identifier that matches none of these branches gets no type-specific
    fields collected by this method.
    """
    # Bound once: the collector method does not change between sources.
    collect = self._collector.collect
    for source in self._sources:
        identifier = source[_Fields.IDENTIFIER]
        collect(identifier)
        collect(source[_Fields.ID])
        # Each branch leaves the Switch loop with ``break`` once its fields
        # have been collected; the order of the collect calls is preserved.
        for case in Switch(identifier):
            if case(_Identifier.SOURCE_CSV):
                collect(source[_Fields.PATH])
                collect(source[_Fields.DELIMITER_FIELD])
                collect(source[_Fields.DELIMITER_LINE])
                collect(source[_Fields.TYPES])
                break
            if case(_Identifier.SOURCE_TEXT):
                collect(source[_Fields.PATH])
                break
            if case(_Identifier.SOURCE_VALUE):
                # The count goes first, then the individual values.
                values = source[_Fields.VALUES]
                collect(len(values))
                for value in values:
                    collect(value)
                break
def _send_operations(self):
    """Send the description of every registered operation through the collector.

    For each entry in ``self._sets`` the identifier, the entry's own ID and
    the ID of its parent are collected first. The operation-specific
    attributes follow, in exactly the order written in the matching branch.

    Raises:
        KeyError: from the trailing argument-less ``case()`` branch, which
            is reached when no earlier branch broke out of the loop
            (presumably the catch-all for unknown identifiers -- confirm
            against the ``Switch`` implementation).
    """
    collect = self._collector.collect
    # Named ``op`` rather than ``set`` so the built-in ``set`` is not shadowed.
    for op in self._sets:
        identifier = op.identifier
        collect(identifier)
        collect(op.id)
        collect(op.parent.id)
        for case in Switch(identifier):
            if case(_Identifier.SORT):
                collect(op.field)
                collect(op.order)
                break
            if case(_Identifier.GROUP):
                collect(op.keys)
                break
            if case(_Identifier.COGROUP):
                collect(op.other.id)
                collect(op.key1)
                collect(op.key2)
                collect(op.types)
                collect(op.name)
                break
            if case(_Identifier.CROSS, _Identifier.CROSSH, _Identifier.CROSST):
                collect(op.other.id)
                collect(op.types)
                # Projection count first, then both elements of each projection.
                collect(len(op.projections))
                for projection in op.projections:
                    collect(projection[0])
                    collect(projection[1])
                collect(op.name)
                break
            if case(_Identifier.REDUCE, _Identifier.GROUPREDUCE):
                collect(op.types)
                collect(op.combine)
                collect(op.name)
                break
            if case(_Identifier.JOIN, _Identifier.JOINH, _Identifier.JOINT):
                collect(op.key1)
                collect(op.key2)
                collect(op.other.id)
                collect(op.types)
                # Projection count first, then both elements of each projection.
                collect(len(op.projections))
                for projection in op.projections:
                    collect(projection[0])
                    collect(projection[1])
                collect(op.name)
                break
            if case(_Identifier.MAP, _Identifier.MAPPARTITION,
                    _Identifier.FLATMAP, _Identifier.FILTER):
                collect(op.types)
                collect(op.name)
                break
            if case(_Identifier.UNION):
                collect(op.other.id)
                break
            if case(_Identifier.PROJECTION):
                collect(op.keys)
                break
            if case():
                # The message names this method so the failure can be located.
                raise KeyError(
                    "Environment._send_operations(): Invalid operation identifier: "
                    + str(identifier))
def _send_operations(self):
    """Send the description of every registered operation through the collector.

    For each entry in ``self._sets`` the identifier, the entry's own ID and
    the ID of its parent are collected first. The operation-specific fields
    follow, in exactly the order written in the matching branch. Operator
    objects are passed through ``_dump`` and sent with ``collectBytes``;
    for the cross and join variants an operator of ``None`` is sent as-is
    with ``collect`` instead.

    Raises:
        KeyError: if an entry lacks a required field, or from the trailing
            argument-less ``case()`` branch, which is reached when no
            earlier branch broke out of the loop (presumably the catch-all
            for unknown identifiers -- confirm against the ``Switch``
            implementation).
    """
    collect = self._collector.collect
    collect_bytes = self._collector.collectBytes
    # Named ``op`` rather than ``set`` so the built-in ``set`` is not shadowed.
    for op in self._sets:
        # A single subscript lookup; a missing identifier raises KeyError
        # here, as the second lookup in the previous form did.
        identifier = op[_Fields.IDENTIFIER]
        collect(identifier)
        collect(op[_Fields.ID])
        collect(op[_Fields.PARENT][_Fields.ID])
        for case in Switch(identifier):
            if case(_Identifier.SORT):
                collect(op[_Fields.FIELD])
                collect(op[_Fields.ORDER])
                break
            if case(_Identifier.GROUP):
                collect(op[_Fields.KEYS])
                break
            if case(_Identifier.COGROUP):
                collect(op[_Fields.OTHER][_Fields.ID])
                collect(op[_Fields.KEY1])
                collect(op[_Fields.KEY2])
                collect_bytes(_dump(op[_Fields.OPERATOR]))
                collect(op[_Fields.META])
                collect(op[_Fields.TYPES])
                collect(op[_Fields.NAME])
                break
            if case(_Identifier.CROSS, _Identifier.CROSSH, _Identifier.CROSST):
                collect(op[_Fields.OTHER][_Fields.ID])
                operator = op[_Fields.OPERATOR]
                if operator is None:
                    # No operator set: the None itself is sent, undumped.
                    collect(operator)
                else:
                    collect_bytes(_dump(operator))
                collect(op[_Fields.META])
                collect(op[_Fields.TYPES])
                # Projection count first, then both elements of each projection.
                projections = op[_Fields.PROJECTIONS]
                collect(len(projections))
                for projection in projections:
                    collect(projection[0])
                    collect(projection[1])
                collect(op[_Fields.NAME])
                break
            if case(_Identifier.REDUCE, _Identifier.GROUPREDUCE):
                collect_bytes(_dump(op[_Fields.OPERATOR]))
                collect_bytes(_dump(op[_Fields.COMBINEOP]))
                collect(op[_Fields.META])
                collect(op[_Fields.TYPES])
                collect(op[_Fields.COMBINE])
                collect(op[_Fields.NAME])
                break
            if case(_Identifier.JOIN, _Identifier.JOINH, _Identifier.JOINT):
                collect(op[_Fields.KEY1])
                collect(op[_Fields.KEY2])
                collect(op[_Fields.OTHER][_Fields.ID])
                operator = op[_Fields.OPERATOR]
                if operator is None:
                    # No operator set: the None itself is sent, undumped.
                    collect(operator)
                else:
                    collect_bytes(_dump(operator))
                collect(op[_Fields.META])
                collect(op[_Fields.TYPES])
                # Projection count first, then both elements of each projection.
                projections = op[_Fields.PROJECTIONS]
                collect(len(projections))
                for projection in projections:
                    collect(projection[0])
                    collect(projection[1])
                collect(op[_Fields.NAME])
                break
            if case(_Identifier.MAP, _Identifier.MAPPARTITION,
                    _Identifier.FLATMAP, _Identifier.FILTER):
                collect_bytes(_dump(op[_Fields.OPERATOR]))
                collect(op[_Fields.META])
                collect(op[_Fields.TYPES])
                collect(op[_Fields.NAME])
                break
            if case(_Identifier.UNION):
                collect(op[_Fields.OTHER][_Fields.ID])
                break
            if case(_Identifier.PROJECTION):
                collect(op[_Fields.KEYS])
                break
            if case():
                # The message names this method so the failure can be located.
                raise KeyError(
                    "Environment._send_operations(): Invalid operation identifier: "
                    + str(identifier))
def _send_operations(self):
    """Send the description of every registered operation through the collector.

    For each entry in ``self._sets`` the identifier, the entry's own ID and
    the ID of its parent are collected first. The operation-specific
    attributes follow, in exactly the order written in the matching branch;
    ``REBALANCE`` sends nothing beyond the three common values.

    Raises:
        KeyError: from the trailing argument-less ``case()`` branch, which
            is reached when no earlier branch broke out of the loop
            (presumably the catch-all for unknown identifiers -- confirm
            against the ``Switch`` implementation).
    """
    collect = self._collector.collect
    # Named ``op`` rather than ``set`` so the built-in ``set`` is not shadowed.
    for op in self._sets:
        identifier = op.identifier
        collect(identifier)
        collect(op.id)
        collect(op.parent.id)
        for case in Switch(identifier):
            if case(_Identifier.REBALANCE):
                # Nothing operation-specific to send.
                break
            if case(_Identifier.DISTINCT, _Identifier.PARTITION_HASH):
                collect(op.keys)
                break
            if case(_Identifier.FIRST):
                collect(op.count)
                break
            if case(_Identifier.SORT):
                collect(op.field)
                collect(op.order)
                break
            if case(_Identifier.GROUP):
                collect(op.keys)
                break
            if case(_Identifier.COGROUP):
                collect(op.other.id)
                collect(op.key1)
                collect(op.key2)
                collect(op.types)
                collect(op.name)
                break
            if case(_Identifier.CROSS, _Identifier.CROSSH, _Identifier.CROSST):
                collect(op.other.id)
                collect(op.uses_udf)
                collect(op.types)
                collect(op.name)
                break
            if case(_Identifier.REDUCE, _Identifier.GROUPREDUCE):
                collect(op.types)
                collect(op.name)
                break
            if case(_Identifier.JOIN, _Identifier.JOINH, _Identifier.JOINT):
                collect(op.key1)
                collect(op.key2)
                collect(op.other.id)
                collect(op.uses_udf)
                collect(op.types)
                collect(op.name)
                break
            if case(_Identifier.MAP, _Identifier.MAPPARTITION,
                    _Identifier.FLATMAP, _Identifier.FILTER):
                collect(op.types)
                collect(op.name)
                break
            if case(_Identifier.UNION):
                collect(op.other.id)
                break
            if case(_Identifier.PROJECTION):
                collect(op.keys)
                break
            if case():
                # The message names this method so the failure can be located.
                raise KeyError(
                    "Environment._send_operations(): Invalid operation identifier: "
                    + str(identifier))