def steps(self):
    """Re-define this to make a multi-step job.

    If you don't re-define this, we'll automatically create a one-step
    job using any of :py:meth:`mapper`, :py:meth:`mapper_init`,
    :py:meth:`mapper_final`, :py:meth:`reducer_init`,
    :py:meth:`reducer_final`, and :py:meth:`reducer` that you've
    re-defined. For example::

        def steps(self):
            return [MRStep(mapper=self.transform_input,
                           reducer=self.consolidate_1),
                    MRStep(reducer_init=self.log_mapper_init,
                           reducer=self.consolidate_2)]

    :return: a list of steps constructed with
             :py:class:`~mrjob.step.MRStep` or other classes in
             :py:mod:`mrjob.step`.
    """
    # pick up only the step methods this subclass has actually
    # overridden (comparing underlying functions against MRJob's)
    step_kwargs = {}
    for func_name in _JOB_STEP_FUNC_PARAMS + ('spark',):
        if (_im_func(getattr(self, func_name)) is not
                _im_func(getattr(MRJob, func_name))):
            step_kwargs[func_name] = getattr(self, func_name)

    # special case for spark()
    # TODO: support jobconf as well
    if 'spark' in step_kwargs:
        # spark() can't coexist with any streaming step method
        if sorted(step_kwargs) != ['spark']:
            raise ValueError(
                "Can't mix spark() and streaming functions")
        return [SparkStep(
            spark=step_kwargs['spark'],
            spark_args=self.spark_args())]

    # *_cmd and *_pre_filter methods are defined as functions that
    # return command strings, but MRStep wants the strings themselves,
    # so invoke them here
    for func_name in list(step_kwargs):
        if func_name.endswith('_cmd') or func_name.endswith('_pre_filter'):
            step_kwargs[func_name] = step_kwargs[func_name]()

    return [MRStep(**step_kwargs)] if step_kwargs else []