def createNewAtomicPomset(self, name=None,
                          executableObject=None,
                          commandBuilderType=None,
                          executeEnvironmentType=None,
                          *args, **kwds):
    """Create a new atomic pomset definition and wrap it in a pomset context."""

    newAtomicPomset = DefinitionModule.AtomicDefinition(*args, **kwds)

    # default to a short, randomly suffixed name
    if name is None:
        name = 'pomset %s' % uuid.uuid4().hex[:3]
    newAtomicPomset.name(name)

    newAtomicPomset.functionToExecute(
        DefinitionModule.executeTaskInEnvironment)

    newAtomicPomset.executable(executableObject)

    # create the parameter orderings
    parameterOrderings = DefinitionModule.createParameterOrderingTable()
    newAtomicPomset.parameterOrderingTable(parameterOrderings)

    # default both the command builder and the execute environment
    # to a local shell process
    if commandBuilderType is None:
        commandBuilderType = 'shell process'
    newAtomicPomset.commandBuilderType(commandBuilderType)

    if executeEnvironmentType is None:
        executeEnvironmentType = 'shell process'
    newAtomicPomset.executeEnvironmentType(executeEnvironmentType)

    newPomsetContext = ContextModule.wrapPomsetInContext(newAtomicPomset)

    return newPomsetContext
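# Illustrative usage sketch (assumes `builder` is an instance of the class
# that defines createNewAtomicPomset, and that a plain shell executable is
# acceptable; the names below are hypothetical):
#
#     executable = TaskCommandModule.Executable()
#     pomsetContext = builder.createNewAtomicPomset(
#         name='echo task', executableObject=executable)
#
# When the builder/environment types are not specified, both fall back to
# 'shell process'.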
def createHadoopWordcountDefinition():
    """Build a definition that runs the wordcount class of the Hadoop examples jar."""

    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')

    # TODO:
    # need to be able to customize this for each host
    executable = HadoopModule.JarExecutable()
    executable.stageable(False)
    executable.path([HadoopModule.getExecutablePath()])
    executable.jarFile([getExamplesJar()])
    executable.jarClass(['wordcount'])

    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
            }
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )

    return definition
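# With no command-line prefix flags declared, the two parameters are passed
# positionally in the order given above (input before output), so the built
# command should resemble (a sketch; exact rendering is up to the
# shell-process command builder):
#
#     <hadoop> jar <examples jar> wordcount <input file> <output file>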
def createWordCountDefinition(dir=None):
    """Build a library definition for the local wordcount.py mapper script."""

    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')

    # default to the test-data location under the current working directory;
    # paths are held as lists of components
    if dir is None:
        dir = (os.getcwd().split(os.path.sep) +
               ['resources', 'testdata', 'TestExecute'])
    command = dir + ['wordcount.py']

    executable = TaskCommandModule.Executable()
    executable.stageable(True)
    executable.path(command)
    executable.staticArgs([])

    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
            }
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )

    definition.name('wordcount mapper')
    definition.id(ID_WORDCOUNT)
    definition.isLibraryDefinition(True)

    return definition
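# Illustrative call (the path components below are hypothetical). When
# provided, `dir` should be a list of path components, mirroring the
# default, which splits os.getcwd():
#
#     definition = createWordCountDefinition(
#         dir=['', 'opt', 'pomsets', 'resources', 'testdata', 'TestExecute'])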
def createHadoopStreamingDefinition():
    """Build a definition that runs a job via the Hadoop streaming jar."""

    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')
    row = parameterOrdering.addRow()
    row.setColumn('source', 'output file')
    row.setColumn('target', 'mapper')
    row = parameterOrdering.addRow()
    row.setColumn('source', 'mapper')
    row.setColumn('target', 'reducer')

    # TODO:
    # need to be able to customize this for each host
    executable = HadoopModule.JarExecutable()
    executable.stageable(False)
    executable.path([HadoopModule.getExecutablePath()])
    executable.jarFile([getStreamingJar()])
    executable.jarClass([])

    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-input']
                },
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-output']
                },
            },
            'mapper':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-mapper']
                },
            },
            'reducer':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-reducer']
                },
            },
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )

    return definition
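# The prefix flags above, combined with the source->target ordering rows
# (input file -> output file -> mapper -> reducer), mean the built command
# should resemble (a sketch; exact rendering is up to the command builder):
#
#     <hadoop> jar <streaming jar> -input <input file> -output <output file> \
#         -mapper <mapper> -reducer <reducer>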
def createHadoopPipesDefinition():
    """Build a definition that runs a compiled Hadoop pipes program."""

    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')

    # TODO:
    # need to be able to customize this for each host
    command = ['pipesProgram']
    executable = HadoopModule.PipesExecutable()
    executable.stageable(False)
    executable.path([HadoopModule.getExecutablePath()])
    executable.pipesFile(command)

    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-input']
                },
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-output']
                },
            },
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )

    return definition
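# Expected command shape (a sketch; how PipesExecutable renders the pipes
# program path 'pipesProgram' is an assumption here):
#
#     <hadoop> pipes ... -input <input file> -output <output file>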