def addDivisionToValue(graph, superpipeline, task, nodeId, instructions, baseValues, optionArgument, optionValue, randomString):
    """Construct the output filenames for one division of a task.

    For every entry in baseValues the directory part is dropped, the input
    extension reported by getExtension is removed, and a suffix of the form
    '_<optionArgument without surrounding dashes><optionValue>' is appended.
    Any 'addTextToFilename' text stored on the node and, for intermediate
    files, superpipeline.randomString are appended next. The final extension
    is applied by furnishExtension.

    Returns the list of constructed values, or an empty list when baseValues
    is empty (the caller is expected to detect the missing values).

    NOTE(review): the randomString parameter is accepted but never read; the
    random text comes from superpipeline.randomString instead. Confirm that
    the two are always the same.
    """

    # Without base values there is nothing to build from.
    if not baseValues:
        return []

    nodeAttribute = gr.pipelineGraph.CM_getGraphNodeAttribute

    # Tool configuration for this task.
    toolData = superpipeline.getToolData(nodeAttribute(graph, task, 'tool'))

    # The argument with which the filename construction is associated, its
    # allowed extensions and whether it is a stub.
    sourceArgument = toolData.getLongFormArgument(instructions['use argument'])
    inputExtensions = toolData.getArgumentAttribute(sourceArgument, 'extensions')
    sourceIsStub = toolData.getArgumentAttribute(sourceArgument, 'isStub')

    # The extensions permitted on the constructed filename.
    outputExtensions = gr.pipelineGraph.CM_getArgumentAttribute(graph, task, nodeId, 'extensions')

    # Intermediate files receive extra random text below.
    isIntermediate = nodeAttribute(graph, nodeId, 'isIntermediate')

    # FIXME HANDLE STUBS
    if sourceIsStub:
        print('NOT HANDLED STUBS FOR DIVISIONS: constructFilenames.constructDivisions')
        exit(1)

    constructed = []
    for baseValue in baseValues:

        # Keep only the filename if the base value carries a path.
        name = baseValue.rsplit('/')[-1]

        # A result equal to False from getExtension is treated as a fatal
        # error; any other falsy result simply means nothing is stripped.
        extension = getExtension(name, inputExtensions)
        if extension == False:
            print('ERROR WITH EXTENSION - constructFilenames')
            exit(1)
        if extension:
            name = name.replace('.' + str(extension), '')

        # Tag the name with the option that defines this division.
        name = str(name + '_' + optionArgument.strip('-') + optionValue)

        # Extra text requested for this node.
        extraText = nodeAttribute(graph, nodeId, 'addTextToFilename')
        if extraText:
            name += str('_' + extraText)

        # Random text for intermediate files.
        if isIntermediate:
            name += str('_' + superpipeline.randomString)

        constructed.append(furnishExtension(instructions, name, extension, outputExtensions))

    return constructed
def constructKnownFilename(graph, superpipeline, instructions, task, nodeId, argument, isTerminate):
    """Build the values for a node whose filename is given in the instructions.

    The filename is instructions['filename'], produced once per subphase of
    the task and passed through modifyText when the instructions contain a
    'modify text' field. If the instructions contain a 'path argument' field,
    each filename is prefixed with '<path>/' using the values of the node
    attached to that tool argument.

    The resulting list is stored as the 'values' attribute of nodeId and is
    also returned. An unsupported combination of path values and filenames
    prints a message and exits with status 1.
    """
    tool = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, task, 'tool')
    toolData = superpipeline.getToolData(tool)

    # The filename to use.
    filename = instructions['filename']

    # With several subphases, arguments used in the construction can have as
    # many values as there are subphases, so build one filename per subphase.
    subphases = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, task, 'subphases')
    if 'modify text' in instructions:
        values = [
            modifyText(graph, task, argument, toolData, instructions, counter, filename, isTerminate)
            for counter in range(0, subphases)
        ]
    else:
        values = [filename] * subphases

    # The 'path argument' field names a tool argument whose values are
    # prepended to the filename as a directory.
    if 'path argument' in instructions:
        pathArgument = instructions['path argument']
        pathNodeId = gr.pipelineGraph.CM_getNodeForInputArgument(graph, task, pathArgument)
        pathNodeValues = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, pathNodeId, 'values')

        # A single path applies to every filename.
        if len(pathNodeValues) == 1:
            updatedValues = [str(pathNodeValues[0] + '/' + value) for value in values]

        # Several paths but one filename: one value per path. This branch
        # previously iterated over an undefined name and joined the pieces as
        # filename/path; it now prepends the path like the other branches.
        elif len(values) == 1:
            updatedValues = [str(pathValue + '/' + values[0]) for pathValue in pathNodeValues]

        # Equal counts: each path belongs to the filename of the same subphase.
        elif len(values) == len(pathNodeValues):
            updatedValues = [
                str(pathNodeValues[counter] + '/' + value) for counter, value in enumerate(values)
            ]

        # Any other combination of values is invalid.
        # TODO ERROR
        else:
            print('constructFilenames.constructKnownFilename - PATH VALUES.')
            exit(1)
    else:
        updatedValues = values

    # Store the values on the node and hand them back.
    gr.pipelineGraph.CM_setGraphNodeAttribute(graph, nodeId, 'values', updatedValues)
    return updatedValues
def addIndexToSingleBaseValue(graph, superpipeline, task, argument, values, subphases):
    """Expand a single base value into one indexed value per subphase.

    Only values[0] is read. Its extension (as reported by getExtension for the
    extensions allowed on the tool argument) is removed, and the values
    '<name>_1.<ext>' ... '<name>_<subphases>.<ext>' are returned as a list.
    subphases is used as an integer count. The graph parameter is not read.

    NOTE(review): the result of getExtension is concatenated to a string
    without any check, so a non-string result raises TypeError - confirm that
    callers only reach this with a recognised extension.
    """

    # Tool configuration and the extensions allowed for this argument.
    toolData = superpipeline.getToolData(superpipeline.tasks[task])
    allowedExtensions = toolData.getArgumentAttribute(argument, 'extensions')

    # This method is for a list of values of length 1: split the only value
    # into its name and extension.
    baseValue = values[0]
    extension = getExtension(baseValue, allowedExtensions)
    stem = baseValue.replace('.' + extension, '')

    # One value per subphase, indexed from 1.
    return [stem + '_' + str(index) + '.' + extension for index in range(1, subphases + 1)]
def checkRequiredArguments(graph, superpipeline, args, isTerminate):
    """Check that every required argument has, or can be given, a value.

    Two passes are made:

    1. Over the pipeline arguments in args.arguments. The pipeline's explicit
       isRequired setting (True or False) is copied onto the associated graph
       nodes; a setting of None leaves the nodes untouched so that the tool
       configuration decides. Required pipeline arguments without values (and
       without construction instructions on the tool they were imported
       from) are failures.
    2. Over every task in graph.workflow and every argument the tool marks as
       required. Inputs/options must have a node with values, construction
       instructions, or be a node already recorded as constructable. Outputs
       must have values or construction instructions. Where instructions
       exist but no node does, the missing node and edge are added to the
       graph.

    When isTerminate is true the matching method on the consistency error
    handler is called for each failure. Returns True when no failure was
    found, False otherwise.
    """

    # Define error handling.
    errors = er.consistencyErrors()
    isSuccess = True

    # Output nodes seen so far whose values can be constructed. These may be
    # inputs to other tasks, so missing values on them are not an error.
    constructableNodes = []

    # Pass 1: the pipeline arguments.
    for argument in args.arguments:
        pipelineArgument = args.arguments[argument]

        # The pipeline explicitly overrides the tool: the argument is not
        # required. '== False' is deliberate, None means "not specified".
        if pipelineArgument.isRequired == False:
            for nodeId in pipelineArgument.graphNodeIds:
                graph.setGraphNodeAttribute(nodeId, 'isRequired', False)

        if not pipelineArgument.isRequired:
            continue

        for nodeId in pipelineArgument.graphNodeIds:

            # The pipeline states that the argument is required, so record
            # that on the node. This used to write False, which marked the
            # node as "not required" and made the tool-level pass below skip
            # it.
            graph.setGraphNodeAttribute(nodeId, 'isRequired', True)

            # If the argument was imported from a task, construction
            # instructions on the tool argument mean the value need not be
            # set by the user.
            hasInstructions = False
            if pipelineArgument.isImported:
                task = pipelineArgument.importedFromTask
                tool = graph.getGraphNodeAttribute(task, 'tool')
                toolData = superpipeline.getToolData(tool)
                hasInstructions = bool(toolData.getArgumentAttribute(argument, 'constructionInstructions'))

            # A required pipeline argument with no values and no way of
            # constructing them is a failure.
            if not graph.getGraphNodeAttribute(nodeId, 'values') and not hasInstructions:
                isSuccess = False
                shortFormArgument = pipelineArgument.shortFormArgument
                description = pipelineArgument.description
                if isTerminate:
                    errors.unsetRequiredArgument(pipelineArgument.longFormArgument, shortFormArgument, description)

    # Pass 2: the arguments each tool marks as required.
    for task in graph.workflow:
        tool = superpipeline.tasks[task]
        toolData = superpipeline.getToolData(tool)

        for argument in toolData.arguments:
            if not toolData.getArgumentAttribute(argument, 'isRequired'):
                continue

            # Record whether a node for this argument is seen.
            foundNode = False
            isOutput = toolData.getArgumentAttribute(argument, 'isOutput')

            # Input files and options.
            if not isOutput:

                # Look for input nodes whose edge to the task uses this argument.
                for nodeId in graph.CM_getInputNodes(graph.graph, task):
                    if graph.getArgumentAttribute(nodeId, task, 'longFormArgument') != argument:
                        continue
                    foundNode = True

                    # Nodes marked as not required (the pipeline configuration
                    # superseded the tool's requirement) are not checked.
                    if not graph.getGraphNodeAttribute(nodeId, 'isRequired'):
                        continue

                    hasInstructions = graph.getArgumentAttribute(nodeId, task, 'constructionInstructions') is not None
                    hasValues = len(graph.getGraphNodeAttribute(nodeId, 'values')) != 0
                    if hasValues or hasInstructions or nodeId in constructableNodes:
                        continue
                    isSuccess = False

                    # Decide whether the value can be set with a top level
                    # pipeline argument (no '.' in its name), i.e. without
                    # naming the task on the command line.
                    # NOTE(review): this lookup raises KeyError if the node's
                    # 'longFormArgument' is not a key of args.arguments -
                    # confirm that cannot happen.
                    nodeArgument = args.arguments[graph.getGraphNodeAttribute(nodeId, 'longFormArgument')]
                    longFormArgument = nodeArgument.longFormArgument
                    if longFormArgument and '.' not in longFormArgument:
                        shortFormArgument = nodeArgument.shortFormArgument
                        description = graph.getGraphNodeAttribute(nodeId, 'description')
                        if isTerminate:
                            errors.unsetRequiredArgument(longFormArgument, shortFormArgument, description)

                    # Not a top level argument: report it as a nested argument.
                    # The short form and description are those of the tool.
                    # TODO CHECK THIS
                    else:
                        shortFormArgument = graph.getArgumentAttribute(nodeId, task, 'shortFormArgument')
                        description = graph.getArgumentAttribute(nodeId, task, 'description')
                        if isTerminate:
                            errors.unsetRequiredNestedArgument(task, argument, shortFormArgument, description, superpipeline.pipeline)

                # No node uses this argument: the pipeline configuration has
                # no unique or shared node for it and no value was given on
                # the command line.
                if not foundNode:
                    instructions = toolData.getArgumentAttribute(argument, 'constructionInstructions')

                    # Without instructions no value can ever be assigned.
                    if not instructions:
                        isSuccess = False

                        # If arguments were imported from this task, the
                        # argument is available on the command line.
                        if task == superpipeline.pipelineConfigurationData[superpipeline.pipeline].importArgumentsFromTool:
                            if isTerminate:
                                errors.unsetRequiredArgument(argument, args.arguments[argument].shortFormArgument, args.arguments[argument].description)
                        else:
                            if isTerminate:
                                errors.noInputNode(task, tool, argument)

                    # Instructions that build on another argument of the task:
                    # add a node for every predecessor using that argument.
                    elif instructions['method'] == 'from tool argument':
                        argumentToUse = instructions['use argument']
                        for predecessorNodeId in graph.graph.predecessors(task):
                            if graph.getArgumentAttribute(predecessorNodeId, task, 'longFormArgument') == argumentToUse:
                                nodeAddress = str(predecessorNodeId + '.' + argument)

                                # Add the node and edge.
                                argumentAttributes = toolData.getArgumentData(argument)
                                graph.addFileNode(nodeAddress, nodeAddress)
                                graph.addEdge(nodeAddress, task, argumentAttributes)

                                # Remember which node the filename is constructed from.
                                graph.setGraphNodeAttribute(nodeAddress, 'constructUsingNode', predecessorNodeId)

                    # Instructions that do not use another argument: add a
                    # single node for the task.
                    else:
                        nodeAddress = str(task + '.' + argument)
                        argumentAttributes = toolData.getArgumentData(argument)
                        graph.addFileNode(nodeAddress, nodeAddress)
                        graph.addEdge(nodeAddress, task, argumentAttributes)

            # Output files.
            else:
                instructions = toolData.getArgumentAttribute(argument, 'constructionInstructions')

                # Look for output nodes whose edge from the task uses this argument.
                for nodeId in graph.CM_getOutputNodes(graph.graph, task):
                    if graph.getArgumentAttribute(task, nodeId, 'longFormArgument') != argument:
                        continue
                    foundNode = True

                    if instructions:

                        # Construction from another argument of this task
                        # needs a node for that argument among the task's
                        # predecessors. foundNode is reused for that search.
                        if instructions['method'] == 'from tool argument':
                            longFormArgument = toolData.getLongFormArgument(instructions['use argument'])
                            foundNode = False
                            for predecessorNodeId in graph.graph.predecessors(task):
                                predecessorArgument = graph.getArgumentAttribute(predecessorNodeId, task, 'longFormArgument')
                                if predecessorArgument == longFormArgument:
                                    foundNode = True

                            # Without that node the filename cannot be constructed.
                            if not foundNode:
                                isSuccess = False
                                if isTerminate:
                                    errors.noNodeForConstruction(task, tool, argument, longFormArgument)

                        # Record the node as one that can be constructed.
                        if nodeId not in constructableNodes:
                            constructableNodes.append(nodeId)

                    # Without instructions, values must have been supplied.
                    if not instructions and not graph.getGraphNodeAttribute(nodeId, 'values'):
                        isSuccess = False
                        if isTerminate:
                            errors.noConstructionMethod(task, tool, argument)

                # No node exists for this argument.
                if not foundNode:

                    # Without instructions the output can never be named.
                    if not instructions:
                        print('dataConsistency.checkRequiredArguments - no output node', task, argument)
                        exit(1)

                    # There are instructions but no node, so construct the node.
                    nodeAddress = str(task + '.' + argument)
                    argumentAttributes = toolData.getArgumentData(argument)

                    # A stub output gets one node per stub extension, each
                    # with its own copy of the attributes; the first is
                    # flagged as the primary stub node.
                    if argumentAttributes.isStub:
                        for i, stubExtension in enumerate(argumentAttributes.stubExtensions):
                            modifiedNodeAddress = str(nodeAddress + '.' + stubExtension)
                            stubAttributes = deepcopy(argumentAttributes)
                            stubAttributes.stubExtension = stubExtension
                            stubAttributes.isPrimaryStubNode = i == 0
                            graph.addFileNode(modifiedNodeAddress, modifiedNodeAddress)
                            graph.addEdge(task, modifiedNodeAddress, stubAttributes)

                    # Not a stub: add the single node and edge.
                    else:
                        graph.addFileNode(nodeAddress, nodeAddress)
                        graph.addEdge(task, nodeAddress, argumentAttributes)

    # Report whether every check passed.
    return isSuccess
def constructFromFilename(graph, superpipeline, instructions, task, nodeId, argument, baseValues):
    """Construct output filenames from the values of another tool argument.

    For every entry in baseValues the directory part is dropped and the input
    extension reported by getExtension (for the extensions allowed on the
    instructions' 'use argument') is removed. The name is then passed through
    modifyText when the instructions contain 'modify text', gains any
    'addTextToFilename' text stored on the node, and is given its final
    extension by furnishExtension. For stub outputs the only candidate
    extension is the node's 'stubExtension'.

    Returns the list of constructed values, or an empty list when baseValues
    is empty (the caller is expected to detect the missing values). Prints a
    message and exits with status 1 if the instructions have no
    'use argument' field.

    NOTE(review): unlike addDivisionToValue, no random text is appended for
    intermediate files here - confirm this is intended.
    """

    # Without base values there is nothing to build from.
    if not baseValues:
        return []

    # The input argument from which the filenames are built. Only a missing
    # field (or instructions that cannot be indexed) is handled here, so that
    # unrelated errors and interrupts are not swallowed.
    try:
        inputArgument = instructions['use argument']
    except (KeyError, TypeError):
        print('constructFilenames.constructFromFilename - no \'use argument\' field')
        exit(1)

    # Tool configuration and the long form of the input argument.
    tool = superpipeline.tasks[task]
    toolData = superpipeline.getToolData(tool)
    longFormArgument = toolData.getLongFormArgument(inputArgument)

    # Extensions allowed on the input, and whether the output is a stub.
    extensions = toolData.getArgumentAttribute(longFormArgument, 'extensions')
    isOutputAStub = toolData.getArgumentAttribute(argument, 'isStub')

    # The stub extension associated with this node, if one exists.
    stubExtension = gr.pipelineGraph.CM_getArgumentAttribute(graph, task, nodeId, 'stubExtension')

    updatedValues = []
    for counter, value in enumerate(baseValues):

        # Keep only the filename if the base value carries a path.
        updatedValue = value.rsplit('/')[-1]

        # Remove the input extension when one is found; a falsy result
        # leaves the name untouched.
        extension = getExtension(updatedValue, extensions)
        if extension:
            updatedValue = updatedValue.replace('.' + str(extension), '')

        # Apply any text modifications from the instructions.
        if 'modify text' in instructions:
            updatedValue = modifyText(graph, task, argument, toolData, instructions, counter, updatedValue, isTerminate=True)

        # Extra text requested for this node.
        addText = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, nodeId, 'addTextToFilename')
        if addText:
            updatedValue += str('_' + addText)

        # Choose the candidate extensions: the stub extension for a stub
        # output, otherwise those listed on the edge to this node.
        if isOutputAStub:
            newExtensions = [stubExtension]
        else:
            newExtensions = gr.pipelineGraph.CM_getArgumentAttribute(graph, task, nodeId, 'extensions')

        updatedValues.append(furnishExtension(instructions, updatedValue, extension, newExtensions))

    return updatedValues