Example #1
    def _tagToDictAndCache(self, aspect, tag):
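        # Assumed to run under Twisted's defer.inlineCallbacks (the yields
        # below resolve Deferreds from tasks_tx.loadTask and self.cache.save).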
        if tag.taskName:
            try:
                t = yield tasks_tx.loadTask(tag.taskName)
                state = t.state
            except Exception:
                state = None
        else:
            state = None

        d = {}
        d.update({"files": tag.files, "metadata": tag.metadata})

        d.update(
            {
                "tag_name": tag.tagName,
                "file_count": len(tag.files),
                "pipelines": [],
                "task_name": tag.taskName,
                "state": state,
                "phantom": config.configToDict(tag.phantom) if tag.phantom else None,
            }
        )

        yield self.cache.save(d)
        self.changed(aspect, d)
Example #2
def pipelineToDict(p):
    return dict(
        name=p.name,
        taskName=p.taskName,
        pid=p.pid,
        ptype=p.ptypeStr(),
        config=[kv for kv in config.configToDict(p.config).iteritems()])
Example #3
def tagToDict(t):
    return {
        "tag_name": t.tagName,
        "files": t.files,
        "metadata": dict(t.metadata),
        "phantom": config.configToDict(t.phantom) if t.phantom else None,
        "task_name": t.taskName,
    }
Example #4
def tagToDict(t):
    return {
        'tag_name': t.tagName,
        'files': t.files,
        'metadata': dict(t.metadata),
        'phantom': config.configToDict(t.phantom) if t.phantom else None,
        'task_name': t.taskName
    }
Example #5
def handleCredentialConfig(request):
    conf = config.configToDict(request.credential.credInstance.conf)
    conf = func.updateDict(conf,
                           {'general.ctype': request.credential.credential.getCType()})
    
    queue.returnQueueSuccess(request.mq, request.body['return_queue'], conf)
    
    return defer_pipe.ret(request)
Example #6
def pipelineToDict(p):
    return dict(
        name=p.name,
        taskName=p.taskName,
        pid=p.pid,
        ptype=p.ptypeStr(),
        config=[kv for kv in config.configToDict(p.config).iteritems()],
    )
Example #7
def handleCredentialConfig(request):
    conf = config.configToDict(request.credential.credInstance.conf)
    conf = func.updateDict(
        conf, {'general.ctype': request.credential.credential.getCType()})

    queue.returnQueueSuccess(request.mq, request.body['return_queue'], conf)

    return defer_pipe.ret(request)
Example #8
def loadTagFile(fname):
    """
    Loads a tagfile, returns a config object of attributes

    Also considering a .phantom type which would represent files that don't really exist.  I think this makes sense
    as you should be able to transfer .phantom files around but .metadata files should be generated when you make a tag

    Will explain more about this in a wiki page somewhere...
    """
    ##
    # Phantom files are in a format that configFromStream can read.  This is because phantom files
    # are expected to be written and modified by humans.  .metadata files on the other hand
    # are just expected to be the product of a machine storing information, so they use json
    if os.path.exists(fname + '.phantom'):
        ##
    # Put everything under phantom
        # We want to do it lazily too since we will be adding
        # data it can access later
        phantom = configFromMap(
            {'phantom_tag': True,
             'phantom': configToDict(
                 configFromStream(open(fname + '.phantom'), lazy=True))},
            lazy=True)
    else:
        phantom = configFromMap({})

    ##
    # If the fname actually exists, open its meta data + files
    # if the fname does not exist but the phantom does, return the phantom
    # otherwise, throw an exception about missing the tagfile
    if os.path.exists(fname):
        if os.path.exists(fname + '.metadata'):
            metadata = configFromMap(
                {'metadata': json.loads(open(fname + '.metadata').read())},
                phantom,
                lazy=True)
        else:
            metadata = configFromMap({}, phantom)

        return configFromMap(
            {'files': [f.strip() for f in open(fname) if f.strip()]},
            metadata,
            lazy=True)
    elif os.path.exists(fname + '.phantom'):
        if os.path.exists(fname + '.metadata'):
            metadata = configFromMap(
                {'metadata': json.loads(open(fname + '.metadata').read())},
                phantom,
                lazy=True)
            return metadata
        else:
            return phantom
    else:
        raise MissingTagFileError(fname)
Example #9
def pipelineSSToDict(pss):
    return dict(name=pss.name,
                taskName=pss.taskName,
                pid=pss.pid,
                ptype=pss.ptypeStr(),
                config=config.configToDict(pss.config),
                complete=pss.complete,
                total=pss.total,
                state=pss.state)
Example #10
def _extractInnerPipelineConfig(batchConfig):
    batchDict = config.configToDict(batchConfig)
    innerPipelineConfigDict = dict([(k.split('.', 1)[1], v)
                                    for k, v in batchDict.iteritems()
                                    if k.startswith('batch_pipeline.')])

    for k in ['pipeline.PIPELINE_WRAPPER_NAME', 'pipeline.PIPELINE_NAME']:
        innerPipelineConfigDict[k] = batchDict[k]
        
    return innerPipelineConfigDict
Example #11
def _extractInnerPipelineConfig(batchConfig):
    batchDict = config.configToDict(batchConfig)
    innerPipelineConfigDict = dict([(k.split('.', 1)[1], v)
                                    for k, v in batchDict.iteritems()
                                    if k.startswith('batch_pipeline.')])

    for k in ['pipeline.PIPELINE_WRAPPER_NAME', 'pipeline.PIPELINE_NAME']:
        innerPipelineConfigDict[k] = batchDict[k]

    return innerPipelineConfigDict
Example #12
def pipelineToDict(p):
    return {'pipeline_id': p.pipelineId,
            'pipeline_name': p.pipelineName,
            'user_name': p.userName,
            'protocol': p.protocol,
            'checksum': p.checksum,
            'task_name': p.taskName,
            'queue': p.queue,
            'children': p.children,
            'config': config_.configToDict(p.config)}
Example #13
def saveCredential(credName, description, ctype, cert, pkey, metadata, conf):
    return credentials.saveCredential('localhost',
                                      'local',
                                      credName,
                                      description,
                                      ctype,
                                      cert,
                                      pkey,
                                      metadata,
                                      config.configToDict(conf))
Example #14
def _documentFromPipeline(p):
    jsonConf = json.dumps(config_.configToDict(p.config))
    return {'pipeline_id': p.pipelineId,
            'pipeline_name': p.pipelineName,
            'user_name': p.userName,
            'protocol': p.protocol,
            'checksum': p.checksum,
            'task_name': p.taskName,
            'queue': p.queue,
            'children': p.children,
            'config': jsonConf}
Example #15
    def clusterToDocument(self, cluster):
        return dict(user_name=cluster.userName,
                    cluster_name=cluster.clusterName,
                    state=cluster.state,
                    cred_name=cluster.credName,
                    config=json.dumps(config.configToDict(cluster.config)),
                    start_task=cluster.startTask,
                    add_instance_tasks=cluster.addInstanceTasks,
                    exec_nodes=cluster.execNodes,
                    data_nodes=cluster.dataNodes,
                    master=cluster.master)
Example #16
def pipelineSSToDict(pss):
    return dict(
        name=pss.name,
        taskName=pss.taskName,
        pid=pss.pid,
        ptype=pss.ptypeStr(),
        config=config.configToDict(pss.config),
        complete=pss.complete,
        total=pss.total,
        state=pss.state,
    )
Example #17
def createExecDataFile(conf, master, masterMachineConf):
    """
    Creates an exec data file the way the perl start_cluster does

    This is very similar to createMasterDataFile, should be refactored a bit
    """
    outName = os.path.join('/tmp', str(time.time()))

    ##
    # Going to load the master machine.conf and modify node type
    masterConf = config.configFromStream(open(masterMachineConf), lazy=True)
    masterConf = config.configFromMap({'NODE_TYPE': EXEC_NODE},
                                      masterConf,
                                      lazy=True)

    fout = open(outName, 'w')
    fout.write('\n'.join([
        k + '=' + str(v)
        for k, v in config.configToDict(masterConf).iteritems()
    ]))
    fout.close()

    template = open(conf('cluster.exec_user_data_tmpl')).read()
    clusterPrivateKey = open(conf('cluster.cluster_private_key')).read()

    outf = []
    runSingleProgramEx('ssh-keygen -y -f ' +
                       conf('cluster.cluster_private_key'),
                       outf.append,
                       None,
                       log=True)

    if conf('general.ctype') == 'ec2':
        template = template.replace('<TMPL_VAR NAME=MASTER_DNS>',
                                    master['private_dns'])
    else:
        template = template.replace('<TMPL_VAR NAME=MASTER_DNS>',
                                    master['public_dns'])

    clusterPublicKey = ''.join(outf)

    template = template.replace('<TMPL_VAR NAME=CLUSTER_PRIVATE_KEY>',
                                clusterPrivateKey)
    template = template.replace('<TMPL_VAR NAME=CLUSTER_PUBLIC_KEY>',
                                clusterPublicKey)
    template = template.replace('<TMPL_VAR NAME=MACHINE_CONF>',
                                open(outName).read().replace('${', '\\${'))

    os.remove(outName)

    outf = os.path.join(conf('general.secure_tmp'), 'exec_user_data.sh')
    open(outf, 'w').write(template)

    return outf
Example #18
    def pipelineToDict(self, pipeline):
        protocolConf = protocol_format.load(self.machineConf, pipeline.config("pipeline.PIPELINE_TEMPLATE"))

        inputTagsList = [
            pipeline.config(k).split(",")
            for k, v in protocolConf
            if v.get("type").split()[0] in ["dataset", "blastdb_dataset", "paired_dataset", "singleton_dataset"]
            and pipeline.config(k)
        ]
        inputTags = []
        for i in inputTagsList:
            inputTags.extend(i)

        possibleOutputTags = set(
            [
                pipeline.pipelineName + "_" + t.strip()
                for t in pipeline.config("output.TAGS_TO_DOWNLOAD", default="").split(",")
            ]
        )

        query = [{"tag_name": t} for t in possibleOutputTags]

        tags = yield www_tags.loadTagsBy("localhost", "local", pipeline.userName, {"$or": query}, False)

        tags = set([t["tag_name"] for t in tags])

        outputTags = list(tags & possibleOutputTags)

        pipelineTask = yield tasks_tx.loadTask(pipeline.taskName)

        pipelineWrapper = pipeline_misc.determineWrapper(
            self.machineConf, pipeline.config("pipeline.PIPELINE_TEMPLATE")
        )

        pipelineDict = {
            "pipeline_id": pipeline.pipelineId,
            "pipeline_name": pipeline.pipelineName,
            "user_name": pipeline.userName,
            "wrapper": pipeline.protocol == pipelineWrapper,
            "protocol": pipeline.config("pipeline.PIPELINE_TEMPLATE"),
            "checksum": pipeline.checksum,
            "task_name": pipeline.taskName,
            "queue": pipeline.queue,
            "children": pipeline.children,
            "state": pipelineTask.state,
            "num_steps": pipelineTask.numTasks,
            "num_complete": pipelineTask.completedTasks,
            "input_tags": inputTags,
            "output_tags": outputTags,
            "pipeline_desc": pipeline.config("pipeline.PIPELINE_DESC", default=""),
            "config": config.configToDict(pipeline.config, lazy=True),
        }

        defer.returnValue(pipelineDict)
Example #19
    def clusterToDocument(self, cluster):
        return dict(user_name=cluster.userName,
                    cluster_name=cluster.clusterName,
                    state=cluster.state,
                    cred_name=cluster.credName,
                    config=json.dumps(config.configToDict(cluster.config)),
                    start_task=cluster.startTask,
                    add_instance_tasks=cluster.addInstanceTasks,
                    exec_nodes=cluster.execNodes,
                    data_nodes=cluster.dataNodes,
                    master=cluster.master)
Example #20
def pipelineToDict(p):
    return {
        'pipeline_id': p.pipelineId,
        'pipeline_name': p.pipelineName,
        'user_name': p.userName,
        'protocol': p.protocol,
        'checksum': p.checksum,
        'task_name': p.taskName,
        'queue': p.queue,
        'children': p.children,
        'config': config_.configToDict(p.config)
    }
Example #21
def credentialToDict(cred):
    """
    The main difference here is that the ctype is turned into a string
    representation of the class/module name
    """
    return dict(name=cred.name,
                desc=cred.desc,
                ctype=cred.getCType(),
                cert=cred.cert,
                pkey=cred.pkey,
                active=cred.active,
                metadata=cred.metadata,
                conf=config.configToDict(cred.conf))
Example #22
def _documentFromPipeline(p):
    jsonConf = json.dumps(config_.configToDict(p.config))
    return {
        'pipeline_id': p.pipelineId,
        'pipeline_name': p.pipelineName,
        'user_name': p.userName,
        'protocol': p.protocol,
        'checksum': p.checksum,
        'task_name': p.taskName,
        'queue': p.queue,
        'children': p.children,
        'config': jsonConf
    }
Example #23
def runPipeline(host, cluster, parentName, bareRun, conf, queue=None, overwrite=False):
    return performQuery(
        host,
        RUN_URL,
        dict(
            cluster=cluster,
            config=config.configToDict(conf),
            parent_pipeline=parentName,
            queue=queue,
            bare_run=bareRun,
            overwrite=overwrite,
        ),
    )
Example #24
def runPipelineConfig(taskName, name, pipeline, conf, queue=None):
    """
    Takes a config object representing pipeline options, validates those options
    against pipeline.OPTIONS and passes the results on to runPipelineWithConfig
    """
    ##
    # Mocheezmo way to have it load a conf file.  This will be removed in the future
    tmpConfigName = os.path.join("/tmp", str(time.time()) + ".config")
    options = list(pipeline.OPTIONS)
    options.append(("conf", "", "--conf", "Conf file (DO NOT SPECIFY, FOR INTERNAL USE)", const("/tmp/machine.conf")))
    options.append(
        (
            "CONFIG_FILE",
            "-c",
            "--CONFIG_FILE",
            "Config file for the pipeline.  Specify this if you do not want to specify options on the comamnd line",
            const(tmpConfigName),
        )
    )

    ##
    # Load up machine.conf and apply it to our current config
    conf = config.configFromConfig(
        conf, config.configFromStream(open("/tmp/machine.conf"), config.configFromEnv()), lazy=True
    )
    vals = {}
    for o in options:
        vals[o[0]] = cli.applyOption(conf(o[0], default=None), o, conf)

    conf = config.configFromMap(vals, conf)

    ##
    # For some ergatis trickery we then need to output this config to a temp file so ergatis can pull variables from it
    confDict = config.configToDict(conf)
    confVals = {}
    cv = [(".".join(k.split(".")[:-1]), k.split(".")[-1], v) for k, v in confDict.iteritems()]
    for s, k, v in cv:
        confVals.setdefault(s, {})[k] = v

    fout = open(tmpConfigName, "w")
    for s, d in confVals.iteritems():
        if s not in ["", "env"]:
            fout.write("[" + s + "]\n")
            for k, v in d.iteritems():
                fout.write("%s=%s\n" % (k, str(v)))

    fout.close()

    return runPipelineWithConfig(taskName, name, pipeline, conf, queue)
Example #25
    def pipelineToDict(self, pipeline):
        protocolConf = protocol_format.load(self.machineConf, pipeline.config('pipeline.PIPELINE_TEMPLATE'))

        inputTagsList = [pipeline.config(k).split(',')
                         for k, v in protocolConf
                         if v.get('type').split()[0] in ['dataset',
                                                         'blastdb_dataset',
                                                         'paired_dataset',
                                                         'singleton_dataset'] and pipeline.config(k)]
        inputTags = []
        for i in inputTagsList:
            inputTags.extend(i)


        possibleOutputTags = set([pipeline.pipelineName + '_' + t.strip()
                                  for t in pipeline.config('output.TAGS_TO_DOWNLOAD', default='').split(',')])

        query = [{'tag_name': t} for t in possibleOutputTags]

        tags = yield www_tags.loadTagsBy('localhost', 'local', pipeline.userName, {'$or': query}, False)

        tags = set([t['tag_name'] for t in tags])

        outputTags = list(tags & possibleOutputTags)

        pipelineTask = yield tasks_tx.loadTask(pipeline.taskName)

        pipelineWrapper = pipeline_misc.determineWrapper(self.machineConf,
                                                         pipeline.config('pipeline.PIPELINE_TEMPLATE'))

        pipelineDict = {'pipeline_id': pipeline.pipelineId,
                        'pipeline_name': pipeline.pipelineName,
                        'user_name': pipeline.userName,
                        'wrapper': pipeline.protocol == pipelineWrapper,
                        'protocol': pipeline.config('pipeline.PIPELINE_TEMPLATE'),
                        'checksum': pipeline.checksum,
                        'task_name': pipeline.taskName,
                        'queue': pipeline.queue,
                        'children': pipeline.children,
                        'state': pipelineTask.state,
                        'num_steps': pipelineTask.numTasks,
                        'num_complete': pipelineTask.completedTasks,
                        'input_tags': inputTags,
                        'output_tags': outputTags,
                        'pipeline_desc': pipeline.config('pipeline.PIPELINE_DESC', default=''),
                        'config': config.configToDict(pipeline.config, lazy=True),
                        }

        defer.returnValue(pipelineDict)
Example #26
def runPipelineConfig(taskName, name, pipeline, conf, queue=None):
    """
    Takes a config object representing pipeline options, validates those options
    against pipeline.OPTIONS and passes the results on to runPipelineWithConfig
    """
    ##
    # Mocheezmo way to have it load a conf file.  This will be removed in the future
    tmpConfigName = os.path.join('/tmp', str(time.time()) + '.config')
    options = list(pipeline.OPTIONS)
    options.append(
        ('conf', '', '--conf', 'Conf file (DO NOT SPECIFY, FOR INTERNAL USE)',
         const('/tmp/machine.conf')))
    options.append((
        'CONFIG_FILE', '-c', '--CONFIG_FILE',
        'Config file for the pipeline.  Specify this if you do not want to specify options on the command line',
        const(tmpConfigName)))

    ##
    # Load up machine.conf and apply it to our current config
    conf = config.configFromConfig(conf,
                                   config.configFromStream(
                                       open('/tmp/machine.conf'),
                                       config.configFromEnv()),
                                   lazy=True)
    vals = {}
    for o in options:
        vals[o[0]] = cli.applyOption(conf(o[0], default=None), o, conf)

    conf = config.configFromMap(vals, conf)

    ##
    # For some ergatis trickery we then need to output this config to a temp file so ergatis can pull variables from it
    confDict = config.configToDict(conf)
    confVals = {}
    cv = [('.'.join(k.split('.')[:-1]), k.split('.')[-1], v)
          for k, v in confDict.iteritems()]
    for s, k, v in cv:
        confVals.setdefault(s, {})[k] = v

    fout = open(tmpConfigName, 'w')
    for s, d in confVals.iteritems():
        if s not in ['', 'env']:
            fout.write('[' + s + ']\n')
            for k, v in d.iteritems():
                fout.write('%s=%s\n' % (k, str(v)))

    fout.close()

    return runPipelineWithConfig(taskName, name, pipeline, conf, queue)
Example #27
def runPipeline(host,
                cluster,
                parentName,
                bareRun,
                conf,
                queue=None,
                overwrite=False):
    return performQuery(
        host, RUN_URL,
        dict(cluster=cluster,
             config=config.configToDict(conf),
             parent_pipeline=parentName,
             queue=queue,
             bare_run=bareRun,
             overwrite=overwrite))
Example #28
def createExecDataFile(conf, master, masterMachineConf):
    """
    Creates an exec data file the way the perl start_cluster does

    This is very similar to createMasterDataFile, should be refactored a bit
    """
    outName = os.path.join('/tmp', str(time.time()))

    ##
    # Going to load the master machine.conf and modify node type
    masterConf = config.configFromStream(open(masterMachineConf), lazy=True)
    masterConf = config.configFromMap({'NODE_TYPE': EXEC_NODE}, masterConf, lazy=True)

    fout = open(outName, 'w')
    fout.write('\n'.join([k + '=' + str(v) for k, v in config.configToDict(masterConf).iteritems()]))
    fout.close()

    
    template = open(conf('cluster.exec_user_data_tmpl')).read()
    clusterPrivateKey = open(conf('cluster.cluster_private_key')).read()
    
    outf = []
    runSingleProgramEx('ssh-keygen -y -f ' + conf('cluster.cluster_private_key'),
                       outf.append,
                       None,
                       log=True)

    if conf('general.ctype') == 'ec2':
        template = template.replace('<TMPL_VAR NAME=MASTER_DNS>', master['private_dns'])
    else:
        template = template.replace('<TMPL_VAR NAME=MASTER_DNS>', master['public_dns'])
    
    clusterPublicKey = ''.join(outf)

    template = template.replace('<TMPL_VAR NAME=CLUSTER_PRIVATE_KEY>', clusterPrivateKey)
    template = template.replace('<TMPL_VAR NAME=CLUSTER_PUBLIC_KEY>', clusterPublicKey)
    template = template.replace('<TMPL_VAR NAME=MACHINE_CONF>', open(outName).read().replace('${', '\\${'))

    os.remove(outName)
    
    outf = os.path.join(conf('general.secure_tmp'), 'exec_user_data.sh')
    open(outf, 'w').write(template)
    

    return outf
Example #29
def loadTagFile(fname):
    """
    Loads a tagfile, returns a config object of attributes

    Also considering a .phantom type which would represent files that don't really exist.  I think this makes sense
    as you should be able to transfer .phantom files around but .metadata files should be generated when you make a tag

    Will explain more about this in a wiki page somewhere...
    """
    ##
    # Phantom files are in a format that configFromStream can read.  This is because phantom files
    # are expected to be written and modified by humans.  .metadata files on the other hand
    # are just expected to be the product of a machine storing information, so they use json
    if os.path.exists(fname + '.phantom'):
        ##
        # Put everything under phantom
        # We want to do it lazily too since we will be adding
        # data it can access later
        phantom = configFromMap({'phantom_tag': True, 'phantom': configToDict(configFromStream(open(fname + '.phantom'), lazy=True))}, lazy=True)
    else:
        phantom = configFromMap({})

    ##
    # If the fname actually exists, open its meta data + files
    # if the fname does not exist but the phantom does, return the phantom
    # otherwise, throw an exception about missing the tagfile
    if os.path.exists(fname):
        if os.path.exists(fname + '.metadata'):
            metadata = configFromMap({'metadata': json.loads(open(fname + '.metadata').read())}, phantom, lazy=True)
        else:
            metadata = configFromMap({}, phantom)

        return configFromMap({'files': [f.strip() for f in open(fname) if f.strip()]}, metadata, lazy=True)
    elif os.path.exists(fname + '.phantom'):
        if os.path.exists(fname + '.metadata'):
            metadata = configFromMap({'metadata': json.loads(open(fname + '.metadata').read())}, phantom, lazy=True)
            return metadata
        else:
            return phantom
    else:
        raise MissingTagFileError(fname)
Example #30
    def _tagToDictAndCache(self, aspect, tag):
        if tag.taskName:
            try:
                t = yield tasks_tx.loadTask(tag.taskName)
                state = t.state
            except Exception:
                state = None
        else:
            state = None

        d = {}
        d.update({'files': tag.files,
                  'metadata': tag.metadata})

        d.update({'tag_name': tag.tagName,
                  'file_count': len(tag.files),
                  'pipelines': [],
                  'task_name': tag.taskName,
                  'state': state,
                  'phantom': config.configToDict(tag.phantom) if tag.phantom else None})

        yield self.cache.save(d)
        self.changed(aspect, d)
Example #31
def validatePipelineConfig(host, cluster, bareRun, conf):
    return performQuery(
        host, VALIDATE_URL,
        dict(cluster=cluster,
             bare_run=bareRun,
             config=config.configToDict(conf)))
Example #32
def saveCredential(credName, description, ctype, cert, pkey, metadata, conf):
    return credentials.saveCredential('localhost', 'local', credName,
                                      description, ctype, cert, pkey, metadata,
                                      config.configToDict(conf))
Example #33
def validatePipelineConfig(host, cluster, bareRun, conf):
    return performQuery(host, VALIDATE_URL, dict(cluster=cluster, bare_run=bareRun, config=config.configToDict(conf)))