Example #1
def payload4(task):
    """
    run merge from N inputs
    input: Makefile.all, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq, {N}reads.tgz, {N}maps.tgz
    output: bam file + results.tgz?
    :param task:
    :return:
    """
    logger.debug("payload4: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type

    # Get user
    user = users_.get(task.owner_id)

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do something with the output container?
    output_cont = conts_.get(task.output)

    # Get container
    container = Container()
    container.guid = task.tag
    conts_.save(container)

    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        for file_template in files_template_list:
            # TODO: change the file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')

    # Register additional output files
    fc.reg_file_in_cont_byname(user, 'output.bam', container, 'output')
    fc.reg_file_in_cont_byname(user, 'myresults.bz2', container, 'output')

    # Prepare trf script (the template is overridden below for testing)
    script = task.task_type.trf_template
    # TODO: for testing only - emulate rather than submit real jobs
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    script = "/bin/bash " + swdir + "runmerge.sh -t " + str(n)
    send_job_(task, container, script)

    return True
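A note on the matching loop above: re.match anchors at the start of the LFN, so a template has to account for any prefix explicitly. A minimal illustration with a hypothetical template (the actual ifiles_template values live in the task type and are not shown in these snippets):

import re

# Hypothetical template in the spirit of the docstring's "*1.{N}.fastq" inputs;
# the real ifiles_template values are defined elsewhere.
file_template = r".*1\..*\.fastq"

for lfn in ("sample1.0001.fastq", "sample2.0001.fastq", "Makefile.all"):
    m = re.match(file_template, lfn)
    print("%s -> %s" % (lfn, "registered as input" if m else "skipped"))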
Example #2
def get_start_task(p):
    """
    Returns start task object
    :param p: Pipeline obj
    :return: Task obj
    """
    if not isinstance(p, Pipeline):
        raise Exception("Illegal pipeline class: not Pipeline")

    # Create start_task
    start_task_type = task_types_.first(method='start')
    start_task = new_task(start_task_type)
    start_task.owner_id = p.owner_id
    tasks_.save(start_task)

    # Update Pipeline obj
    set_current_task(p, start_task)
    return start_task
Example #3
def check_running_tasks():
    """
    Checks PanDA job statuses for all running tasks
    :return:
    """
    # Get tasks in running state
    tasks = tasks_.find(status='running')
    for task in tasks:
        # Check if tag defined
        if task.tag is not None and task.tag != "":
            # Check failed Panda jobs
            jobs = jobs_.find(tags=task.tag, status='failed')
            if jobs.count() > 0:
                task.status = 'failed'
                task.modification_time = datetime.utcnow()
                task.comment = "Failed task due to {n} failed jobs".format(
                    n=jobs.count())
                tasks_.save(task)
                # NB: returns immediately; remaining running tasks are
                # processed on the next invocation
                return False

            # Check cancelled Panda jobs
            jobs = jobs_.find(tags=task.tag, status='canceled')
            if jobs.count() > 0:
                task.status = 'cancelled'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
                return False

            # Check finished Panda jobs
            jobs = jobs_.find(tags=task.tag, status='finished')
            jobs_all = jobs_.find(tags=task.tag)
            if jobs.count() == jobs_all.count():
                # Register files from jobs into task container
                cont = conts_.get(task.input)
                for job in jobs:
                    files_catalog = job.container.files
                    for f in files_catalog:
                        if f.type == 'output':
                            # Register file in container
                            fc.reg_file_in_cont(f.file, cont, 'intermediate')

                # Change task status
                task.status = 'finished'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
                return True
        else:
            # If tag is not defined
            task.status = 'finished'
            task.modification_time = datetime.utcnow()
            tasks_.save(task)
    return True
Example #4
def new_pipeline():
    form = NewPipelineForm(request.form)

    if request.method == 'POST':
        ifiles = request.form.getlist('iguids[]')

        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add guids to container
        for item in ifiles:
            if item != '':
                f = files_.first(guid=item)
                if f is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(f, pp_cont, 'input')
                else:
                    pp_cont.status = 'broken'
                    conts_.save(pp_cont)
                    return make_response(jsonify({'error': "GUID {} not found".format(item)}))

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)

        return redirect(url_for('pipelines.list_all'))

    return render_template('dashboard/pp/new.html', form=form)
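NewPipelineForm is not defined in these snippets. Assuming the usual WTForms pattern, a minimal sketch (the field below is hypothetical; the view reads iguids[] directly from request.form in any case):

from wtforms import Form, StringField
from wtforms.validators import Optional

class NewPipelineForm(Form):
    # Hypothetical field; the real form definition lives elsewhere in the app.
    comment = StringField('Comment', validators=[Optional()])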
Example #5
def get_next_task(p):
    """
    Returns next task object
    :param p: Pipeline obj
    :return: Task obj
    """
    if not isinstance(p, Pipeline):
        raise Exception("Illegal pipeline class: not Pipeline")

    # Fetch current_task
    current_task_id = p.current_task_id
    current_task = tasks_.get(current_task_id)
    if not isinstance(current_task, Task):
        raise Exception("Unable to fetch current_task by id")
    current_task_type = current_task.task_type

    # Fetch pipeline_type
    pipeline_type = p.pipeline_type
    if not isinstance(pipeline_type, PipelineType):
        raise Exception("Illegal pipeline_type class: not PipelineType")

    # Fetch pipeline_catalog item
    pp_c = pipeline_catalog_.first(pipeline_type_id=pipeline_type.id, current_task_type_id=current_task_type.id)
    if not isinstance(pp_c, PipelineCatalog):
        raise Exception("Unable to fetch PipelineCatalog item")

    # Return next_task
    next_task = new_task(pp_c.next_task_type)
    next_task.owner_id = p.owner_id
    next_task.input = current_task.input
    next_task.output = current_task.output
    next_task.params = current_task.params
    tasks_.save(next_task)

    # Update Pipeline obj
    set_current_task(p, next_task)
    return next_task
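PipelineCatalog rows therefore act as a transition table keyed by (pipeline_type, current_task_type). A conceptual sketch of that table (the method names come from the dispatcher in Example #7; the ordering of the chain is an assumption, not confirmed by these snippets):

# Conceptual shape of the transition table behind get_next_task (sketch):
# (pipeline_type, current task method) -> next task method
PIPELINE_CHAIN = {
    ('paleomix', 'start'):      'init_task',
    ('paleomix', 'init_task'):  'split_task',
    ('paleomix', 'split_task'): 'run1_task',
    ('paleomix', 'run1_task'):  'merge_task',
    ('paleomix', 'merge_task'): 'finish',
}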
Example #6
def new_pipeline_from_cont():
    form = RunForm(request.form)

    if request.method == 'POST':
        icont = conts_.first(guid=form.guid.data)
        if icont is None:
            raise WebpandaError("Container not found")

        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add guids to container
        for item in icont.files:
            f = item.file
            # Register file in catalog
            fc.reg_file_in_cont(f, pp_cont, 'input')

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)

        return redirect(url_for('pipelines.list_all'))

    return render_template('dashboard/pp/new.html', form=form)
Example #7
def run(task):
    try:
        method = task.task_type.method
        if task.status != 'sent':
            # Alternatively: raise WebpandaError('Illegal task status to start')
            return False

        # Change task state to 'preparing'
        task.status = 'preparing'
        task.modification_time = datetime.utcnow()
        tasks_.save(task)

        # Custom payload
        if method == 'init_task':
            payload1(task)
        elif method == 'split_task':
            payload2(task)
        elif method == 'run1_task':
            payload3(task)
        elif method == 'merge_task':
            payload4(task)
        else:
            raise WebpandaError("Task payload error: method not found")

        # Change task state to 'running' (payload submitted)
        task.status = 'running'
        task.modification_time = datetime.utcnow()
        tasks_.save(task)
        return True

    except WebpandaError as e:
        # Change task state to 'failed'
        task.status = 'failed'
        task.modification_time = datetime.utcnow()
        task.comment = e.msg
        tasks_.save(task)
        return False
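The if/elif chain above maps task_type.method names onto payload functions. The same logic expressed as a dispatch table (a sketch with identical behavior, not the project's actual code):

# Sketch: method -> payload dispatch equivalent to the chain in run().
PAYLOADS = {
    'init_task':  payload1,
    'split_task': payload2,
    'run1_task':  payload3,
    'merge_task': payload4,
}

def dispatch_payload(task):
    payload = PAYLOADS.get(task.task_type.method)
    if payload is None:
        raise WebpandaError("Task payload error: method not found")
    return payload(task)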
Example #8
def payload3(task):
    """
    run1 - N parallel jobs. {N} = zero-padded sequence 01, 02, ..., N (at least 2 digits)
    #TODO deal with {N}.fastq.bz2 ??
    input: Makefile.{N}, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq
    output: likely reads{N}.tgz, maps{N}.tgz
    :param task:
    :return:
    """
    logger.debug("payload3: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
#    if task_type.id != 3or6?:
#        raise WebpandaError("Illegal task_type.id")

    # Get user
    user = users_.get(task.owner_id)

    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do something with the output container?
    output_cont = conts_.get(task.output)

    for jobname in gen_sfx("a",n):
        # Get container
        container = Container()
        container.guid = task.tag + "."+jobname
        conts_.save(container)

        # Add input files to container
        files_template_list = task_type.ifiles_template.split(',')
        for item in input_cont.files:
            f = item.file
            for file_template in files_template_list:
                # TODO: change the file template here
                m = re.match(file_template, f.lfn)
                if m is not None:
                    # Register file in container
                    fc.reg_file_in_cont(f, container, 'input')

        # Register additional output files
        fc.reg_file_in_cont_byname(user, jobname+'.reads.bz2', container, 'output')
        fc.reg_file_in_cont_byname(user, jobname + '.maps.bz2', container, 'output')

        # Prepare trf script (the template is overridden below for testing)
        script = task.task_type.trf_template
        # TODO: for testing only - emulate rather than submit real jobs
        pipeline_path_name = 'paleomix_bam'
        swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
        script = "/bin/bash " + swdir + "run11.sh -t " + jobname

        send_job_(task, container, script)

    return True
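gen_sfx is never defined in these snippets. From its call sites (gen_sfx("a", n) here; gen_sfx(prefix, rn, '.fastq') in payload2) and this docstring's zero-padded sequence, a plausible reconstruction:

def gen_sfx(prefix, n, suffix=''):
    # Sketch reconstructed from call sites; the real padding rule may differ.
    # Yields prefix + zero-padded index + suffix: a01, a02, ..., using at
    # least two digits and widening as n requires.
    width = max(2, len(str(n)))
    for i in range(1, n + 1):
        yield "%s%0*d%s" % (prefix, width, i, suffix)

# e.g. list(gen_sfx('a', 3)) -> ['a01', 'a02', 'a03']
#      list(gen_sfx('reads.a', 2, '.fastq')) -> ['reads.a01.fastq', 'reads.a02.fastq']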
Example #9
def payload2(task):
    """
    split_task
    Split input *.1.fastq and *.2.fastq into 'rn' pieces;
    run PanDA job: /bin/bash split.sh
    :param task:
    :return:
    """
    logger.debug("payload2: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
    if task_type.id != 1:
        raise WebpandaError("Illegal task_type.id")

    logger.debug("payload2: tasktype " + str(task_type.id))

    # Get user
    user = users_.get(task.owner_id)
    logger.debug("payload2: user " + str(user.id))

    # Get containers
    input_cont = conts_.get(task.input)
    # TODO: do something with the output container?
    output_cont = conts_.get(task.output)

    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)
    logger.debug("payload2: tag " + task.tag)

    # Get container
    container = Container()
    container.guid = task.tag + ".0"
    conts_.save(container)
    logger.debug("payload2: cont " + container.guid)

    script_add = ""

    rn = 0
    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        if rn == 0:
            if f.lfn.endswith('fastq'):
                rn = getn(f.fsize)
            elif f.lfn.endswith('fastq.bz2'):
                rn = getn2(f.fsize)
        for file_template in files_template_list:
            # TODO: Change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')
                if f.lfn.endswith('.fastq'):
                    for fi in gen_sfx(f.lfn[:-5]+'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fastq.bz2'):
                    for fi in gen_sfx(f.lfn[:-9]+'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fasta'):
                    fn = f.lfn + '.'
                    fc.reg_file_in_cont_byname(user, fn[:-6] + 'dict', container, 'output')
                    # itert: validated file has null size
                    for sfx in ('amb', 'ann', 'bwt', 'fai', 'pac', 'sa', 'validated'):
                        fc.reg_file_in_cont_byname(user, fn + sfx, container, 'output')

                    script_add += "; echo 123 > ../{fname}".format(fname=fn + "validated")

    logger.debug("payload2: reg Makefile")
    # Register additional output files (Makefile templates)
    for fi in gen_sfx('Makefile.a', rn, '.yaml'):
        fc.reg_file_in_cont_byname(user, fi, container, 'output')

    #guids = ["web.it_4b7d4757-9ba4-4ed7-8bc0-6edb8bcc68d2",
    #         "web.it_3bc78e60-241b-418a-a631-2461d4ba1977",
    #         "web.it_1b88049e-463b-4b4f-8454-9587301a53e5",
    #         "web.it_a02271ea-8a9b-42f3-add2-ed6d0f9ff07e",
    #         "web.it_61bb7c80-e53c-4641-88b0-fbd16b0f3d56",
    #         "web.it_3930f596-25ea-49b0-8943-7a83c84c7940",
    #         "web.it_aa7b77a3-c765-464e-a4fa-29ce6dd50346",
    #         "web.it_211f2187-41f2-489f-ba63-73f004f21c66"
    #         ]
    #for guid in guids:
    #    fc.reg_file_in_cont(files_.first(guid=guid), container, 'input')

    # Prepare trf script (the template is overridden below for testing)
    script = task.task_type.trf_template
    # TODO: for testing only ("1" - script1.sh) - emulate, not real jobs
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    # Background the genref/runtmplgen chain and run split.sh concurrently
    # ("& ;" was a bash syntax error; "&" alone backgrounds the AND-list)
    script = "/bin/bash " + swdir + "genref.sh && /bin/bash " + swdir + "runtmplgen.sh -t 1>bam.out 2>bam.err & "
    script += "/bin/bash " + swdir + "split.sh -t " + str(rn)
    script += script_add

    # Save rn as task param
    task.params = str(rn)
    tasks_.save(task)

    logger.debug("payload2: script " + script)
    logger.debug("payload2: send_job " + container.guid)
    send_job_(task, container, script)

    return True
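getn and getn2, which derive the split count from the input file size above, are not shown either. A heavily hedged sketch, assuming both target a roughly fixed chunk size and getn2 merely compensates for bz2 compression (the chunk size and the ratio below are guesses, not taken from the source):

def getn(fsize, chunk=2 * 1024 ** 3):
    # Pieces for a plain .fastq of fsize bytes; the 2 GiB target is an assumption.
    return max(1, int(fsize // chunk))

def getn2(fsize, chunk=2 * 1024 ** 3, ratio=4):
    # Same for .fastq.bz2, assuming roughly 4x bz2 compression (also a guess).
    return max(1, int(fsize * ratio // chunk))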
Example #10
def run():
    """
    Starts current defined task
    :return:
    """
    print "main started"

    # Fetch pipelines (init state)
    #TODO add SQL filter on status if possible
    pipelines = pipelines_.all()
    for pipeline in pipelines:
        # Check if finished
        if pipeline.status in ['finished', 'failed', 'cancelled']:
            continue

        # Fetch task object
        current_task = pclient.get_current_task(pipeline)

        if current_task is None:
            raise WebpandaError('Illegal task ID')

        if current_task.status == 'failed':
            #TODO: What to do if failed?
            pipeline.status = 'failed'
            pipeline.modification_time = datetime.utcnow()
            pipelines_.save(pipeline)
            continue

        if current_task.status == 'cancelled':
            #TODO: What to do if cancelled? If by system - resubmit; if by user - do nothing?
            pipeline.status = 'cancelled'
            pipeline.modification_time = datetime.utcnow()
            pipelines_.save(pipeline)
            continue

        if current_task.status == 'finished':
            # Get next_task
            current_task = pclient.get_next_task(pipeline)

        if current_task.status == 'defined':
            if current_task.task_type.method == 'start':

                # Do some general pipeline checks

                current_task.status = 'finished'
                current_task.modification_time = datetime.utcnow()
                tasks_.save(current_task)
                continue
            elif current_task.task_type.method == 'finish':
                current_task.status = 'finished'
                current_task.modification_time = datetime.utcnow()
                tasks_.save(current_task)

                # Process system finish task
                pipeline.status = 'finished'
                pipeline.modification_time = datetime.utcnow()
                pipelines_.save(pipeline)
                continue
            else:
                # Process a regular payload task.
                # Check whether the input is usable: either all files are
                # already present, or there will never be enough of them.
                if not paleomix.has_input(current_task):
                    current_task.status = "failed"
                    current_task.modification_time = datetime.utcnow()
                    current_task.comment = "Input files check failed"
                    tasks_.save(current_task)

                    pipeline.status = 'failed'
                    pipeline.modification_time = datetime.utcnow()
                    pipelines_.save(pipeline)
                    continue

                # Run task if defined
                current_task.status = 'sent'
                tasks_.save(current_task)

                # TODO: run in async regime
                # (no need to pass task_type.method - the task already carries it;
                #  note that async execution requires all params to be serializable,
                #  and BaseQuery is not)
                paleomix.run(current_task)
                continue
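Taken together, this driver, paleomix.run() (Example #7), and check_running_tasks() (Example #3) imply the following task status flow, summarized here as data (a reconstruction from the snippets, not a schema taken from the project):

# Task status transitions implied by the snippets above (reconstruction):
TASK_FLOW = {
    'defined':   ('sent',),                            # driver, before paleomix.run
    'sent':      ('preparing',),                       # run(), before the payload
    'preparing': ('running', 'failed'),                # payload ok / WebpandaError
    'running':   ('finished', 'failed', 'cancelled'),  # check_running_tasks()
}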