Пример #1
0
def payload4(task):
    """
    Run merge from N inputs.

    input: Makefile.all, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq, {N}reads.tgz, {N}maps.tgz
    output: bam file + results.tgz?
    :param task: task object; its ``params`` field, if set, holds N (piece count)
    :return: True on successful job submission
    """
    logger.debug("payload4: Start")

    #### Prepare
    # Task type provides the input file-name templates used below.
    task_type = task.task_type

    # Task owner is needed to register named output files.
    user = users_.get(task.owner_id)

    # Tag the task with a fresh uuid so its jobs can be tracked later.
    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Number of pieces to merge; task.params overrides the default of 10.
    n = 10
    if task.params is not None:
        n = int(task.params)
        if n == 0:
            n = 10

    # Get containers
    input_cont = conts_.get(task.input)
    # TO_DO do smth with output container?
    output_cont = conts_.get(task.output)

    # Fresh container for this job's files, named after the task tag.
    container = Container()
    container.guid = task.tag
    conts_.save(container)

    # Register input-container files matching any of the task type's
    # templates as inputs of the new container.
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        for file_template in files_template_list:
            # TO_DO: Change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')

    # Register the expected merge outputs by name.
    fc.reg_file_in_cont_byname(user, 'output.bam', container, 'output')
    fc.reg_file_in_cont_byname(user, 'myresults.bz2', container, 'output')

    # Prepare trf script.
    # TO_DO just for test - only emulate, not real jobs.
    # (task_type.trf_template is deliberately ignored here; the previous
    # version assigned it to `script` and immediately overwrote it.)
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    script = "/bin/bash " + swdir + "runmerge.sh -t " + str(n)
    send_job_(task, container, script)

    return True
Пример #2
0
def check_running_tasks():
    """
    Checks PanDA jobs statuses for all running tasks.

    For each running task with a tag: mark it 'failed'/'cancelled' if any
    of its jobs failed or were canceled, or 'finished' once every one of
    its jobs has finished (output files of the finished jobs are then
    registered into the task's input container as intermediates). Tasks
    without a tag are marked 'finished' immediately.

    :return: False as soon as one task is failed/cancelled, True otherwise.
    """
    # Get tasks in running state
    tasks = tasks_.find(status='running')
    for task in tasks:
        # Check if tag defined
        if task.tag is not None and task.tag != "":
            # Check failed Panda jobs (count hoisted: one query, not two)
            n_failed = jobs_.find(tags=task.tag, status='failed').count()
            if n_failed > 0:
                task.status = 'failed'
                task.modification_time = datetime.utcnow()
                task.comment = "Failed task due to {n} failed jobs".format(
                    n=n_failed)
                tasks_.save(task)
                return False

            # Check canceled Panda jobs (comment previously said "failed")
            if jobs_.find(tags=task.tag, status='canceled').count() > 0:
                task.status = 'cancelled'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
                return False

            # Check finished Panda jobs
            jobs = jobs_.find(tags=task.tag, status='finished')
            jobs_all = jobs_.find(tags=task.tag)
            if jobs.count() == jobs_all.count():
                # Register output files from finished jobs into the
                # task's input container as intermediates.
                cont = conts_.get(task.input)
                for job in jobs:
                    files_catalog = job.container.files
                    for f in files_catalog:
                        if f.type == 'output':
                            # Register file in container
                            fc.reg_file_in_cont(f.file, cont, 'intermediate')

                # Change task status
                task.status = 'finished'
                task.modification_time = datetime.utcnow()
                tasks_.save(task)
                return True
        else:
            # If tag is not defined consider the task trivially finished.
            task.status = 'finished'
            task.modification_time = datetime.utcnow()
            tasks_.save(task)
    return True
Пример #3
0
def check_running_tasks():
    """
    Inspect PanDA job statuses for every task currently in 'running' state.

    A tagged task becomes 'failed' or 'cancelled' when any of its jobs
    failed or were canceled, and 'finished' once all of its jobs finished
    (their output files are copied into the task's input container as
    intermediates). Untagged tasks are marked 'finished' right away.
    :return: False if a task was failed/cancelled, True otherwise.
    """
    for task in tasks_.find(status='running'):
        tag = task.tag
        if tag is None or tag == "":
            # No tag: nothing to track, close the task out.
            task.status = 'finished'
            task.modification_time = datetime.utcnow()
            tasks_.save(task)
            continue

        # Any failed job fails the whole task.
        failed = jobs_.find(tags=tag, status='failed')
        if failed.count() > 0:
            task.status = 'failed'
            task.modification_time = datetime.utcnow()
            task.comment = "Failed task due to {n} failed jobs".format(n=failed.count())
            tasks_.save(task)
            return False

        # Any canceled job cancels the whole task.
        canceled = jobs_.find(tags=tag, status='canceled')
        if canceled.count() > 0:
            task.status = 'cancelled'
            task.modification_time = datetime.utcnow()
            tasks_.save(task)
            return False

        # The task is done once every one of its jobs has finished.
        finished = jobs_.find(tags=tag, status='finished')
        everything = jobs_.find(tags=tag)
        if finished.count() == everything.count():
            # Pull the finished jobs' outputs into the input container.
            cont = conts_.get(task.input)
            for job in finished:
                for entry in job.container.files:
                    if entry.type == 'output':
                        fc.reg_file_in_cont(entry.file, cont, 'intermediate')

            task.status = 'finished'
            task.modification_time = datetime.utcnow()
            tasks_.save(task)
            return True
    return True
Пример #4
0
def new_pipeline():
    """
    Create a new pipeline from a list of input file guids posted by a form.

    POST: reads 'iguids[]', creates a Pipeline plus its input container,
    registers every listed file in the container and points the pipeline's
    start task at it. GET: renders the creation form.
    """
    form = NewPipelineForm(request.form)

    if request.method == 'POST':
        ifiles = request.form.getlist('iguids[]')

        current_user = g.user

        # Prepare pipeline
        pp = Pipeline()
        pp.status = 'running'
        pp.type_id = pipeline_types_.get(1).id
        pp.owner_id = current_user.id
        pipelines_.save(pp)

        # Prepare container
        pp_cont = Container()
        pp_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
        conts_.save(pp_cont)

        # Add guids to container
        for item in ifiles:
            if item != '':
                f = files_.first(guid=item)
                if f is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(f, pp_cont, 'input')
                else:
                    # BUGFIX: report the missing guid (`item`); the previous
                    # code formatted `f`, which is always None on this branch.
                    pp_cont.status = 'broken'
                    conts_.save(pp_cont)
                    return make_response(jsonify({'error': "GUID {} not found".format(item)}))

        # Set current task
        start_task = pclient.get_start_task(pp)
        start_task.input = pp_cont.id
        start_task.output = pp_cont.id
        tasks_.save(start_task)

        return redirect(url_for('pipelines.list_all'))

    return render_template('dashboard/pp/new.html', form=form)
Пример #5
0
def new_pipeline_from_cont():
    """
    Create a new pipeline whose inputs are all files of an existing
    container, looked up by the guid submitted in the form.

    :raises WebpandaError: if no container matches the submitted guid.
    """
    form = RunForm(request.form)

    if request.method != 'POST':
        return render_template('dashboard/pp/new.html', form=form)

    icont = conts_.first(guid=form.guid.data)
    if icont is None:
        raise WebpandaError("Container not found")

    owner = g.user

    # New pipeline owned by the current user.
    pipeline = Pipeline()
    pipeline.status = 'running'
    pipeline.type_id = pipeline_types_.get(1).id
    pipeline.owner_id = owner.id
    pipelines_.save(pipeline)

    # Fresh input container for the pipeline.
    new_cont = Container()
    new_cont.guid = 'pipeline.' + commands.getoutput('uuidgen')
    conts_.save(new_cont)

    # Copy every file of the source container in as an input.
    for entry in icont.files:
        fc.reg_file_in_cont(entry.file, new_cont, 'input')

    # Point the pipeline's start task at the new container.
    start_task = pclient.get_start_task(pipeline)
    start_task.input = new_cont.id
    start_task.output = new_cont.id
    tasks_.save(start_task)

    return redirect(url_for('pipelines.list_all'))
Пример #6
0
def payload2(task):
    """
    split_task: split input *.1.fastq and *.2.fastq into 'rn' pieces and
    run panda /bin/bash split.sh.

    :param task: task object (task_type.id must be 1)
    :return: True on successful job submission
    :raises WebpandaError: if the task has the wrong task type
    """
    logger.debug("payload2: Start")

    #### Prepare
    # Check type of task
    task_type = task.task_type
    if task_type.id != 1:
        raise WebpandaError("Illegal task_type.id")

    logger.debug("payload2: tasktype " + str(task_type.id))

    # Get user
    user = users_.get(task.owner_id)
    logger.debug("payload2: user " + str(user.id))

    # Get containers
    input_cont = conts_.get(task.input)
    #TODO do smth with output container?
    output_cont = conts_.get(task.output)

    # Tag the task with a fresh uuid so its jobs can be tracked.
    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)
    logger.debug("payload2: tag " + task.tag)

    # New container for this job's files.
    container = Container()
    container.guid = task.tag + ".0"
    conts_.save(container)
    logger.debug("payload2: cont " + container.guid)

    script_add = ""

    # rn = number of split pieces, derived from the size of the first
    # fastq/fastq.bz2 input encountered.
    rn = 0
    # Add input files to container
    files_template_list = task_type.ifiles_template.split(',')
    for item in input_cont.files:
        f = item.file
        if rn == 0:
            if f.lfn.endswith('fastq'):
                rn = getn(f.fsize)
            elif f.lfn.endswith('fastq.bz2'):
                rn = getn2(f.fsize)
        for file_template in files_template_list:
            # TODO: Change file template here
            m = re.match(file_template, f.lfn)
            if m is not None:
                # Register file in container
                fc.reg_file_in_cont(f, container, 'input')
                # Pre-register the expected split outputs for fastq inputs.
                if f.lfn.endswith('.fastq'):
                    for fi in gen_sfx(f.lfn[:-5]+'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fastq.bz2'):
                    for fi in gen_sfx(f.lfn[:-9]+'a', rn, '.fastq'):
                        fc.reg_file_in_cont_byname(user, fi, container, 'output')
                if f.lfn.endswith('.fasta'):
                    # Reference genome: register the index/dict files the
                    # pipeline produces next to it.
                    fn = f.lfn + '.'
                    fc.reg_file_in_cont_byname(user, fn[:-6]+'dict', container, 'output')
                    # itert: validated file has null size
                    for sfx in ('amb', 'ann', 'bwt', 'fai', 'pac', 'sa', 'validated'):
                        fc.reg_file_in_cont_byname(user, fn+sfx, container, 'output')

                    script_add += "; echo 123 > ../{fname}".format(fname=fn+"validated")

    logger.debug("payload2: reg Makefile")
    # reg additional output
    for fi in gen_sfx('Makefile.a', rn, '.yaml'):
        fc.reg_file_in_cont_byname(user, fi, container, 'output')

    # Prepare trf script.
    # TO_DO just for test add "1" - script1.sh- only emulate, not real jobs.
    # (task_type.trf_template is deliberately ignored here.)
    pipeline_path_name = 'paleomix_bam'
    swdir = '/s/ls2/users/poyda/swp/' + pipeline_path_name + '/'
    # BUGFIX: runtmplgen.sh is backgrounded with '&'; the previous version
    # appended "& ;", and a ';' right after '&' is a bash syntax error.
    script = "/bin/bash " + swdir + "genref.sh && /bin/bash " + swdir + "runtmplgen.sh -t 1>bam.out 2>bam.err & "
    script += "/bin/bash " + swdir + "split.sh -t " + str(rn)
    script += script_add

    # Save rn as task param
    task.params = str(rn)
    tasks_.save(task)

    logger.debug("payload2: script " + script)
    logger.debug("payload2: send_job " + container.guid)
    send_job_(task, container, script)

    return True
Пример #7
0
def send_job(jobid, siteid):
    """
    Build a PanDA JobSpec for a stored job and submit it to the target site.

    Input/output files are taken from the job's container and attached as
    FileSpecs; a log file entry is created, saved to the catalog and
    registered in the container. On success the job record gets the
    returned PandaID; otherwise its status becomes 'submit_error'.

    :param jobid: id of the Job record to submit
    :param siteid: id of the target Site record
    :return: 0
    """
    _logger.debug('Jobid: ' + str(jobid))

    site = sites_.get(siteid)

    job = jobs_.get(int(jobid))
    cont = job.container
    files_catalog = cont.files

    # Dataset name is scoped by the job owner's username.
    fscope = getScope(job.owner.username)
    datasetName = '{}:{}'.format(fscope, cont.guid)

    distributive = job.distr.name
    release = job.distr.release

    # Prepare runScript: substitute placeholders in the distributive's
    # command template.
    parameters = job.distr.command
    parameters = parameters.replace("$COMMAND$", job.params)
    parameters = parameters.replace("$USERNAME$", job.owner.username)
    parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group)

    # Prepare metadata
    metadata = dict(user=job.owner.username)

    # Prepare PanDA Object
    pandajob = JobSpec()
    pandajob.jobDefinitionID = int(time.time()) % 10000
    pandajob.jobName = cont.guid
    pandajob.transformation = client_config.DEFAULT_TRF
    pandajob.destinationDBlock = datasetName
    pandajob.destinationSE = site.se
    pandajob.currentPriority = 1000
    pandajob.prodSourceLabel = 'user'
    pandajob.computingSite = site.ce
    pandajob.cloud = 'RU'
    pandajob.VO = 'atlas'
    pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName)
    pandajob.coreCount = job.corecount
    pandajob.metadata = json.dumps(metadata)
    #pandajob.workingGroup = job.owner.working_group

    if site.encode_commands:
        # It requires script wrapper on cluster side
        pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release,
                                                    distributive, parameters)
    else:
        pandajob.jobParameters = parameters

    # Attach every container file to the PanDA job as a FileSpec.
    has_input = False
    for fcc in files_catalog:
        if fcc.type == 'input':
            f = fcc.file
            guid = f.guid
            fileIT = FileSpec()
            fileIT.lfn = f.lfn
            fileIT.dataset = pandajob.prodDBlock
            fileIT.prodDBlock = pandajob.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = fscope
            fileIT.status = 'ready'
            fileIT.GUID = guid
            pandajob.addFile(fileIT)

            has_input = True
        if fcc.type == 'output':
            f = fcc.file
            fileOT = FileSpec()
            fileOT.lfn = f.lfn
            fileOT.destinationDBlock = pandajob.prodDBlock
            fileOT.destinationSE = pandajob.destinationSE
            fileOT.dataset = pandajob.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = fscope
            fileOT.GUID = f.guid
            pandajob.addFile(fileOT)

            # Save replica meta
            fc.new_replica(f, site)

    if not has_input:
        # Add fake input — presumably PanDA requires at least one input
        # file per job; confirm against the PanDA client docs.
        fileIT = FileSpec()
        fileIT.lfn = "fake.input"
        fileIT.dataset = pandajob.prodDBlock
        fileIT.prodDBlock = pandajob.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = fscope
        fileIT.status = 'ready'
        fileIT.GUID = "fake.guid"
        pandajob.addFile(fileIT)

    # Prepare log file (comment typo fixed: was "lof")
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % pandajob.jobName
    fileOL.destinationDBlock = pandajob.destinationDBlock
    fileOL.destinationSE = pandajob.destinationSE
    fileOL.dataset = '{}:logs'.format(fscope)
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    pandajob.addFile(fileOL)

    # Save log meta
    log = File()
    log.scope = fscope
    log.lfn = fileOL.lfn
    log.guid = getGUID(log.scope, log.lfn)
    log.type = 'log'
    log.status = 'defined'
    files_.save(log)

    # Save replica meta
    fc.new_replica(log, site)

    # Register file in container
    fc.reg_file_in_cont(log, cont, 'log')

    # Submit job
    o = submitJobs([pandajob])
    x = o[0]

    try:
        # update PandaID from the submit result
        PandaID = int(x[0])
        job.pandaid = PandaID
        job.ce = site.ce
    except:
        # NOTE(review): bare except swallows the actual failure; presumably
        # a non-numeric x[0] signals a submit error — confirm and narrow.
        job.status = 'submit_error'
    jobs_.save(job)

    return 0
Пример #8
0
def new_job():
    """Creates new job from the JSON payload of the request.

    Expects request.json['data'] with keys 'sw_id', 'script', 'cores'
    and optionally 'ftp_dir', 'guids' and 'tags'. Builds a container,
    registers all input/output files and schedules the async submit.

    :return: dict with the new job id and its container guid
    :raises WebpandaError: if a listed file guid is unknown
    """
    g.user = request.oauth.user
    scope = getScope(request.oauth.user.username)

    js = request.json
    data = js['data']

    distr_id = data['sw_id']
    params = data['script']
    corecount = data['cores']

    site = sites_.first(ce=current_app.config['DEFAULT_CE'])
    # BUGFIX: was distrs_.get(id) — that passed the *builtin* `id`
    # function instead of the requested software id.
    distr = distrs_.get(distr_id)

    # New container to hold the job's files.
    container = Container()
    guid = 'job.' + commands.getoutput('uuidgen')
    container.guid = guid
    container.status = 'open'
    conts_.save(container)

    # Process ftp files
    if 'ftp_dir' in data:
        ftp_dir = data['ftp_dir']
        register_ftp_files(ftp_dir, scope, container.guid)

    # Process guid list
    if 'guids' in data:
        guids = data['guids']
        for f in guids:
            if f != '':
                file_ = files_.first(guid=f)
                if file_ is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(file_, container, 'input')
                else:
                    raise WebpandaError('File with guid %s not found' % f)

    ofiles = ['results.tgz']

    # Starts cloneReplica tasks
    ftasks = prepareInputFiles(container.id, site.se)

    # Saves output files meta (renamed from `file` to avoid shadowing the builtin)
    for lfn in ofiles:
        ofile = File()
        ofile.scope = scope
        ofile.guid = getGUID(scope, lfn)
        ofile.lfn = lfn
        ofile.status = 'defined'
        files_.save(ofile)

        # Register file in catalog
        fc.reg_file_in_cont(ofile, container, 'output')

    # Counts input/output files registered in the container
    allfiles = container.files
    nifiles = 0
    nofiles = 0
    for f in allfiles:
        if f.type == 'input':
            nifiles += 1
        if f.type == 'output':
            nofiles += 1

    # Defines job meta
    job = Job()
    job.pandaid = None
    job.status = 'pending'
    job.owner = request.oauth.user
    job.params = params
    job.distr = distr
    job.container = container
    job.creation_time = datetime.utcnow()
    job.modification_time = datetime.utcnow()
    job.ninputfiles = nifiles
    job.noutputfiles = nofiles
    job.corecount = corecount
    job.tags = data['tags'] if 'tags' in data else ""
    jobs_.save(job)

    # Async sendjob: submit to PanDA once input replicas are staged.
    res = chord(ftasks)(async_send_job.s(jobid=job.id, siteid=site.id))
    return {'id': job.id, 'container_id': guid}
Пример #9
0
def upload():
    """
    Handle an upload form POST: store each posted file under the default
    SE's data directory, register it in the file catalog (or reuse an
    existing catalog entry matched by size/checksums) and attach it to a
    freshly created container as input.

    :return: ajax response (when posted via Ajax) or a redirect to the
             jobs page.
    """
    form = request.form

    # Create a unique container quid for this particular batch of uploads.
    cguid = 'job.' + commands.getoutput('uuidgen')

    # Is the upload using Ajax, or a direct POST by the form?
    is_ajax = False
    if form.get("__ajax", None) == "true":
        is_ajax = True

    # Create new container
    container = Container()
    container.guid = cguid
    container.status = 'open'
    conts_.save(container)

    # Process files in request
    for upload in request.files.getlist("file"):
        # Define file params
        # NOTE(review): rsplit("/")[0] takes the FIRST path component, not
        # the basename; for a plain filename it is a no-op, but for
        # "dir/name" it yields "dir" — confirm whether rsplit("/", 1)[-1]
        # was intended.
        lfn = upload.filename.rsplit("/")[0]
        scope = getScope(g.user.username)
        guid = getGUID(scope, lfn)
        site = sites_.first(se=current_app.config['DEFAULT_SE'])

        # Target folder for these uploads.
        dir = '/' + os.path.join('system', scope, guid)
        target = site.datadir + dir
        try:
            os.makedirs(target)
        except:
            # NOTE(review): bare except — also fires when the directory
            # already exists; consider narrowing to OSError.
            if is_ajax:
                return ajax_response(False, "Couldn't create upload directory: %s" % target)
            else:
                return "Couldn't create upload directory: %s" % target

        # Catalog-relative replica path and absolute on-disk destination.
        replfn = os.path.join(dir, lfn)
        destination = os.path.join(target, lfn)
        upload.save(destination)

        if os.path.isfile(destination):
            # Check file existence in catalog by size and checksums.
            adler = adler32(destination)
            md5 = md5sum(destination)
            size = fsize(destination)
            file_id = ddm_checkifexists(lfn, size, adler, md5)

            if file_id:
                # If file exists, reuse the catalog entry.
                file = files_.get(file_id)
            else:
                # Otherwise create new catalog entry plus a ready replica.
                file = File()
                file.scope = scope
                file.guid = guid
                file.type = 'input'
                file.lfn = lfn
                file.token = ''
                file.status = 'defined'
                files_.save(file)
                setFileMeta(file.id, destination)

                replica = Replica()
                replica.se = site.se
                replica.status = 'ready'
                replica.lfn = replfn
                replica.original = file
                replicas_.save(replica)

            # Register file in container
            fc.reg_file_in_cont(file, container, 'input')

        else:
            # Save failed: report via ajax_response even for non-Ajax posts.
            return ajax_response(False, "Couldn't save file: %s" % target)

    if is_ajax:
        return ajax_response(True, cguid)
    else:
        return redirect(url_for("jobs.jobs"))
Пример #10
0
def file_save(container_guid, lfn):
    """
    POST: /pilot/file/<container_guid>/<lfn>/save

    Saves file from request, returns file guid.

    Looks the file up in the (open) container by lfn, creating and
    registering it if absent. If a replica already exists on the default
    SE its status decides the outcome: 'ready' → error, 'defined' → the
    request body is written to the replica's path. Otherwise a new
    ready replica is created under <datadir>/<scope>/<container guid>.

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: guid
    :rtype: json
    :raises WebpandaError: container not open, replica exists/broken,
        or an unexpected replica status
    """
    site = sites_.first(se=current_app.config['DEFAULT_SE'])

    # Accept scoped guids of the form "scope:guid".
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]
    container = conts_.first(guid=container_guid)
    if container.status != 'open':
        raise WebpandaError('Unable to upload: Container is not open')
    cc = container.files

    # Find an existing file with this lfn in the container.
    ff = None
    for c in cc:
        f = c.file
        if f.lfn == lfn:
            ff = f
    if not ff:
        # Unknown lfn: create the catalog entry and attach it as input.
        ff = File()
        ff.scope = getScope(g.user.username)
        ff.lfn = lfn
        ff.guid = getGUID(ff.scope, ff.lfn)
        ff.status = 'defined'
        files_.save(ff)

        # Register file in container
        fc.reg_file_in_cont(ff, container, 'input')

    # Default target location (used when no replica exists on this SE).
    path = os.path.join(site.datadir, getScope(g.user.username), container.guid)
    replfn = '/' + os.path.join(getScope(g.user.username), container.guid, ff.lfn)
    destination = os.path.join(path, ff.lfn)

    # If a replica already exists on this SE, act according to its status.
    for r in ff.replicas:
        if r.se == site.se:
            destination = site.datadir + r.lfn
            file_dir = '/'.join(destination.split('/')[:-1])
            if r.status == 'ready':
                if os.path.isfile(destination):  # Check fsize, md5 or adler
                    raise WebpandaError('Replica exists')
                else:
                    # Catalog says ready but the file is gone on disk.
                    r.status = 'broken'
                    replicas_.save(r)
                    raise WebpandaError('Broken replica')
            elif r.status == 'defined':
                # Replica reserved but not yet written: write request body.
                try:
                    os.makedirs(file_dir)
                except(Exception):
                    pass
                f = open(destination, 'wb')
                f.write(request.data)
                f.close()

                # Update file info
                setFileMeta(ff.id, destination)

                r.status = 'ready'
                replicas_.save(r)
                return {'guid': ff.guid}
            else:
                raise WebpandaError('Replica status: %s' % r.status)


    # No replica on this SE yet: write the payload and create one.
    replica = Replica()
    if os.path.isfile(destination):
        raise WebpandaError('Unable to upload: File exists')
    try:
        os.makedirs(path)
    except(Exception):
        _logger.debug('Path exists: %s' % path)
    f = open(destination, 'wb')
    f.write(request.data)
    f.close()

    # Update file info
    setFileMeta(ff.id, destination)

    # Create/change replica
    replica.se = site.se
    replica.status = 'ready'
    replica.lfn = replfn
    replica.token = ''
    replica.original = ff
    replicas_.save(replica)
    return {'guid': ff.guid}
Пример #11
0
def upload_dir(user_id, cont_id, se_id, path):
    """
    Uploads files from external dir path into defined container.

    Each listed file is registered in the catalog, linked into the system
    directory on the SE, checksummed, and marked 'ready' together with a
    newly created ready replica. Progress is mirrored to stdout and the
    module logger.

    :param user_id: id of the owning user
    :param cont_id: id of Container
    :param se_id: id of SE
    :param path: dir path on SE
    :return: the container id
    :raises WebpandaError: when the SE listing cannot be obtained
    """
    user = users_.get(user_id)
    cont = conts_.get(cont_id)
    se = sites_.get(se_id)

    # Initialize SE connector
    print "=Initialize SE connector"
    _logger.debug("=Initialize SE connector")
    conn_factory = SEFactory()
    connector = conn_factory.getSE(se.plugin, None)

    # Fetch list of files
    print "=Fetch list of files"
    _logger.debug("=Fetch list of files")
    try:
        list_of_lfn = connector.ls(path, rel=False)
        # NOTE(review): removing from a list while iterating it skips the
        # element after each removal; adjacent empty entries would survive.
        for item in list_of_lfn:
            # Check empty items
            if item == "":
                list_of_lfn.remove(item)
    except:
        # NOTE(review): bare except also hides bugs in the filtering loop
        # above, not just SE errors — consider narrowing.
        raise WebpandaError("Unable to get list of files from SE: " +
                            str(se_id))
    print "=" + str(len(list_of_lfn))
    _logger.debug("=" + str(len(list_of_lfn)))

    # Create list of File objs
    print "=Create list of File objs"
    _logger.debug("=Create list of File objs")
    list_of_obj = list()
    for item in list_of_lfn:
        list_of_obj.append(fc.new_file(user, item))
    print "=" + str(len(list_of_obj))
    _logger.debug("=" + str(len(list_of_obj)))

    # Iterate through files objects
    print "=IterateLoop:Start"
    _logger.debug("=IterateLoop:Start")
    for item in list_of_obj:
        # Add files to container:
        print "=Add file to container"
        _logger.debug("=Add file to container")
        fc.reg_file_in_cont(item, cont, 'intermediate')

        # Copy files into system dir (link, not copy, on the SE)
        print "=Copy file into system dir"
        _logger.debug("=Copy file into system dir")
        connector.link(os.path.join(path, item.lfn),
                       fc.get_file_dir(item),
                       rel=False)

        # Calculate fsize, adler32, md5hash
        print "=Calculate fsize, adler32, md5hash"
        _logger.debug("=Calculate fsize, adler32, md5hash")
        item.fsize = connector.fsize(fc.get_file_path(item))
        item.md5sum = connector.md5sum(fc.get_file_path(item))
        item.checksum = connector.adler32(fc.get_file_path(item))
        fc.save(item)

        # Create list of Replica objs
        print "=Create Replica object"
        _logger.debug("=Create Replica object")
        r = fc.new_replica(item, se)
        r.status = 'ready'
        fc.save(r)

        # Update files' status
        print "=Update files' status"
        _logger.debug("=Update files' status")
        item.status = 'ready'
        fc.save(item)
    print "=IterateLoop:Finish"
    _logger.debug("=IterateLoop:Finish")

    # Return container id
    print "=Return container id"
    _logger.debug("=Return container id")
    return cont_id
Пример #12
0
def upload():
    """
    Handle an upload form POST: store every posted file under the default
    SE's data directory, register it in the file catalog (or reuse an
    existing catalog entry matched by size/checksums) and attach it to a
    freshly created container as input.

    Returns an ajax response or a redirect depending on how the form
    was submitted.
    """
    # One container collects this whole batch of uploads.
    cguid = 'job.' + commands.getoutput('uuidgen')

    # The form tells us whether it was posted via Ajax.
    is_ajax = request.form.get("__ajax", None) == "true"

    container = Container()
    container.guid = cguid
    container.status = 'open'
    conts_.save(container)

    # Process every file attached to the request.
    for item in request.files.getlist("file"):
        lfn = item.filename.rsplit("/")[0]
        scope = getScope(g.user.username)
        guid = getGUID(scope, lfn)
        site = sites_.first(se=current_app.config['DEFAULT_SE'])

        # Target folder for this upload.
        cat_dir = '/' + os.path.join('system', scope, guid)
        target = site.datadir + cat_dir
        try:
            os.makedirs(target)
        except:
            if is_ajax:
                return ajax_response(
                    False, "Couldn't create upload directory: %s" % target)
            return "Couldn't create upload directory: %s" % target

        replfn = os.path.join(cat_dir, lfn)
        destination = os.path.join(target, lfn)
        item.save(destination)

        if not os.path.isfile(destination):
            return ajax_response(False, "Couldn't save file: %s" % target)

        # Match the payload against the catalog by checksums and size.
        adler = adler32(destination)
        md5 = md5sum(destination)
        size = fsize(destination)
        file_id = ddm_checkifexists(lfn, size, adler, md5)

        if file_id:
            # Known file: reuse the existing catalog entry.
            fobj = files_.get(file_id)
        else:
            # Unknown file: new catalog entry plus a ready replica.
            fobj = File()
            fobj.scope = scope
            fobj.guid = guid
            fobj.type = 'input'
            fobj.lfn = lfn
            fobj.token = ''
            fobj.status = 'defined'
            files_.save(fobj)
            setFileMeta(fobj.id, destination)

            replica = Replica()
            replica.se = site.se
            replica.status = 'ready'
            replica.lfn = replfn
            replica.original = fobj
            replicas_.save(replica)

        # Attach the file to the batch container.
        fc.reg_file_in_cont(fobj, container, 'input')

    if is_ajax:
        return ajax_response(True, cguid)
    return redirect(url_for("jobs.jobs"))
Пример #13
0
def upload_dir(user_id, cont_id, se_id, path):
    """
    Uploads files from external dir path into defined contained
    :param cont_id: id of Container
    :param se_id: id of SE
    :param path: dir path on SE
    :return:
    """
    user = users_.get(user_id)
    cont = conts_.get(cont_id)
    se = sites_.get(se_id)

    # Initialize SE connector
    print "=Initialize SE connector"
    _logger.debug("=Initialize SE connector")
    conn_factory = SEFactory()
    connector = conn_factory.getSE(se.plugin, None)

    # Fetch list of files
    print "=Fetch list of files"
    _logger.debug("=Fetch list of files")
    try:
        list_of_lfn = connector.ls(path, rel=False)
        for item in list_of_lfn:
            # Check empty items
            if item == "":
                list_of_lfn.remove(item)
    except:
        raise WebpandaError("Unable to get list of files from SE: " + str(se_id))
    print "=" + str(len(list_of_lfn))
    _logger.debug("=" + str(len(list_of_lfn)))

    # Create list of File objs
    print "=Create list of File objs"
    _logger.debug("=Create list of File objs")
    list_of_obj = list()
    for item in list_of_lfn:
        list_of_obj.append(fc.new_file(user, item))
    print "=" + str(len(list_of_obj))
    _logger.debug("=" + str(len(list_of_obj)))

    # Iterate through files objects
    print "=IterateLoop:Start"
    _logger.debug("=IterateLoop:Start")
    for item in list_of_obj:
        # Add files to container:
        print "=Add file to container"
        _logger.debug("=Add file to container")
        fc.reg_file_in_cont(item, cont, 'intermediate')

        # Copy files into system dir
        print "=Copy file into system dir"
        _logger.debug("=Copy file into system dir")
        connector.link(os.path.join(path, item.lfn), fc.get_file_dir(item), rel=False)

        # Calculate fsize, adler32, md5hash
        print "=Calculate fsize, adler32, md5hash"
        _logger.debug("=Calculate fsize, adler32, md5hash")
        item.fsize = connector.fsize(fc.get_file_path(item))
        item.md5sum = connector.md5sum(fc.get_file_path(item))
        item.checksum = connector.adler32(fc.get_file_path(item))
        fc.save(item)

        # Create list of Replica objs
        print "=Create Replica object"
        _logger.debug("=Create Replica object")
        r = fc.new_replica(item, se)
        r.status = 'ready'
        fc.save(r)

        # Update files' status
        print "=Update files' status"
        _logger.debug("=Update files' status")
        item.status = 'ready'
        fc.save(item)
    print "=IterateLoop:Finish"
    _logger.debug("=IterateLoop:Finish")

    # Return container id
    print "=Return container id"
    _logger.debug("=Return container id")
    return cont_id
Пример #14
0
def registerLocalFile(arg, dirname, names, scope):
    """Register files from a local directory into a container.

    :param arg: Container guid
    :param dirname: Absolute directory holding the files
    :param names: Iterable of file names to register
    :param scope: Scope to register the files under
    :return:
    """
    site = sites_.first(se=client_config.DEFAULT_SE)
    _logger.debug(str(arg))
    cont = conts_.first(guid=arg)
    known_files = cont.files

    for name in names:
        fpath = os.path.join(dirname, name)

        # 1) Look for the file among those already in the container
        #    (last match wins, as in the container iteration order)
        fobj = None
        for entry in known_files:
            if entry.lfn == name:
                fobj = entry

        # 2) Fall back to a catalog lookup by name, size and checksums
        if not fobj:
            destination = os.path.join(dirname, name)
            adler = adler32(destination)
            md5 = md5sum(destination)
            size = fsize(destination)
            file_id = ddm_checkifexists(name, size, adler, md5)
            if file_id:
                # Known file: reuse the existing catalog record
                fobj = files_.get(file_id)

        # 3) Still unknown: create a fresh catalog record
        if not fobj:
            fobj = File()
            fobj.scope = scope
            fobj.lfn = name
            fobj.guid = getGUID(fobj.scope, fobj.lfn)
            fobj.type = 'input'
            fobj.status = 'defined'
            files_.save(fobj)
            setFileMeta(fobj.id, fpath)

        # Attach the file to the container
        fc.reg_file_in_cont(fobj, cont, "input")

        # Ensure a ready replica exists on this site's SE
        ready_replica = None
        for r in fobj.replicas:
            if r.se == site.se and r.status == 'ready':
                ready_replica = r
        if not ready_replica:
            # Copy into the system area and record the new replica.
            # NOTE(review): fpath is assumed to live under site.datadir -
            # the datadir prefix is stripped to get an SE-relative path.
            ldir = '/' + os.path.join('system', fobj.scope, fobj.guid)
            ddm_localmakedirs(ldir)
            ddm_localcp(fpath[len(site.datadir):], ldir)

            new_replica = Replica()
            new_replica.se = site.se
            new_replica.status = 'ready'
            new_replica.token = ''
            new_replica.lfn = os.path.join(ldir, fobj.lfn)
            new_replica.original = fobj
            replicas_.save(new_replica)
Пример #15
0
def job():
    """
    New job form view.

    GET renders the new-job form. POST creates a job: resolves the site and
    distributive, collects input files from the ftp dir, a guid list, other
    containers and external urls, registers output files, stores the Job
    record and schedules asynchronous submission to the chosen site.

    :return: Response obj
    """
    form = NewJobForm(request.form)
    if request.method == 'POST':
        site = sites_.get(int(form.site.data))
        distr_name, distr_release = form.distr.data.split(':')
        distr = distrs_.first(name=distr_name, release=int(distr_release))

        container_guid = form.container.data
        try:
            container = conts_.first(guid=container_guid)
        except Exception as e:
            # Fix: log the actual exception instance; the old code logged
            # `Exception.message` (a class attribute), never the real error.
            _logger.error(str(e))
            container = None
        if container is None:
            # `first()` returns None for an unknown guid - report 404
            # instead of crashing later on attribute access.
            return make_response(jsonify({'error': 'Container not found'}), 404)

        if site.encode_commands:
            # By default frontend encodes with base64 job script parts separated by ";"
            # It requires script wrapper on cluster side
            jparams = form.params.data
        else:
            # Set site.encode_commands as False if you want to send command string without base64 encoding
            jparams = ';'.join([b64decode(command) for command in form.params.data.split(';')])

        ifiles = request.form.getlist('ifiles[]')
        iguids = request.form.getlist('iguids[]')
        iconts = request.form.getlist('iconts[]')
        ofiles = ['{guid}.out.tgz'.format(guid=container.guid)]

        scope = getScope(g.user.username)

        # Process ftp files
        ftp_dir = form.ftpdir.data
        register_ftp_files(ftp_dir, scope, container.guid)

        # Process guid list
        for f in iguids:
            if f != '':
                file = files_.first(guid=f)
                if file is not None:
                    # Register files in container
                    fc.reg_file_in_cont(file, container, 'input')
                else:
                    return make_response(jsonify({'error': "GUID {} not found".format(f)}))

        # Process containers: pull every file of each referenced container
        for c in iconts:
            if c != '':
                try:
                    form_cont = conts_.first(guid=c)
                except Exception as e:
                    # Same logging fix as above
                    _logger.error(str(e))
                    form_cont = None
                if form_cont is None:
                    return make_response(jsonify({'error': 'Container in form not found'}), 404)
                for f in form_cont.files:
                    # Register file in catalog
                    fc.reg_file_in_cont(f.file, container, 'input')

        # Processes urls: register external files as 'link' replicas
        for f in ifiles:
            if f != '':
                from_se, path, token = getUrlInfo(f)
                replfn = ':/'.join([from_se, path])

                # Check if used before
                file_id = ddm_checkexternalifexists('', replfn)
                if file_id:
                    file = files_.get(file_id)
                else:
                    lfn = path.split('/')[-1]
                    guid = getGUID(scope, lfn)

                    file = File()
                    file.scope = scope
                    file.guid = guid
                    file.type = 'input'
                    file.lfn = lfn
                    file.status = 'defined'
                    files_.save(file)

                    replica = Replica()
                    replica.se = from_se
                    replica.status = 'link'
                    # Separate url & token
                    replica.lfn = replfn
                    replica.token = token
                    replica.original = file
                    replicas_.save(replica)

                # Register file in container
                fc.reg_file_in_cont(file, container, 'input')

        # Starts cloneReplica tasks
        ftasks = prepareInputFiles(container.id, site.se)

        # Saves output files meta
        for lfn in ofiles:
            file = File()
            file.scope = scope
            file.guid = getGUID(scope, lfn)
            file.type = 'output'
            file.lfn = lfn
            file.status = 'defined'
            files_.save(file)

            # Register file in container
            fc.reg_file_in_cont(file, container, 'output')

        # Counts files
        allfiles = container.files
        nifiles = 0
        nofiles = 0
        for f in allfiles:
            if f.type == 'input':
                nifiles += 1
            if f.type == 'output':
                nofiles += 1

        # Defines job meta
        job = Job()
        job.pandaid = None
        job.status = 'pending'
        job.owner = g.user
        job.params = jparams
        job.distr = distr
        job.container = container
        job.creation_time = datetime.utcnow()
        job.modification_time = datetime.utcnow()
        job.ninputfiles = nifiles
        job.noutputfiles = nofiles
        job.corecount = form.corecount.data
        job.tags = form.tags.data if form.tags.data != "" else None
        jobs_.save(job)

        # Async sendjob: submit once all replica-clone tasks finish
        res = chord(ftasks)(async_send_job.s(jobid=job.id, siteid=site.id))

        return redirect(url_for('jobs.jobs'))

    form.distr.choices = [("%s:%s" % (distr.name, distr.release), "%s: %s" % (distr.name, distr.version)) for distr in distrs_.find().order_by('name').order_by('version')]
    form.site.choices = [(site.id, "{ce}".format(ce=site.ce)) for site in sites_.find(active=1).order_by('ce')]
    return render_template("dashboard/jobs/new.html", form=form)
Пример #16
0
def send_job(jobid, siteid):
    """
    Build a PanDA job from a stored Job record and submit it.

    Collects input/output file specs from the job's container, fills the
    run-script parameters from the distributive command template, attaches
    a log-file spec, submits via submitJobs() and stores the returned
    PandaID on the job (or marks it 'submit_error').

    :param jobid: id of the Job record
    :param siteid: id of the target site
    :return: 0
    """
    _logger.debug('Jobid: ' + str(jobid))

    site = sites_.get(siteid)

    job = jobs_.get(int(jobid))
    cont = job.container
    files_catalog = cont.files

    fscope = getScope(job.owner.username)
    datasetName = '{}:{}'.format(fscope, cont.guid)

    distributive = job.distr.name
    release = job.distr.release

    # Prepare runScript: substitute placeholders in the distr command template
    parameters = job.distr.command
    parameters = parameters.replace("$COMMAND$", job.params)
    parameters = parameters.replace("$USERNAME$", job.owner.username)
    parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group)

    # Prepare metadata
    metadata = dict(user=job.owner.username)

    # Prepare PanDA Object
    pandajob = JobSpec()
    pandajob.jobDefinitionID = int(time.time()) % 10000
    pandajob.jobName = cont.guid
    pandajob.transformation = client_config.DEFAULT_TRF
    pandajob.destinationDBlock = datasetName
    pandajob.destinationSE = site.se
    pandajob.currentPriority = 1000
    pandajob.prodSourceLabel = 'user'
    pandajob.computingSite = site.ce
    pandajob.cloud = 'RU'
    pandajob.VO = 'atlas'
    pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName)
    pandajob.coreCount = job.corecount
    pandajob.metadata = json.dumps(metadata)
    #pandajob.workingGroup = job.owner.working_group

    if site.encode_commands:
        # It requires script wrapper on cluster side
        pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release, distributive, parameters)
    else:
        pandajob.jobParameters = parameters

    # Attach input/output file specs from the container catalog
    has_input = False
    for fcc in files_catalog:
        if fcc.type == 'input':
            f = fcc.file
            guid = f.guid
            fileIT = FileSpec()
            fileIT.lfn = f.lfn
            fileIT.dataset = pandajob.prodDBlock
            fileIT.prodDBlock = pandajob.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = fscope
            fileIT.status = 'ready'
            fileIT.GUID = guid
            pandajob.addFile(fileIT)

            has_input = True
        if fcc.type == 'output':
            f = fcc.file
            fileOT = FileSpec()
            fileOT.lfn = f.lfn
            fileOT.destinationDBlock = pandajob.prodDBlock
            fileOT.destinationSE = pandajob.destinationSE
            fileOT.dataset = pandajob.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = fscope
            fileOT.GUID = f.guid
            pandajob.addFile(fileOT)

            # Save replica meta
            fc.new_replica(f, site)

    if not has_input:
        # Add fake input
        fileIT = FileSpec()
        fileIT.lfn = "fake.input"
        fileIT.dataset = pandajob.prodDBlock
        fileIT.prodDBlock = pandajob.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = fscope
        fileIT.status = 'ready'
        fileIT.GUID = "fake.guid"
        pandajob.addFile(fileIT)

    # Prepare log file spec
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % pandajob.jobName
    fileOL.destinationDBlock = pandajob.destinationDBlock
    fileOL.destinationSE = pandajob.destinationSE
    fileOL.dataset = '{}:logs'.format(fscope)
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    pandajob.addFile(fileOL)

    # Save log meta
    log = File()
    log.scope = fscope
    log.lfn = fileOL.lfn
    log.guid = getGUID(log.scope, log.lfn)
    log.type = 'log'
    log.status = 'defined'
    files_.save(log)

    # Save replica meta
    fc.new_replica(log, site)

    # Register file in container
    fc.reg_file_in_cont(log, cont, 'log')

    # Submit job
    o = submitJobs([pandajob])
    x = o[0]

    try:
        # Update PandaID from the submission result
        PandaID = int(x[0])
        job.pandaid = PandaID
        job.ce = site.ce
    except Exception:
        # Fix: narrowed from a bare `except:` - still tolerate any result
        # shape, but no longer swallow SystemExit/KeyboardInterrupt.
        job.status = 'submit_error'
    jobs_.save(job)

    return 0
Пример #17
0
def new_job():
    """Creates new job from a JSON request.

    Expects request.json['data'] with keys 'sw_id', 'script', 'cores' and
    optionally 'ftp_dir', 'guids', 'tags'. Builds a fresh container,
    registers input/output files, saves the Job record and schedules
    asynchronous submission to the default CE.

    :return: dict with the job id and the container guid
    """
    g.user = request.oauth.user
    scope = getScope(request.oauth.user.username)

    js = request.json
    data = js['data']

    distr_id = data['sw_id']
    params = data['script']
    corecount = data['cores']

    site = sites_.first(ce=current_app.config['DEFAULT_CE'])
    # Fix: look up the distributive by the id from the request;
    # the old code passed the builtin `id`, so the lookup never matched.
    distr = distrs_.get(distr_id)

    container = Container()
    guid = 'job.' + commands.getoutput('uuidgen')
    container.guid = guid
    container.status = 'open'
    conts_.save(container)

    # Process ftp files
    if 'ftp_dir' in data:
        ftp_dir = data['ftp_dir']
        register_ftp_files(ftp_dir, scope, container.guid)

    # Process guid list
    if 'guids' in data:
        guids = data['guids']
        for f in guids:
            if f != '':
                file_ = files_.first(guid=f)
                if file_ is not None:
                    # Register file in catalog
                    fc.reg_file_in_cont(file_, container, 'input')
                else:
                    raise WebpandaError('File with guid %s not found' % f)

    ofiles = ['results.tgz']

    # Starts cloneReplica tasks
    ftasks = prepareInputFiles(container.id, site.se)

    # Saves output files meta
    for lfn in ofiles:
        file = File()
        file.scope = scope
        file.guid = getGUID(scope, lfn)
        # Fix: mark the file as 'output' so the counting loop below and the
        # sibling job() view see it (was previously left unset -> nofiles=0).
        file.type = 'output'
        file.lfn = lfn
        file.status = 'defined'
        files_.save(file)

        # Register file in catalog
        fc.reg_file_in_cont(file, container, 'output')

    # Counts files
    allfiles = container.files
    nifiles = 0
    nofiles = 0
    for f in allfiles:
        if f.type == 'input':
            nifiles += 1
        if f.type == 'output':
            nofiles += 1

    # Defines job meta
    job = Job()
    job.pandaid = None
    job.status = 'pending'
    job.owner = request.oauth.user
    job.params = params
    job.distr = distr
    job.container = container
    job.creation_time = datetime.utcnow()
    job.modification_time = datetime.utcnow()
    job.ninputfiles = nifiles
    job.noutputfiles = nofiles
    job.corecount = corecount
    job.tags = data['tags'] if 'tags' in data else ""
    jobs_.save(job)

    # Async sendjob: submit once all replica-clone tasks finish
    res = chord(ftasks)(async_send_job.s(jobid=job.id, siteid=site.id))
    return {'id': job.id, 'container_id': guid}
Пример #18
0
def payload3(task):
    """
    run1 - N parallel jobs. {N} = sequence 0..01,0..02,...,N, not less than 2 placeholders
    #TODO deal with {N}.fastq.bz2 ??
    input: Makefile.{N}, *.fasta.{sfx list}, *1.{N}.fastq, *2.{N}.fastq
    output: likely reads{N}.tgz, maps{N}.tgz
    :param task:
    :return:
    """
    logger.debug("payload3: Start")

    # Resolve task type and owner
    task_type = task.task_type
    user = users_.get(task.owner_id)

    # Number of parallel jobs (default 10; 0 in params also means 10)
    num_jobs = 10
    if task.params is not None:
        num_jobs = int(task.params)
        if num_jobs == 0:
            num_jobs = 10

    # Tag this task run with a fresh uuid
    task.tag = "task." + commands.getoutput('uuidgen')
    tasks_.save(task)

    # Input/output containers of the task
    input_cont = conts_.get(task.input)
    #TO_DO do smth with output container?
    output_cont = conts_.get(task.output)

    templates = task_type.ifiles_template.split(',')
    for jobname in gen_sfx("a", num_jobs):
        # One container per parallel job
        container = Container()
        container.guid = task.tag + "." + jobname
        conts_.save(container)

        # Register input files whose lfn matches any template
        for item in input_cont.files:
            f = item.file
            for tmpl in templates:
                # TO_DO: Change file template here
                if re.match(tmpl, f.lfn) is not None:
                    fc.reg_file_in_cont(f, container, 'input')

        # Register the expected per-job outputs
        fc.reg_file_in_cont_byname(user, jobname + '.reads.bz2', container, 'output')
        fc.reg_file_in_cont_byname(user, jobname + '.maps.bz2', container, 'output')

        # Build the transformation script
        script = task.task_type.trf_template
        # TO_DO just for test - only emulate, not real jobs
        swdir = '/s/ls2/users/poyda/swp/' + 'paleomix_bam' + '/'
        script = "/bin/bash " + swdir + "run11.sh -t " + jobname

        send_job_(task, container, script)

    return True
Пример #19
0
def file_save(container_guid, lfn):
    """
    POST: /pilot/file/<container_guid>/<lfn>/save

    Saves file from request, returns file guid

    :param container_guid: Guid of container
    :type container_guid: str
    :param lfn: Local FileName
    :type lfn: str
    :return: guid
    :rtype: json
    """
    # Target storage element: the app's default SE
    site = sites_.first(se=current_app.config['DEFAULT_SE'])

    # Accept "scope:guid" form - keep only the guid part
    if ':' in container_guid:
        container_guid = container_guid.split(':')[-1]
    container = conts_.first(guid=container_guid)
    if container.status != 'open':
        raise WebpandaError('Unable to upload: Container is not open')
    cc = container.files

    # Look for an existing file with this lfn in the container
    # (last match wins if the container holds duplicates)
    ff = None
    for c in cc:
        f = c.file
        if f.lfn == lfn:
            ff = f
    if not ff:
        # Unknown lfn: create a new catalog record and attach it
        ff = File()
        ff.scope = getScope(g.user.username)
        ff.lfn = lfn
        ff.guid = getGUID(ff.scope, ff.lfn)
        ff.status = 'defined'
        files_.save(ff)

        # Register file in container
        fc.reg_file_in_cont(ff, container, 'input')

    # Default on-disk location: <datadir>/<scope>/<container guid>/<lfn>
    path = os.path.join(site.datadir, getScope(g.user.username),
                        container.guid)
    replfn = '/' + os.path.join(getScope(g.user.username), container.guid,
                                ff.lfn)
    destination = os.path.join(path, ff.lfn)

    # If a replica already exists on this SE, act on its status
    for r in ff.replicas:
        if r.se == site.se:
            destination = site.datadir + r.lfn
            file_dir = '/'.join(destination.split('/')[:-1])
            if r.status == 'ready':
                if os.path.isfile(destination):  # Check fsize, md5 or adler
                    raise WebpandaError('Replica exists')
                else:
                    # Marked ready but missing on disk - flag it broken
                    r.status = 'broken'
                    replicas_.save(r)
                    raise WebpandaError('Broken replica')
            elif r.status == 'defined':
                # Reserved slot: write the uploaded bytes into place
                try:
                    os.makedirs(file_dir)
                except (Exception):
                    pass
                f = open(destination, 'wb')
                f.write(request.data)
                f.close()

                # Update file info
                setFileMeta(ff.id, destination)

                r.status = 'ready'
                replicas_.save(r)
                return {'guid': ff.guid}
            else:
                raise WebpandaError('Replica status: %s' % r.status)

    # No replica on this SE yet: write the upload and create one
    replica = Replica()
    if os.path.isfile(destination):
        raise WebpandaError('Unable to upload: File exists')
    try:
        os.makedirs(path)
    except (Exception):
        _logger.debug('Path exists: %s' % path)
    f = open(destination, 'wb')
    f.write(request.data)
    f.close()

    # Update file info
    setFileMeta(ff.id, destination)

    # Create/change replica
    replica.se = site.se
    replica.status = 'ready'
    replica.lfn = replfn
    replica.token = ''
    replica.original = ff
    replicas_.save(replica)
    return {'guid': ff.guid}
Пример #20
0
def registerLocalFile(arg, dirname, names, scope):
    """Register files from local dir to container
    :param arg: Container guid
    :param dirname: Abs dir
    :param names: File name
    :param scope: Scope to upload files in
    :return:
    """
    # Default storage element for new replicas
    site = sites_.first(se=client_config.DEFAULT_SE)
    _logger.debug(str(arg))
    cont = conts_.first(guid=arg)
    files = cont.files

    for name in names:
        fpath = os.path.join(dirname, name)

        fobj = None
        # Check in container
        # (last match wins if the container holds duplicate lfns)
        for file in files:
            if file.lfn == name:
                fobj = file

        # Check in catalog
        # Look up by name + size + checksums to reuse an existing record
        if not fobj:
            destination = os.path.join(dirname, name)
            adler = adler32(destination)
            md5 = md5sum(destination)
            size = fsize(destination)
            file_id = ddm_checkifexists(name, size, adler, md5)

            if file_id:
                # If file exists
                fobj = files_.get(file_id)

        # Still unknown: create a fresh catalog record
        if not fobj:
            fobj = File()
            fobj.scope = scope
            fobj.lfn = name
            fobj.guid = getGUID(fobj.scope, fobj.lfn)
            fobj.type = 'input'
            fobj.status = 'defined'
            files_.save(fobj)
            setFileMeta(fobj.id, fpath)

        # Register file in catalog
        fc.reg_file_in_cont(fobj, cont, "input")

        # Ensure there is a ready replica on this site's SE
        replicas = fobj.replicas
        replica = None
        for r in replicas:
            if r.se == site.se and r.status == 'ready':
                replica = r
        if not replica:
            # Copy into the system area and record the new replica.
            # NOTE(review): fpath is assumed to live under site.datadir -
            # the datadir prefix is stripped to get an SE-relative path.
            ldir = '/' + os.path.join('system', fobj.scope, fobj.guid)
            ddm_localmakedirs(ldir)
            ddm_localcp(fpath[len(site.datadir):], ldir)

            replica = Replica()
            replica.se = site.se
            replica.status = 'ready'
            replica.token = ''
            replica.lfn = os.path.join(ldir, fobj.lfn)
            replica.original = fobj
            replicas_.save(replica)