Example #1
import subprocess

# project_utilities is an experiment-specific helper module assumed to be
# importable alongside this snippet.
import project_utilities

def update_jobs():

    global jobs, server

    # Build the jobsub_q command line.
    command = ['jobsub_q']
    if server is not None:
        command.append('--jobsub-server=%s' % server)
    command.append('--group=%s' % project_utilities.get_experiment())
    command.append('--user=%s' % project_utilities.get_user())
    command.append('--role=%s' % project_utilities.get_role())

    # Run jobsub_q and capture its output as text (universal_newlines makes
    # communicate() return str rather than bytes under Python 3).
    jobinfo = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    jobout, joberr = jobinfo.communicate()
    rc = jobinfo.poll()
    if rc != 0:
        #raise JobsubError(command, rc, jobout, joberr)
        # Simply return in case jobsub_q fails.
        return
    jobs = jobout.split('\n')
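
A minimal sketch of how this refresher might be driven, assuming the module-level jobs and server globals shown above; the polling loop and its names are illustrative assumptions, not part of the original snippet:

import time

jobs = []
server = None

def watch_queue(interval=60):
    # Hypothetical driver: refresh the cached jobsub_q output periodically
    # and echo the non-empty lines.
    while True:
        update_jobs()
        for line in jobs:
            if line.strip():
                print(line)
        time.sleep(interval)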
Example #2
import os
import subprocess
import time

# External dependencies assumed available in this snippet's environment:
# samweb_cli (the SAM web client), plus the experiment-specific helpers
# project_utilities and root_metadata.
import samweb_cli
import project_utilities
import root_metadata

def getmetadata(inputfile):
    # Set up the experiment name for samweb Python API
    samweb = samweb_cli.SAMWebClient(
        experiment=project_utilities.get_experiment())

    # Extract metadata into a pipe.
    local = project_utilities.path_to_local(inputfile)
    if local != '':
        proc = subprocess.Popen(["sam_metadata_dumper", "-H", local],
                                stdout=subprocess.PIPE)
    else:
        url = project_utilities.path_to_url(inputfile)
        proc = subprocess.Popen(["sam_metadata_dumper", "-H", url],
                                stdout=subprocess.PIPE)
    lines = proc.stdout.readlines()
    if local != '' and local != inputfile:
        os.remove(local)

    # Count the number of lines in the file (for later use!)
    num_lines = len(lines)

    # define an empty python dictionary
    md = {}

    # Read the columns from the file and fill the dictionary.
    c = 0
    p = 0
    parents = []
    PName = False
    gen = False
    for line in lines:
        c = c + 1
        columns = line.split(" ")
        columns = [col.strip() for col in columns]
        if 4 <= c <= num_lines - 2:
            if columns[1] == 'dataTier':
                md['data_tier'] = columns[-1]
                if columns[-1] == 'generated':
                    gen = True
            elif columns[1] == 'endTime':
                E = time.localtime(int(columns[-1]))
                md['end_time'] = str(E[0]) + '-' + str(E[1]) + '-' + str(
                    E[2]) + 'T' + str(E[3]) + ':' + str(E[4]) + ':' + str(E[5])
            elif columns[1] == 'startTime':
                S = time.localtime(int(columns[-1]))
                md['start_time'] = str(S[0]) + '-' + str(S[1]) + '-' + str(
                    S[2]) + 'T' + str(S[3]) + ':' + str(S[4]) + ':' + str(S[5])
            elif columns[1] == 'group':
                md['group'] = columns[-1]
            elif columns[1] == 'eventCount':
                md['event_count'] = columns[-1]
            elif columns[1] == 'fclName':
                md['fcl.name'] = columns[-1]
            elif columns[1] == 'fclVersion':
                md['fcl.version'] = columns[-1]
            elif columns[1] == 'fileFormat':
                md['file_format'] = columns[-1]
            elif columns[1] == 'ubProjectStage':
                md['ub_project.stage'] = columns[-1]
            elif columns[1] == 'ubProjectVersion':
                md['ub_project.version'] = columns[-1]
            elif columns[1] == 'lastEvent':
                md['last_event'] = columns[-1]
            elif columns[1] == 'firstEvent':
                md['first_event'] = columns[-1]
            elif columns[1] == 'fileType':
                md['file_type'] = columns[-1]
            elif columns[1] == 'run':
                run = columns[-1]
            elif columns[1] == 'runType':
                run_type = columns[-1]
            elif columns[1] == 'applicationFamily':
                app_family = columns[-1]
            elif columns[1] == 'applicationVersion':
                app_version = columns[-1]
            elif columns[1] == 'process_name':
                app_name = columns[-1]
            elif columns[1] == 'ubProjectName':
                PName = True
                md['ub_project.name'] = columns[-1]
            elif columns[1] == 'parent':
                parents.append({'file_name': columns[-1]})

    # Get the other meta data field parameters
    md['file_name'] = inputfile.split("/")[-1]
    md['file_size'] = os.path.getsize(inputfile)
    # For now, skip the checksum for dCache files.
    md['crc'] = root_metadata.fileEnstoreChecksum(inputfile)
    md['runs'] = [[run, run_type]]
    md['application'] = {
        'family': app_family,
        'name': app_name,
        'version': app_version
    }
    md['parents'] = parents

    # If ub_project.name is not in the internal metadata:
    #   - for generator files, derive it from the fcl file name (without the
    #     '.fcl' extension);
    #   - for all other stages, take it from the first parent's metadata.
    if gen:
        md['parents'] = []
        if not PName:
            md['ub_project.name'] = md['fcl.name'].split(".fcl")[0]
    else:
        if not PName:
            # Guard against an empty parent list to avoid an IndexError.
            if len(md['parents']) > 0:
                parent = md['parents'][0]['file_name']
                mdparent = samweb.getMetadata(parent)
                if 'ub_project.name' in mdparent:
                    md['ub_project.name'] = mdparent['ub_project.name']

    return md
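
For reference, a small usage sketch, assuming the helper modules above are importable; 'my_file.root' is a placeholder file name, not from the original example:

import json

# Extract the metadata dictionary and pretty-print it as JSON.
md = getmetadata('my_file.root')
print(json.dumps(md, indent=2, sort_keys=True))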
Example #3
import json
import os
import sys

# External dependencies assumed available in this snippet's environment:
# samweb_cli plus the experiment-specific helper project_utilities.
# import_samweb, check_root, expMetaData and ifdh_cp are helpers defined
# elsewhere in the same module.
import samweb_cli
import project_utilities

def main():

    ana = 0
    nproc = 0

    import_samweb()

    # Parse arguments.
    checkdir = ''
    logdir = ''
    outdir = ''
    declare_file = 0
    copy_to_dropbox = 0
    maintain_parentage = 0
    data_file_types = []
    args = sys.argv[1:]
    while len(args) > 0:

        if args[0] == '--dir' and len(args) > 1:
            checkdir = args[1]
            del args[0:2]
        elif args[0] == '--logfiledir' and len(args) > 1:
            logdir = args[1]
            del args[0:2]
        elif args[0] == '--outdir' and len(args) > 1:
            outdir = args[1]
            del args[0:2]
        elif args[0] == '--declare' and len(args) > 1:
            declare_file = int(args[1])
            del args[0:2]    
        elif args[0] == '--copy' and len(args) > 1:
            copy_to_dropbox = int(args[1])
            del args[0:2]        
        elif args[0] == '--maintain_parentage' and len(args) > 1:
            maintain_parentage = int(args[1])
            del args[0:2]        
        elif args[0] == '--data_file_type' and len(args) > 1:
            data_file_types.append(args[1])
            del args[0:2]        
        else:
            print('Unknown option %s' % args[0])
            return 1

    # Add default data_file_types.

    if len(data_file_types) == 0:
        data_file_types.append('root')

    status = 0  # Global status code; zero means everything is OK.

    print("Do declaration in job: %d" % declare_file)
    
    # Check lar exit status (if any).
    stat_filename = os.path.join(logdir, 'lar.stat')
    if project_utilities.safeexist(stat_filename):      
        try:
            status = int(project_utilities.saferead(stat_filename)[0].strip())
            if status != 0:
                print('Job in subdirectory %s ended with non-zero exit status %d.' % (checkdir, status))
                status = 1
        
        except:
            print('Bad file lar.stat in subdirectory %s.' % checkdir)
            status = 1
    
    if checkdir == '':
        print('No directory specified (use the --dir option). Exiting.')
        return 1
    if logdir == '':
        print('No log file directory specified (use the --logfiledir option). Exiting.')
        return 1
    
    nevts, rootfiles, hists = check_root(checkdir, logdir, data_file_types)

    # Set flag to do analysis-style validation if all of the following are true:
    #
    # 1.  There is at least one valid histogram file.
    # 2.  The total number of artroot files and artroot events is zero.

    if len(hists) > 0 and len(rootfiles) == 0 and nevts <= 0:
        ana = 1
    
    if not ana:
        if len(rootfiles) == 0 or nevts < 0:
            print('Problem with root file(s) in %s.' % checkdir)
            status = 1
    elif nevts < -1 or len(hists) == 0:
        print('Problem with analysis root file(s) in %s.' % checkdir)
        status = 1
    
    # Loop over the good rootfiles and hists, build lists of them, and require
    # that any file considered for declaration appears in those lists. Also
    # require a good lar exit code before declaring.

    validate_list = open('validate.list','w')
    file_list = open('files.list', 'w')
    ana_file_list = open('filesana.list', 'w')
    
    events_list = open('events.list', 'w')

    # These will be empty if the checks succeed.
    bad_list = open('bad.list', 'w')
    missing_list = open('missing_files.list', 'w')
    
    # Print summary.

    if ana:
        print("%d processes completed successfully." % nproc)
        print("%d total good histogram files." % len(hists))
    
    else:
        print("%d total good events." % nevts)
        print("%d total good root files." % len(rootfiles))
        print("%d total good histogram files." % len(hists))
    
    file_list_stream = {}

    # Generate bookkeeping files pertaining to artroot files.

    for rootfile in rootfiles:

        rootpath = rootfile[0]
        nev = rootfile[1]
        streamname = rootfile[2]
        
        # Make sure root file names do not exceed 200 characters.       
        rootname = os.path.basename(rootpath)
        if len(rootname) >= 200:
            print('Filename %s in subdirectory %s is longer than 200 characters.' % (
                rootname, outdir))
            status = 1

        if streamname not in file_list_stream:
            file_list_stream[streamname] = open('files_%s.list' % streamname, 'w')
        validate_list.write(rootpath + '\n')
        file_on_scratch = os.path.join(outdir, os.path.basename(rootpath))
        file_list.write(file_on_scratch + '\n')
        file_list_stream[streamname].write(file_on_scratch + '\n')
        events_list.write('%s %d \n' % (file_on_scratch, nev))

    # Generate bookkeeping files pertaining to analysis files.
        
    for histfile in hists:
        validate_list.write(histfile + '\n')
        file_on_scratch = os.path.join(outdir, os.path.basename(histfile))
        ana_file_list.write(file_on_scratch + '\n')

    validate_list.close()
    file_list.close()
    ana_file_list.close()
    for streamname in list(file_list_stream.keys()):
        file_list_stream[streamname].close()
    events_list.close()
    
    # Decide at this point whether all the checks are OK. Write the status to
    # missing_files.list first.
    missing_list.write('%d \n' % status)
    
    if status == 0:
        bad_list.close()

        # Begin SAM declaration.

        if declare_file:

            # Declare artroot files.

            for rootfile in rootfiles:

                rootpath = rootfile[0]
                fn   = os.path.basename(rootpath)
                declare_ok = False

                # Decide if we need to declare this file.
                # It is OK if the file is already declared.
                # In that case, do not try to declare it again.

                try:
                    md = samweb.getMetadata(fn)
                    if len(md) > 0:
                        declare_ok = True
                        print('File %s is already declared.' % fn)
                except:
                    declare_ok = False

                if not declare_ok:
                    print('Declaring %s' % fn)
                    expSpecificMetaData = expMetaData(project_utilities.get_experiment(), rootpath)
                    md = expSpecificMetaData.getmetadata()

                    # Decide if we want to override the internal parentage metadata.

                    if maintain_parentage == 1:

                        # Delete the old parents, if any.

                        if 'parents' in md:                  
                            del md['parents']

                        # Change the parentage of the file based on its parents and aunts from condor_lar.

                        jobs_parents = os.getenv('JOBS_PARENTS', '').split(" ")
                        jobs_aunts   = os.getenv('JOBS_AUNTS', '').split(" ")
                        if jobs_parents[0] != '':
                            md['parents'] = [{'file_name': parent} for parent in jobs_parents]
                        if jobs_aunts[0] != '':
                            for aunt in jobs_aunts:
                                mixparent_dict = {'file_name': aunt}
                                if 'parents' not in md:
                                    md['parents'] = []
                                md['parents'].append(mixparent_dict)
                                             
                    if len(md) > 0:
                        project_utilities.test_kca()

                        # Make lack of parent files a nonfatal error.
                        # This should probably be removed at some point.
      
                        try:
                            samweb.declareFile(md=md)
                            declare_ok = True

                        except samweb_cli.exceptions.SAMWebHTTPError as e:
                            print(e)
                            print('SAM declare failed.')
                            return 1
             
                        except:
                            print('SAM declare failed.')
                            return 1
                     
                    else:
                        print('No sam metadata found for %s.' % fn)
                        declare_ok = False
                        status = 1
             
                if copy_to_dropbox == 1 and declare_ok:
                    print("Copying to Dropbox")
                    dropbox_dir = project_utilities.get_dropbox(fn)
                    rootPath = os.path.join(dropbox_dir, fn)
                    jsonPath = rootPath + ".json"
                    ifdh_cp(rootpath, rootPath)

            # Declare histogram files.
             
            for histpath in hists:

                declare_ok = False
                fn   = os.path.basename(histpath)

                # Decide if we need to declare this file.
                # It is OK if the file is already declared.
                # In that case, do not try to declare it again.

                try:
                    md = samweb.getMetadata(fn)
                    if len(md) > 0:
                        declare_ok = True
                        print('File %s is already declared.' % fn)
                except:
                    declare_ok = False

                if not declare_ok:
                    print('Declaring %s' % fn)
                    json_file = os.path.join(logdir, fn + '.json')

                    # Get metadata from json

                    md = {}
                    if project_utilities.safeexist(json_file):
                        mdlines = project_utilities.saferead(json_file)
                        mdtext = ''
                        for line in mdlines:
                            mdtext = mdtext + line
                        try:
                            md = json.loads(mdtext)
                        except:
                            md = {}

                    if maintain_parentage == 1:

                        # Delete the old parents, if any.

                        if 'parents' in md:                  
                            del md['parents']

                        # Change the parentage of the file based on its parents and aunts from condor_lar.

                        jobs_parents = os.getenv('JOBS_PARENTS', '').split(" ")
                        jobs_aunts   = os.getenv('JOBS_AUNTS', '').split(" ")
                        if jobs_parents[0] != '':
                            md['parents'] = [{'file_name': parent} for parent in jobs_parents]
                        if jobs_aunts[0] != '':
                            for aunt in jobs_aunts:
                                mixparent_dict = {'file_name': aunt}
                                if 'parents' not in md:
                                    md['parents'] = []
                                md['parents'].append(mixparent_dict)
                                             
                    if len(md) > 0 and 'file_type' in md:
                        project_utilities.test_kca()

                        # Make lack of parent files a nonfatal error.
                        # This should probably be removed at some point.
      
                        try:
                            samweb.declareFile(md=md)
                            declare_ok = True
             
                        except samweb_cli.exceptions.SAMWebHTTPError as e:
                            print(e)
                            print('SAM declare failed.')
                            declare_ok = False
             
                        except:
                            print('SAM declare failed.')
                            declare_ok = False
                     
                    else:
                        print('No sam metadata found for %s.' % fn)
                        declare_ok = False
             
                if copy_to_dropbox == 1 and declare_ok:
                    print("Copying to Dropbox")
                    dropbox_dir = project_utilities.get_dropbox(fn)
                    rootPath = os.path.join(dropbox_dir, fn)
                    jsonPath = rootPath + ".json"
                    ifdh_cp(histpath, rootPath)
             
        return status
    
    # Something went wrong, so make a list of bad directories and potentially
    # missing files.
    else:
        # First get the subdirectory name on pnfs; it contains the job id.
        dir_on_scratch = os.path.basename(outdir)
        print('Dir on scratch ' + dir_on_scratch)
        bad_list.write('%s \n' % dir_on_scratch)
        bad_list.close()
        return status
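
A conventional entry point for running this validator as a script could look like the sketch below; it is not part of the original example:

# Standard script entry point: exit with the status code returned by main().
if __name__ == '__main__':
    sys.exit(main())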