Exemplo n.º 1
0
    def __init__(self, path, mode='r', buf=-1):
        """Open a file, staging a local copy from dCache first if necessary.

        Arguments:
            path - Path of the file to open.  Paths under /pnfs/ are staged
                   to a uniquely named local copy via ifdh.
            mode - Open mode, passed through to the builtin open.
            buf  - Buffering argument, passed through to the builtin open.
        """

        self.path = path
        self.mode = mode

        # dCache paths get a unique local scratch name (uuid prefix avoids
        # collisions); everything else is opened in place.

        if path.startswith('/pnfs/'):
            self.local_path = str(uuid.uuid4()) + os.path.basename(path)
        else:
            self.local_path = path

        # Fetch copy of file from dCache, if necessary.  Reading or appending
        # requires the existing contents to be present locally.

        if path != self.local_path and (mode.find('r') >= 0 or mode.find('a') >= 0):
            larbatch_utilities.ifdh_cp(path, self.local_path)

        # Open local copy of file using the true builtin open (this module
        # presumably shadows the name "open" — confirm against the rest of
        # the file).  Use the builtins module rather than __builtins__:
        # __builtins__ is a dict only in imported modules, but is the
        # builtins module itself in __main__, where subscripting it would
        # raise TypeError.

        import builtins
        self.local_file = builtins.open(self.local_path, mode, buf)
Exemplo n.º 2
0
def copy(src, dest):
    """Copy src to dest, selecting the copy mechanism by file location.

    Any existing destination file is removed first.  If either endpoint
    lives in dCache (/pnfs/), the copy goes through ifdh when grid tools
    are preferred or pnfs is not mounted, and otherwise through a posix
    copy guarded by a timeout.  Purely local copies use shutil.copy.
    """

    if exists(dest):
        remove(dest)

    involves_dcache = src.startswith('/pnfs/') or dest.startswith('/pnfs/')

    if not involves_dcache:
        if debug:
            print('*** Larbatch_posix: Copy %s to %s using posix.' % (src, dest))
        shutil.copy(src, dest)
    elif prefer_grid or not pnfs_is_mounted:
        if debug:
            print('*** Larbatch_posix: Copy %s to %s using ifdh.' % (src, dest))
        larbatch_utilities.ifdh_cp(src, dest)
    else:
        if debug:
            print('*** Larbatch_posix: Copy %s to %s using posix with timeout.' % (src, dest))
        larbatch_utilities.posix_cp(src, dest)

    # Done

    return
Exemplo n.º 3
0
    def close(self):
        """Close the file, propagating a staged local copy back to dCache.

        If the file was staged to a separate local path, a writable copy is
        transferred back to the real path via ifdh, and the local copy is
        deleted in either case.  Calling close on an already-closed or
        absent local file is a no-op.
        """

        local = self.local_file
        if not local or local.closed:
            return

        # Close local copy of file.

        local.close()

        # No cleanup needed unless a separate local copy was staged.

        if self.path == self.local_path:
            return

        # If file was opened for writing ('w', 'a', or '+' in the mode),
        # transfer local copy to dCache.

        if any(self.mode.find(flag) >= 0 for flag in 'wa+'):
            larbatch_utilities.ifdh_cp(self.local_path, self.path)

        # Delete the local copy regardless of whether the file was open for
        # reading or writing.

        os.remove(self.local_path)
Exemplo n.º 4
0
def main():
    """Validate batch-job output and optionally declare good files to SAM.

    Command line options (parsed from sys.argv):

        --dir <dir>                - Directory to check (required).
        --logfiledir <dir>         - Log file directory (required).
        --outdir <dir>             - Output (scratch) directory.
        --declare <0|1>            - Declare good files to SAM.
        --copy <0|1>               - Copy declared files to the dropbox.
        --maintain_parentage <0|1> - Override parentage metadata using the
                                     JOBS_PARENTS / JOBS_AUNTS environment
                                     variables set by condor_lar.
        --data_file_type <type>    - Data file type (repeatable; defaults
                                     to 'root').

    Writes bookkeeping files (validate.list, files.list, filesana.list,
    events.list, files_<stream>.list, bad.list, missing_files.list) in the
    current directory.

    Returns 0 on success, nonzero on any validation or declaration error.
    """

    ana = 0      # Nonzero means analysis-style (histogram-only) validation.
    nproc = 0    # Completed process count reported in analysis mode.

    import_samweb()

    # Parse arguments.

    checkdir=''
    logdir=''
    outdir=''
    declare_file = 0
    copy_to_dropbox = 0
    maintain_parentage = 0
    data_file_types = []
    args = sys.argv[1:]
    while len(args) > 0:

        if args[0] == '--dir' and len(args) > 1:
            checkdir = args[1]
            del args[0:2]
        elif args[0] == '--logfiledir' and len(args) > 1:
            logdir = args[1]
            del args[0:2]
        elif args[0] == '--outdir' and len(args) > 1:
            outdir = args[1]
            del args[0:2]
        elif args[0] == '--declare' and len(args) > 1:
            declare_file = int(args[1])
            del args[0:2]
        elif args[0] == '--copy' and len(args) > 1:
            copy_to_dropbox = int(args[1])
            del args[0:2]
        elif args[0] == '--maintain_parentage' and len(args) > 1:
            maintain_parentage = int(args[1])
            del args[0:2]
        elif args[0] == '--data_file_type' and len(args) > 1:
            data_file_types.append(args[1])
            del args[0:2]
        else:
            print('Unknown option %s' % args[0])
            return 1

    # Add default data_file_types.

    if len(data_file_types) == 0:
        data_file_types.append('root')

    status = 0    # Global status code to tell us everything is ok.

    print("Do decleration in job: %d" % declare_file)

    # Check lar exit status (if any).

    stat_filename = os.path.join(logdir, 'lar.stat')
    if project_utilities.safeexist(stat_filename):
        try:
            status = int(project_utilities.saferead(stat_filename)[0].strip())
            if status != 0:
                print('Job in subdirectory %s ended with non-zero exit status %d.' % (checkdir, status))
                status = 1

        except Exception:
            # Narrowed from a bare except: an empty or non-numeric lar.stat
            # is a bad file, but SystemExit/KeyboardInterrupt should escape.
            print('Bad file lar.stat in subdirectory %s.' % checkdir)
            status = 1

    # Required arguments.

    if checkdir == '':
        print('No directory specified (use the --dir option.) Exiting.')
        return 1
    if logdir == '':
        print('No log file directory specified (use the --logfiledir option.) Exiting.')
        return 1

    nevts,rootfiles,hists = check_root(checkdir, logdir, data_file_types)

    # Set flag to do analysis-style validation if all of the following are true:
    #
    # 1.  There is at least one valid histogram file.
    # 2.  The total number of artroot files and artroot events is zero.

    if len(hists) > 0 and len(rootfiles) == 0 and nevts <= 0:
        ana = 1

    if not ana:
        if len(rootfiles) == 0 or nevts < 0:
            print('Problem with root file(s) in  %s.' % checkdir)
            status = 1

    elif nevts < -1 or len(hists) == 0:
        print('Problem with analysis root file(s) in  %s.' % checkdir)
        status = 1

    # Loop over rootfiles and hists because those are good.  Make a list of
    # those and check that the file in question for declaration is in that
    # list.  Also require that the lar exit code is good for declaration.

    validate_list = open('validate.list','w')
    file_list = open('files.list', 'w')
    ana_file_list = open('filesana.list', 'w')

    events_list = open('events.list', 'w')

    # These will be empty if the checks succeed.

    bad_list = open('bad.list', 'w')
    missing_list = open('missing_files.list', 'w')

    # Print summary.

    if ana:
        print("%d processes completed successfully." % nproc)
        print("%d total good histogram files." % len(hists))

    else:
        print("%d total good events." % nevts)
        print("%d total good root files." % len(rootfiles))
        print("%d total good histogram files." % len(hists))

    file_list_stream = {}

    # Generate bookkeeping files pertaining to artroot files.

    for rootfile in rootfiles:

        rootpath = rootfile[0]
        nev = rootfile[1]
        streamname = rootfile[2]

        # Make sure root file names do not exceed 200 characters.

        rootname = os.path.basename(rootpath)
        if len(rootname) >= 200:
            print('Filename %s in subdirectory %s is longer than 200 characters.' % (
                rootname, outdir))
            status = 1

        if streamname not in file_list_stream:
            file_list_stream[streamname] = open('files_%s.list' % streamname, 'w')
        validate_list.write(rootpath + '\n')
        file_on_scratch = os.path.join(outdir, os.path.basename(rootpath))
        file_list.write(file_on_scratch + '\n')
        file_list_stream[streamname].write(file_on_scratch + '\n')
        events_list.write('%s %d \n' % (file_on_scratch, nev) )

    # Generate bookkeeping files pertaining to analysis files.

    for histfile in hists:
        validate_list.write(histfile + '\n')
        file_on_scratch = os.path.join(outdir, os.path.basename(histfile))
        ana_file_list.write(file_on_scratch + '\n')

    validate_list.close()
    file_list.close()
    ana_file_list.close()
    for streamname in list(file_list_stream.keys()):
        file_list_stream[streamname].close()
    events_list.close()

    # Decide at this point if all the checks are ok.
    # Write to missing_files.list first.

    missing_list.write('%d \n' %status)

    # Close the handle now that the status is recorded (this handle was
    # previously never closed on any path).

    missing_list.close()

    if status == 0:
        bad_list.close()

        # Begin SAM declaration.

        if declare_file:

            # Declare artroot files.

            for rootfile in rootfiles:

                rootpath = rootfile[0]
                fn   = os.path.basename(rootpath)
                declare_ok = False

                # Decide if we need to declare this file.
                # It is OK if the file is already declared.
                # In that case, do not try to declare it again.

                try:
                    md = samweb.getMetadata(fn)
                    if len(md) > 0:
                        declare_ok = True
                        print('File %s is already declared.' % fn)
                except Exception:
                    declare_ok = False

                if not declare_ok:
                    print('Declaring %s' % fn)
                    expSpecificMetaData = expMetaData(project_utilities.get_experiment(), rootpath)
                    md = expSpecificMetaData.getmetadata()

                    # Decide if we want to override the internal parentage metadata.

                    if maintain_parentage == 1:

                        # Delete the old parents, if any.

                        if 'parents' in md:
                            del md['parents']

                        # Change the parentage of the file based on its parents
                        # and aunts from condor_lar.

                        jobs_parents = os.getenv('JOBS_PARENTS', '').split(" ")
                        jobs_aunts   = os.getenv('JOBS_AUNTS', '').split(" ")
                        if(jobs_parents[0] != '' ):
                            md['parents'] = [{'file_name': parent} for parent in jobs_parents]
                        if(jobs_aunts[0] != '' ):
                            for aunt in jobs_aunts:
                                mixparent_dict = {'file_name': aunt}
                                if 'parents' not in md:
                                    md['parents'] = []
                                md['parents'].append(mixparent_dict)

                    if len(md) > 0:
                        project_utilities.test_kca()

                        # Make lack of parent files a nonfatal error.
                        # This should probably be removed at some point.

                        try:
                            samweb.declareFile(md=md)
                            declare_ok = True

                        except samweb_cli.exceptions.SAMWebHTTPError as e:
                            print(e)
                            print('SAM declare failed.')
                            return 1

                        except Exception:
                            print('SAM declare failed.')
                            return 1

                    else:
                        print('No sam metadata found for %s.' % fn)
                        declare_ok = False
                        status = 1

                if copy_to_dropbox == 1 and declare_ok:
                    print("Copying to Dropbox")
                    dropbox_dir = project_utilities.get_dropbox(fn)
                    rootPath = os.path.join(dropbox_dir, fn)
                    ifdh_cp(rootpath, rootPath)

            # Declare histogram files.

            for histpath in hists:

                declare_ok = False
                fn   = os.path.basename(histpath)

                # Decide if we need to declare this file.
                # It is OK if the file is already declared.
                # In that case, do not try to declare it again.

                try:
                    md = samweb.getMetadata(fn)
                    if len(md) > 0:
                        declare_ok = True
                        print('File %s is already declared.' % fn)
                except Exception:
                    declare_ok = False

                if not declare_ok:
                    print('Declaring %s' % fn)
                    json_file = os.path.join(logdir, fn + '.json')

                    # Get metadata from json.

                    md = {}
                    if project_utilities.safeexist(json_file):
                        mdlines = project_utilities.saferead(json_file)
                        mdtext = ''
                        for line in mdlines:
                            mdtext = mdtext + line
                        try:
                            md = json.loads(mdtext)
                        except Exception:
                            # Unparseable json means no metadata; fall through
                            # to the "no sam metadata" branch below.
                            md = {}

                    if maintain_parentage == 1:

                        # Delete the old parents, if any.

                        if 'parents' in md:
                            del md['parents']

                        # Change the parentage of the file based on its parents
                        # and aunts from condor_lar.

                        jobs_parents = os.getenv('JOBS_PARENTS', '').split(" ")
                        jobs_aunts   = os.getenv('JOBS_AUNTS', '').split(" ")
                        if(jobs_parents[0] != '' ):
                            md['parents'] = [{'file_name': parent} for parent in jobs_parents]
                        if(jobs_aunts[0] != '' ):
                            for aunt in jobs_aunts:
                                mixparent_dict = {'file_name': aunt}
                                if 'parents' not in md:
                                    md['parents'] = []
                                md['parents'].append(mixparent_dict)

                    if len(md) > 0 and 'file_type' in md:
                        project_utilities.test_kca()

                        # Make lack of parent files a nonfatal error.
                        # This should probably be removed at some point.

                        try:
                            samweb.declareFile(md=md)
                            declare_ok = True

                        except samweb_cli.exceptions.SAMWebHTTPError as e:
                            print(e)
                            print('SAM declare failed.')
                            declare_ok = False

                        except Exception:
                            print('SAM declare failed.')
                            declare_ok = False

                    else:
                        print('No sam metadata found for %s.' % fn)
                        declare_ok = False

                if copy_to_dropbox == 1 and declare_ok:
                    print("Copying to Dropbox")
                    dropbox_dir = project_utilities.get_dropbox(fn)
                    rootPath = os.path.join(dropbox_dir, fn)
                    ifdh_cp(histpath, rootPath)

        return status

    # Something went wrong, so make a list of bad directories and potentially
    # missing files.

    else:
        # First get the subdir name on pnfs.  This contains the job id.
        dir_on_scratch = os.path.basename(outdir)
        print('Dir on scratch ' + dir_on_scratch)
        bad_list.write('%s \n' % dir_on_scratch)
        bad_list.close()
        return status