Exemplo n.º 1
0
    def find_checksum( self ):
        """Look up the enstore checksum in SAM for each candidate (run, subrun),
        store it in self._data, and log a per-file status to the DB.

        Status codes logged: 0 = enstore checksum found, 1 = checksum present
        but first entry is not from enstore, 2 = initial/unset, 100 = file not
        found at SAM (experts are notified by email).
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs( [self._project, self._parent_project], [2, 0] ):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Checking tape: run=%d, subrun=%d ...' % (run,subrun))

            statusCode = 2

            # File name this project expects for the given (run, subrun).
            in_file = self._infile_format % ( run, subrun )
            samweb = samweb_cli.SAMWebClient(experiment="uboone")

            meta = {}
            try:
                meta = samweb.getMetadata(filenameorid=in_file)
                # Checksum entries look like '<source>:<value>'; only the
                # first entry is inspected.
                checksum_info = meta['checksum'][0].split(':')
                if checksum_info[0] == 'enstore':
                    self._data = checksum_info[1]
                    statusCode = 0

                else:
                    statusCode = 1

            except samweb_cli.exceptions.FileNotFound:
                subject = 'Failed to locate file %s at SAM' % in_file
                text = 'File %s is not found at SAM!' % in_file

                # Notify the experts by email using PUB SMTP credentials
                # taken from the environment.
                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = 0,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Exemplo n.º 2
0
def samweb():
    """Return the module-level cached SAMWebClient, constructing it lazily.

    Also (re)points SSL_CERT_DIR at the grid CA certificate store on every
    call, so later environment changes are overridden.
    """

    global samweb_obj

    # Bug fix: use identity comparison with None ('is None'), not '=='.
    if samweb_obj is None:
        samweb_obj = samweb_cli.SAMWebClient(experiment=get_experiment())

    os.environ['SSL_CERT_DIR'] = '/etc/grid-security/certificates'

    return samweb_obj
Exemplo n.º 3
0
    def validate_sam( self ):
        """Confirm each candidate (run, subrun) file is declared to SAM and
        log the outcome (10 = declared, 1 = not declared) to the DB.
        """
        # Without a DB connection there is nothing to do.
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Lazily load the resource configuration on first use.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Process at most self._nruns rows from the cross-project table.
        remaining = self._nruns
        for row in self.get_xtable_runs([self._project, self._parent_project],
                                        [12, 0]):

            remaining -= 1
            run, subrun = int(row[0]), int(row[1])

            self.info('Checking the SAM declaration: run=%d, subrun=%d ...' % (run, subrun))

            # 12 = not yet checked; 10 = found at SAM; 1 = not found.
            code = 12
            fname = self._infile_format % (run, subrun)
            client = samweb_cli.SAMWebClient(experiment="uboone")

            # Probe SAM: a successful metadata lookup means "declared".
            try:
                client.getMetadata(filenameorid=fname)
                code = 10
            except samweb_cli.exceptions.FileNotFound:
                code = 1

            # Pretend I'm doing something
            time.sleep(1)

            # Record the outcome for this (run, subrun).
            entry = ds_status(project=self._project,
                              run=run,
                              subrun=subrun,
                              seq=0,
                              status=code)
            self.log_status(entry)

            # Stop once the per-instance quota is exhausted.
            if not remaining:
                break
Exemplo n.º 4
0
def getFileListFromDefinition(defname):
    samweb = samweb_cli.SAMWebClient(experiment='uboone')
    flist = []
    try:
        flist = samweb.listFiles(defname=args.defname)
    except:
        print "Failed to get the list of files in %s" % args.defname
        sys.exit(1)

    flist.sort()
    if (not args.noheader):
        print "Definition %s contains %i files" % (defname, len(flist))

    return flist
Exemplo n.º 5
0
def matadataValidation(infile, jsonData):
    """Validate *jsonData* against SAM's metadata schema for file *infile*.

    Exits the process with an error message if the '<infile>.json' sidecar is
    missing, if validation fails, or if validation hangs (a 4 second SIGALRM
    guard aborts the call). Returns True on success.
    """
    jsonFileName = str(infile) + ".json"
    # Check if we actually generated the json file.
    # If not, exit with error.
    # Bug fix: the original referenced the undefined name 'in_file' here,
    # which raised NameError instead of the intended message.
    if not os.path.isfile(jsonFileName):
        sys.exit(str(infile) + ": Metadata File Not Found")
    # Set up the experiment for SAM
    samweb = samweb_cli.SAMWebClient(experiment="uboone")
    # Check if metadata is valid, or exit with error.
    # If it takes too much time, SIGALRM fires and 'handler' aborts us.
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(4)
    try:
        samweb.validateFileMetadata(jsonData)
        # Cancel the pending alarm so it cannot fire later in the program.
        signal.alarm(0)
        return True
    except Exception:
        sys.exit(str(infile) + " : Invalid/Corrupted Metadata")
Exemplo n.º 6
0
def get_dropbox(filename):
    """Return the FTS dropbox directory for *filename*.

    The path is '<dropbox_root>/<file_type>/<group>/<data_tier>', where the
    three components come from the file's SAM metadata and dropbox_root is
    taken from $FTS_DROPBOX (with a hard-coded sbnd default).

    Raises:
        RuntimeError: if any of the three metadata fields is missing/empty.
    """

    # Get metadata.  Experiment defaults to sbnd unless $SAM_EXPERIMENT says
    # otherwise.
    md = {}
    exp = 'sbnd'
    if 'SAM_EXPERIMENT' in os.environ:
        exp = os.environ['SAM_EXPERIMENT']
    samweb = samweb_cli.SAMWebClient(experiment=exp)
    try:
        md = samweb.getMetadata(filenameorid=filename)
    except Exception:
        # Best effort: a lookup failure leaves md empty and is reported via
        # the RuntimeError below.  (Was a bare 'except:', which also caught
        # KeyboardInterrupt/SystemExit.)
        pass

    # Extract the metadata fields that we need.

    file_type = ''
    group = ''
    data_tier = ''

    if 'file_type' in md:
        file_type = md['file_type']
    if 'group' in md:
        group = md['group']
    if 'data_tier' in md:
        data_tier = md['data_tier']

    if not file_type or not group or not data_tier:
        raise RuntimeError('Missing or invalid metadata for file %s.' %
                           filename)

    # Construct dropbox path.

    #path = '/sbnd/data/sbndsoft/dropbox/%s/%s/%s' % (file_type, group, data_tier)
    if 'FTS_DROPBOX' in os.environ:
        dropbox_root = os.environ['FTS_DROPBOX']
    else:
        dropbox_root = '/pnfs/sbnd/scratch/sbndpro/dropbox'
    path = '%s/%s/%s/%s' % (dropbox_root, file_type, group, data_tier)
    return path
Exemplo n.º 7
0
import ifdh
import os
import pprint
import samweb_cli
import subprocess

# List SNB radiological-background files for the DUNE 10kt 1x2x6 geometry via
# SAM, then resolve each file's physical location with ifdh.
samweb = samweb_cli.SAMWebClient(experiment='dune')
#files = samweb.listFiles("file_name like snb_timedep_dune10kt_1x2x6%root%")
files = samweb.listFiles("file_name like snb_radio_dune10kt_1x2x6%root%")

#print(files)
#pprint.pprint(list(files))

locs = []
IFDH = ifdh.ifdh()

# Fix: iterate directly (no range(len(...))), avoid shadowing the 'file'
# builtin, and drop the dead path computed from cloc that was immediately
# overwritten by the ifdh answer.
for fname in files:
    # SAM's own location record is printed for cross-checking only.
    cloc = samweb.locateFile(fname)
    print(cloc[0])
    # ifdh returns e.g. '<scheme>:<path>(extra)'; keep just <path>.
    loc = IFDH.locateFile(fname)
    sliced = loc[0].partition("(")
    sliced = sliced[0].partition(":")
    full_path = sliced[-1] + "/" + fname
    print(full_path)
    locs.append(full_path)
    print(locs[-1])
Exemplo n.º 8
0
    def process_newruns(self):
        """Extract metadata from each new (run, subrun) file found on disk,
        dump it to a '<file>.json' sidecar, and log a status code to the DB.

        Status codes logged: 2 = json written, 3 = metadata extracted
        (pre-dump), 100 = extraction/metadata failure or file not found by
        glob, 1000 = extraction previously failed (see else branch below).
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' %
                      (run, subrun))

            status = 1

            # Glob for the (possibly wildcarded) input file name.
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if (len(filelist) < 1):
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                status_code = 100
                status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=status_code)
                self.log_status(status)
                continue

            if (len(filelist) > 1):
                # More than one match: warn but continue with the first one.
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            out_file = '%s.json' % in_file
            self.info('Found %s' % (in_file))

            # Raw .ubdaq files need a dedicated extractor; anything else is
            # assumed to be a swizzled artroot file.
            if in_file.strip().split('.')[-1] == "ubdaq":
                status, jsonData = self.get_ubdaq_metadata(
                    in_file, run, subrun)
            else:
                try:
                    jsonData = extractor_dict.getmetadata(in_file)
                    status = 3
                    self.info(
                        'Successfully extract metadata from the swizzled file.'
                    )
                except:
                    status = 100
                    self.error(
                        'Failed extracting metadata from the swizzled file.')

            if not status == 100:
                with open(out_file, 'w') as ofile:
                    json.dump(jsonData,
                              ofile,
                              sort_keys=True,
                              indent=4,
                              ensure_ascii=False)
                    # To Eric: what are you doing here?
                    # NOTE(review): only the client construction is guarded;
                    # the actual validation call is commented out, so status 2
                    # is set without any real check -- confirm intent.
                    try:
                        samweb = samweb_cli.SAMWebClient(experiment="uboone")
                        # samweb.validateFileMetadata(json_file) # this throws/raises exception
                        status = 2
                    except:
                        self.error("Problem with samweb metadata: ", jsonData)
                        self.error(sys.exc_info()[0])
                        status = 100

            else:
                status = 1000
                self.error('Did not find the input file %s' % in_file)

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
Exemplo n.º 9
0
    def declare_to_sam(self):
        """Declare each candidate file (with its pre-made .json metadata) to
        SAM and record a status code in the DB.

        Status codes logged: 101 = file already declared at SAM (experts
        emailed), 2 = declared successfully, 102 = declareFile failed
        (experts emailed), 100 = input file or .json sidecar missing.
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns))
        self._project_requirement[0] = 1

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs(self._project_list,
                                      self._project_requirement):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Declaring a file to SAM: run=%d, subrun=%d ...' %
                      (run, subrun))

            status = 1

            # Check input file exists. Otherwise report error
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if (len(filelist) < 1):
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                status_code = 100
                status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=status_code)
                self.log_status(status)
                continue

            if (len(filelist) > 1):
                # More than one match: warn but continue with the first one.
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_json = '%s.json' % in_file

            self.info('Declaring ' + in_file + ' to SAM: using ' + in_json)

            if os.path.isfile(in_file) and os.path.isfile(in_json):
                self.info('Found %s' % (in_file))
                self.info('Found %s' % (in_json))
                json_dict = json.load(open(in_json))

                # native SAM python call, instead of a system call
                # make sure you've done get-cert
                # Perhaps we want a try block for samweb?
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                # Check if the file already exists at SAM
                try:
                    in_file_base = os.path.basename(in_file)
                    samweb.getMetadata(filenameorid=in_file_base)
                    status = 101
                    # Email the experts
                    subject = 'File %s Existing at SAM' % in_file_base
                    text = """
File %s has already exists at SAM!
                    """ % in_file_base

                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)

                except samweb_cli.exceptions.FileNotFound:
                    # metadata already validated in get_assembler_metadata_file.py
                    try:
                        samweb.declareFile(md=json_dict)
                        status = 2
                    except Exception as e:
                        #                        print "Unexpected error: samweb declareFile problem: "
                        self.error(
                            "Unexpected error: samweb declareFile problem: ")
                        self.error("%s" % e)
                        subject = "samweb declareFile problem: %s" % in_file_base
                        text = """
File %s failed to be declared to SAM!
%s
                        """ % (in_file_base, traceback.print_exc())

                        pub_smtp(os.environ['PUB_SMTP_ACCT'],
                                 os.environ['PUB_SMTP_SRVR'],
                                 os.environ['PUB_SMTP_PASS'], self._experts,
                                 subject, text)

                        # print "Give some null properties to this meta data"
                        self.error("Give this file a status 102")
                        status = 102

            else:
                status = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
Exemplo n.º 10
0
    def validate_sam(self):
        """Check whether each candidate file has been declared to SAM and log
        the outcome to the DB.

        Status codes logged: 0 = declared at SAM, 1 = not found at SAM,
        100 = file missing on disk.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns) )
        self._project_requirement[0] = 2

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        #for x in [(391,10,0,0)]:
        for x in self.get_xtable_runs(self._project_list,
                                      self._project_requirement):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Checking the SAM declaration: run=%d, subrun=%d ...' %
                      (run, subrun))

            status = 12

            # Glob for the (possibly wildcarded) input file name.
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if (len(filelist) < 1):
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                status_code = 100
                status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=status_code)
                self.log_status(status)
                continue

            if (len(filelist) > 1):
                # More than one match: warn but continue with the first one.
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_file_base = os.path.basename(in_file)
            samweb = samweb_cli.SAMWebClient(experiment="uboone")

            # Check if the file already exists at SAM
            try:
                samweb.getMetadata(filenameorid=in_file_base)
                status = 0
            except samweb_cli.exceptions.FileNotFound:
                status = 1

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
Exemplo n.º 11
0
    def process_newruns(self):
        """For each successfully swizzled (run, subrun) file, extract artroot
        metadata and declare it to SAM, logging a status code to the DB.

        Status codes logged: 2 = declared to SAM, 3 = metadata extracted but
        declaration failed, 100 = file missing or extraction failed (also
        the value if metadata was already registered at SAM).
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        # Below picks up successfully swizzled files
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' %
                      (run, subrun))

            status = 100

            # Check input file exists. Otherwise report error
            in_file = '%s/%s' % (self._in_dir, self._infile_format %
                                 (run, subrun))

            print 'Looking for %s' % (in_file)
            if os.path.isfile(in_file):
                self.info('Found %s' % (in_file))

                # Check metadata
                has_metadata = False
                try:
                    samweb = samweb_cli.SAMWebClient(experiment="uboone")
                    md = samweb.getMetadata(filenameorid=in_file)
                    # NOTE(review): leftover debug print; with a mapping on
                    # the right-hand side and no %(key)s specifiers the
                    # string is printed unchanged -- confirm it can be removed.
                    print 'Here 0' % md
                    self.info(
                        'Weirdly, metadata already registered in SAM for %s.  ... '
                        % (in_file))
                    has_metadata = True
                except:
                    # Bare except: any failure (including network errors) is
                    # treated as "no metadata yet".
                    pass

                # Should be that metadata is in the artroot file. (But not yet declared it to SAM.)
                # Thus, retrieve metadata from file; use it to declare file with SAM.
                if not has_metadata:
                    try:
                        self.info(' I feel a couple woos comin on, cus ')
                        md = extractor_dict.getmetadata(in_file)
                        self.info(
                            ' ... there it was.  ... (Just extracted the meta data from root file.) '
                        )
                        status = 3
                        try:
                            samweb = samweb_cli.SAMWebClient(
                                experiment="uboone")
                            samweb.declareFile(md=md)
                            status = 2
                            self.info(
                                'Successful extraction of artroot metadata and declaring it to SAM for %s.  ... '
                                % (in_file))
                        except:
                            self.info(
                                'Failed declaring metadata to SAM for %s.  ... '
                                % (in_file))
                    except:
                        self.info(
                            'Failed extracting artroot metadata for %s.  ... '
                            % (in_file))

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
Exemplo n.º 12
0
# This script will take a file that has been processed at ANL, modify its parent
# to be the swizzled file, add in the swizzler version to the metadata and then
# create a json file for declaring to SAM
# To run do python modify_metadata.py <your files>
# ------------------------------------------------------------------------------

# Turn this on to print the metadata dict
# 0 = not much printed
# 1 = print input files
# 2 = print the meta data dictonaries
debug = 1

# Start time of script
start = time.time()

samweb = samweb_cli.SAMWebClient(experiment='uboone')

for infile in sys.argv[1:]:
    if (debug == 1):
        print infile

    # Get the file metadata
    mdgen = extractor_dict.expMetaData('uboone', infile)
    md = mdgen.getmetadata()

    # Modify the parent file to be the swizzled file
    parent = md['parents']
    parent_name=parent[0]['file_name'].split("_event",1)[0]+".root"
    md['parents'] = parent_name

    # Now we want to get the swizzle version and add this to the metadata
Exemplo n.º 13
0
        int(s)
        return True
    except ValueError:
        return False

def _chunk(iterable, chunksize):
   """ Helper to divide an iterable into chunks of a given size """
   iterator = iter(iterable)
   from itertools import islice
   while True:
       l = list(islice(iterator, chunksize))
       if l: yield l
       else: return

# Connect to SAM for the lariat experiment.
# NOTE(review): the bare 'except' hides the original connection error;
# consider 'except Exception as e' and chaining the cause.
try:
   samweb = samweb_cli.SAMWebClient(experiment="lariat")
except:
   raise Exception('Not able to open up the SAMWeb Connection')

# Default query matches nothing unless a run number is supplied on argv[1].
query = "run_number = -1"

if is_intstring(sys.argv[1]):
    query = "run_number = %d" % (int(sys.argv[1]))

# NOTE(review): bare 'except' again masks the underlying SAM error.
try:
   list_o_files=samweb.listFiles(query)
except:
   raise Exception('Unable to get list of files for this query:' + query)

# Fetch metadata in batches of 10 files per SAM call.
for filenames in _chunk(list_o_files, 10):
   mdlist = samweb.getMultipleMetadata(filenames)
Exemplo n.º 14
0
    def declare_to_sam(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
	    self.error('Cannot connect to DB! Aborting...')
	    return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project,self._parent_project],
                                      [1,0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Declaring a file to SAM: run=%d, subrun=%d ...' % (run,subrun))

            status = 1
            
            # Check input file exists. Otherwise report error
            in_file_base = self._infile_format % ( run, subrun )
            in_file = '%s/%s' % ( self._in_dir, in_file_base )
            in_json = '%s/%s.json' %( self._meta_dir, in_file_base )
            
            if os.path.isfile(in_file) and os.path.isfile(in_json):
                self.info('Found %s' % (in_file))
                self.info('Found %s' % (in_json))
                json_dict = json.load( open( in_json ) )

                # native SAM python call, instead of a system call
                # make sure you've done get-cert
                # Perhaps we want a try block for samweb?
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                # Check if the file already exists at SAM
                try:
                    samweb.getMetadata(filenameorid=in_file_base)
                    status = 101
                    # Want to email the experts
                except samweb_cli.exceptions.FileNotFound:
                    # metadata already validated in get_assembler_metadata_file.py
                    try:
                        samweb.declareFile(md=json_dict)
                        status = 12
                    except:
                        print "Unexpected error: samweb declareFile problem: "
                        print traceback.print_exc()
                        # print "Give some null properties to this meta data"
                        print "Give this file a status 11"
                        status = 11

            else:
                status = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = int(x[0]),
                                subrun  = int(x[1]),
                                seq     = 0,
                                status  = status )
            
            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Exemplo n.º 15
0
def getDataGivenFileList(flist, r):
    #query SAM for each file in file list and gets run and subrun processed from meta data
    #puts that into dictionary rslist[run]=list_of_subruns
    con = sqlite3.connect("%s/run.db" % dbdir)
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("ATTACH DATABASE '%s/bnb_v%i.db' AS bnb" % (dbdir, version))
    cur.execute("ATTACH DATABASE '%s/numi_v%i.db' AS numi" % (dbdir, version))

    cfgDB = confDB.confDB()

    bnbwarn = False
    numiwarn = False
    otherwarn = False
    prescalewarn = False
    samweb = samweb_cli.SAMWebClient(experiment='uboone')
    try:
        meta = samweb.getMetadataIterator(flist)
    except Exception as e:
        print "Failed to get metadata from SAM."
        print "Make sure to setup sam_web_client v2_1 or higher."
        print e
        sys.exit(0)

    missbnb = {}
    missnumi = {}
    missother = {}
    missprescale = {}

    mcount = 0
    for m in meta:
        mcount += 1
        for rs in m['runs']:
            pf = None
            if prescaleFactor:
                pf = cfgDB.getAllPrescaleFactors(int(rs[0]))
            if pf is not None:
                for pfkey in pf:
                    if pfkey not in r:
                        r[pfkey] = 0
            query = "%s WHERE r.run=%i AND r.subrun=%i" % (
                dbquerybase, int(rs[0]), int(rs[1]))
            cur.execute(query)
            row = cur.fetchone()
            for k in r:
                if k in row.keys() and row[k] is not None:
                    if pf is not None:
                        for pfkey in pf:
                            if "EXT" in k and "EXT_" in pfkey:
                                r[pfkey] += pf[pfkey] * row[k]
                            elif "Gate1" in k and "NUMI_" in pfkey:
                                r[pfkey] += pf[pfkey] * row[k]
                            elif "Gate2" in k and "BNB_" in pfkey:
                                r[pfkey] += pf[pfkey] * row[k]
                    elif prescaleFactor:
                        if rs[0] not in missprescale:
                            missprescale[rs[0]] = [rs[1]]
                        elif rs[1] not in missprescale[rs[0]]:
                            missprescale[rs[0]].append(rs[1])
                        prescalewarn = True

                    r[k] += row[k]
                elif k in bnbcols:
                    if rs[0] not in missbnb:
                        missbnb[rs[0]] = [rs[1]]
                    elif rs[1] not in missbnb[rs[0]]:
                        missbnb[rs[0]].append(rs[1])
                    bnbwarn = True
                elif k in numicols:
                    if rs[0] not in missnumi:
                        missnumi[rs[0]] = [rs[1]]
                    elif rs[1] not in missnumi[rs[0]]:
                        missnumi[rs[0]].append(rs[1])
                    numiwarn = True
                elif k is "EXT":
                    if rs[0] not in missother:
                        missother[rs[0]] = [rs[1]]
                    elif rs[1] not in missother[rs[0]]:
                        missother[rs[0]].append(rs[1])
                    otherwarn = True
    r['bnbwarn'] = bnbwarn
    r['numiwarn'] = numiwarn
    r['otherwarn'] = otherwarn
    r['prescalewarn'] = prescalewarn
    r['missbnb'] = missbnb
    r['missnumi'] = missnumi
    r['missother'] = missother
    r['missprescale'] = missprescale
    if mcount != len(flist):
        print "Warning! Did not get metadata for all files. Looped through %i files, but only got metadata for %i. Check list for repeats or bad file names." % (
            len(flist), mcount)
        logging.debug("Warning! Did not get metadata for all files.")

    con.close()
    return
Exemplo n.º 16
0
    def process_newruns(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
	    self.error('Cannot connect to DB! Aborting...')
	    return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project,self._parent_project],
                                      [1,0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' % (run,subrun))

            status = 1
            
            # Check input file exists. Otherwise report error
            in_file = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            out_file = '%s/%s' % (self._out_dir,self._outfile_format % (run,subrun))


#
# Looks fine now, but if there are new troubles: run this project with NRUNS=1
#
            if os.path.isfile(in_file):
                self.info('Found %s' % (in_file))
#                shutil.copyfile(in_file,out_file)

                if in_file.strip().split('.')[-1] == "ubdaq":
                    status, jsonData = self.get_ubdaq_metadata( in_file, run, subrun )

                else:
                    try:
                        jsonData = extractor_dict.getmetadata( in_file )
                        status = 3
                        self.info('Successfully extract metadata from the swizzled file.')
                    except:
                        status = 100
                        self.error('Failed extracting metadata from the swizzled file.')

                if not status == 100:
                    with open(out_file, 'w') as ofile:
                        json.dump(jsonData, ofile, sort_keys = True, indent = 4, ensure_ascii=False)
                        # To Eric: what are you doing here?
                        try:
                            samweb = samweb_cli.SAMWebClient(experiment="uboone")
                            # samweb.validateFileMetadata(json_file) # this throws/raises exception
                            status = 2
                        except:
                            print "Problem with samweb metadata: ", jsonData
                            print sys.exc_info()[0]
                            status=100
 

            else:
                status = 1000
                self.error('Did not find the input file %s' % in_file )

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = int(x[0]),
                                subrun  = int(x[1]),
                                seq     = 0,
                                status  = status )
            
            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Exemplo n.º 17
0
def getmetadata(inputfile):
    """Extract SAM metadata for *inputfile* and return it as a dictionary.

    Runs ``sam_metadata_dumper -H`` on a local copy (or URL) of the file,
    parses the human-readable table it prints, then fills in file-level
    fields (file name, size, enstore checksum, runs, application, parents).

    Returns the SAM metadata dictionary.
    """
    # Set up the experiment name for samweb Python API
    samweb = samweb_cli.SAMWebClient(
        experiment=project_utilities.get_experiment())

    # Extract metadata into a pipe, using a local path when one is
    # available and falling back to a URL otherwise.
    local = project_utilities.path_to_local(inputfile)
    if local != '':
        proc = subprocess.Popen(["sam_metadata_dumper", "-H", local],
                                stdout=subprocess.PIPE)
    else:
        url = project_utilities.path_to_url(inputfile)
        proc = subprocess.Popen(["sam_metadata_dumper", "-H", url],
                                stdout=subprocess.PIPE)
    lines = proc.stdout.readlines()
    # Remove the temporary local copy, if one was made.
    if local != '' and local != inputfile:
        os.remove(local)

    # Number of lines of dumper output (used to skip the table footer).
    num_lines = len(lines)

    # Dumper column names that map one-to-one onto SAM metadata keys.
    simple_keys = {
        'group': 'group',
        'eventCount': 'event_count',
        'fclName': 'fcl.name',
        'fclVersion': 'fcl.version',
        'fileFormat': 'file_format',
        'ubProjectStage': 'ub_project.stage',
        'ubProjectVersion': 'ub_project.version',
        'lastEvent': 'last_event',
        'firstEvent': 'first_event',
        'fileType': 'file_type',
    }

    # SAM metadata dictionary to be filled below.
    md = {}

    c = 0
    parents = []
    PName = False   # True once ubProjectName was seen in the dumper output.
    gen = False     # True for 'generated' data tier (no parents).
    for line in lines:
        c = c + 1
        columns = [col.strip() for col in line.split(" ")]
        # Skip the 3-line header and the 2-line footer of the dumper table.
        if c < 4 or c > num_lines - 2:
            continue
        key = columns[1]
        if key in simple_keys:
            md[simple_keys[key]] = columns[-1]
        elif key == 'dataTier':
            md['data_tier'] = columns[-1]
            if columns[-1] == 'generated':
                gen = True
        elif key == 'endTime':
            # Epoch seconds -> 'Y-M-DTH:M:S' (unpadded, as localtime fields).
            E = time.localtime(int(columns[-1]))
            md['end_time'] = '%d-%d-%dT%d:%d:%d' % tuple(E[:6])
        elif key == 'startTime':
            S = time.localtime(int(columns[-1]))
            md['start_time'] = '%d-%d-%dT%d:%d:%d' % tuple(S[:6])
        elif key == 'run':
            run = columns[-1]
        elif key == 'runType':
            run_type = columns[-1]
        elif key == 'applicationFamily':
            app_family = columns[-1]
        elif key == 'applicationVersion':
            app_version = columns[-1]
        elif key == 'process_name':
            app_name = columns[-1]
        elif key == 'ubProjectName':
            PName = True
            md['ub_project.name'] = columns[-1]
        elif key == 'parent':
            parents.append({'file_name': columns[-1]})

    # Get the other meta data field parameters.
    # NOTE: run/run_type/app_* deliberately raise NameError here if the
    # dumper output lacked them, as in the original implementation.
    md['file_name'] = inputfile.split("/")[-1]
    md['file_size'] = os.path.getsize(inputfile)
    # For now, skip the checksum for dCache files.
    md['crc'] = root_metadata.fileEnstoreChecksum(inputfile)
    md['runs'] = [[run, run_type]]
    md['application'] = {
        'family': app_family,
        'name': app_name,
        'version': app_version
    }
    md['parents'] = parents

    # If ub_project.name was not in the internal metadata:
    #  - generator files: derive it from the fcl file name (minus '.fcl');
    #  - all other stages: inherit it from the first parent's SAM metadata.
    if gen:
        md['parents'] = []
        if not PName:
            md['ub_project.name'] = md['fcl.name'].split(".fcl")[0]
    elif not PName and parents:
        # Guard against an empty parent list (original raised IndexError).
        parent = parents[0]['file_name']
        mdparent = samweb.getMetadata(parent)
        if 'ub_project.name' in mdparent:
            md['ub_project.name'] = mdparent['ub_project.name']

    return md
Exemplo n.º 18
0
    def pnnl_transfer( self, file_arg ):
        """Copy one enstore-resident file to PNNL via gsiftp->sshftp and
        register the new location in SAM.

        Returns a (transfer, samcode) tuple:
          transfer -- 0 on successful copy; 11 = poll timeout, 10 = size
                      mismatch, otherwise the globus-url-copy return code.
          samcode  -- 0 once the PNNL location is registered in SAM,
                      12 otherwise.

        Raises Exception when uboonepro has no valid VOMS proxy.
        """
        # sshftp-to-sshftp not enabled on near1, so must use gsiftp: must thus ship from the Sam'd-up dcache file.
        # uboonepro from near1 has an ssh-key to sshftp to dtn2.pnl.gov as chur558.
        # This requires that uboonepro owns a valid proxy. We might get 2 Gbps throughput with this scenario.

        # Verify that a VOMS proxy with time remaining exists.
        cmd = "voms-proxy-info -all "
        proc = sub.Popen(cmd,shell=True,stderr=sub.PIPE,stdout=sub.PIPE)
        (out,err) = proc.communicate()
        goodProxy = False

        for line in out.split('\n'):
            if "timeleft" in line:
                if int(line.split(" : ")[1].replace(":","")) > 0:
                    goodProxy = True
                    break

        if not goodProxy:
            self.error('uboonepro has no proxy.')
            raise Exception('uboonepro has no proxy.')

        in_file = os.path.basename(file_arg)
        # We do a samweb.fileLocate on basename of in_file. This project's parent must be check_root_on_tape.
        transfer = 0
        samweb = samweb_cli.SAMWebClient(experiment="uboone")
        loc = samweb.locateFile(filenameorid=in_file)
        size_in = samweb.getMetadata(filenameorid=in_file)['file_size']
        samcode = 12

        # Source must live on enstore/pnfs, else we cannot gsiftp it out.
        if not ('enstore' in loc[0]["full_path"] and 'pnfs' in loc[0]["full_path"]):
            self.error('No enstore or pnfs in loc[0]["full_path"]')
            return (transfer, samcode)

        full_file = loc[0]["full_path"].replace('enstore:/pnfs/uboone','') + "/" +  in_file

        pnnl_machine = "dtn2.pnl.gov"
        pnnl_dir = 'pic/projects/microboone/data/'
        # Files are grouped at PNNL in per-run-number directories.
        ddir = str(samweb.getMetadata(filenameorid=in_file)['runs'][0][0])
        cmd_mkdir = "ssh chur558@" + pnnl_machine + " mkdir -p "  + "/" + pnnl_dir + ddir
        proc = sub.Popen(cmd_mkdir,shell=True,stderr=sub.PIPE,stdout=sub.PIPE)
        # block, but plow on w.o. regard to whether I was successful to create ddir. (Cuz this will complain if run is not new.)
        (out,err) = proc.communicate()

        pnnl_loc = pnnl_machine + "/" + pnnl_dir + ddir + "/" + in_file
        cmd_gsiftp_to_sshftp = "globus-url-copy -rst -vb -p 10 gsiftp://fndca1.fnal.gov:2811" + full_file + " sshftp://chur558@" + pnnl_loc

        # Launch the copy, polling every `delay` seconds; kill the transfer
        # after delay*ndelays seconds and optionally retry (ntry_max tries).
        ntry = 0
        delay = 5
        ntry_max = 1 # more than 1 is not demonstrably helping. In fact, it creates lotsa orphaned ssh's. EC, 8-Aug-2015.
        ndelays = 20
        while (ntry != ntry_max):
            self.info('Will launch ' + cmd_gsiftp_to_sshftp)
            info_str = "pnnl_transfer trying " + str(ntry+1) + " (of " + str(ntry_max) + ") times."
            self.info (info_str)
            proc = sub.Popen(cmd_gsiftp_to_sshftp,shell=True,stderr=sub.PIPE,stdout=sub.PIPE)
            wait = 0
            transfer = 0
            while  proc.poll() is None:
                wait+=delay
                if wait > delay*ndelays:
                    self.error ("pnnl_transfer timed out in awaiting transfer.")
                    proc.kill()
                    transfer = 11
                    break
                self.info('pnnl_transfer process ' + str(proc.pid) + ' active for ' + str(wait) + ' [sec]')
                time.sleep (delay)
            if (transfer != 11):
                break

            ntry+=1
            ndelays+=10 # extend time period to force timeout for next effort.

        size_out = 0
        if not transfer:
            (out,err) = proc.communicate()
            transfer = proc.returncode
            # also grep the out for indication of success at end.
            if not transfer:
                li = out.split(" ")
                # The token before the last 'bytes' in globus-url-copy output
                # is the total byte count actually moved.
                mind = max(ind for ind, val in enumerate(li) if val == 'bytes') - 1
                size_out = int(li[mind])
                transfer = 10

                # Only a byte-exact copy counts as success.
                if size_out == size_in:
                    transfer = 0

        # Register the new PNNL location in SAM if the copy succeeded.
        # samadd/samloc are pre-initialized so the error message in the
        # handler below cannot itself raise NameError (it did in the
        # original code when addFileLocation failed before samloc existed).
        samadd = None
        samloc = None
        if not transfer:
            try:
                # Then samweb.addFileLocation() to pnnl location, with resj capturing that return status.
                pnnl_loc_withcolon = pnnl_machine + ":/" + pnnl_dir + ddir + "/" + in_file
                samadd = samweb.addFileLocation(filenameorid=in_file,location=pnnl_loc_withcolon)
                samloc  = samweb.locateFile(filenameorid=in_file)
                if len(samloc)>0:
                    samcode = 0
                    self.info('pnnl_transfer() finished moving ' + in_file + ', size ' + str(size_in) + ' [bytes], to PNNL')
                    self.info('Transfer rate was ' + str(out.split("\n")[4].split("    ")[8]))
                    self.info('Transfer and samaddFile are successful. Full SAM location for file is now ' + str(samloc))
            except Exception:
                self.error('pnnl_transfer finished with a problem on ' + in_file + ' during addFile or locFile. samadd/samloc is: ' + str(samadd)+"/"+str(samloc))
        else:
            self.error('pnnl_transfer finished with a problem on ' + in_file)

        return (transfer, samcode)
Exemplo n.º 19
0
    def compare_dropbox_checksum(self):
        """Verify dropbox file checksums against the reference project.

        For each (run, subrun) fetched from the cross-project table, convert
        the reference project's adler32(seed 0) checksum to adler32(seed 1)
        and compare it with the checksum of the dropbox copy in PNFS.  If
        the dropbox file is missing, fall back to the enstore checksum
        recorded in SAM metadata.  Mismatches are emailed to the experts.
        The resulting status code is logged to the DB per (run, subrun).
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' %
                      (run, subrun))

            statusCode = 1
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if (len(filelist) < 1):
                # No input file: log status 100 and move to the next subrun.
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                # Use the same statusCode variable throughout (the original
                # introduced an inconsistent 'status_code' name here).
                statusCode = 100
                status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=statusCode)
                self.log_status(status)
                continue

            if (len(filelist) > 1):
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_file_name = os.path.basename(in_file)
            out_file = '%s/%s' % (self._out_dir, in_file_name)

            # Note that this has the sequence number hard coded as number 0.
            RefStatus = self._api.get_status(
                ds_status(self._ref_project, run, subrun, 0))
            near1_checksum = RefStatus._data

            try:
                # Compare the converted reference checksum with the PNFS one.
                pnfs_adler32_1, pnfs_size = get_pnfs_1_adler32_and_size(
                    out_file)
                near1_adler32_1 = convert_0_adler32_to_1_adler32(
                    near1_checksum, pnfs_size)

                if near1_adler32_1 == pnfs_adler32_1:
                    # NOTE(review): self._data is not reset here, so the
                    # logged data may be stale from a previous iteration —
                    # confirm whether downstream relies on this.
                    statusCode = 0
                else:
                    # Checksum mismatch: alert the experts by email.
                    subject = 'Checksum different in run %d, subrun %d between %s and PNFS' % (
                        run, subrun, self._ref_project)
                    text = '%s\n' % subject
                    text += 'Run %d, subrun %d\n' % (run, subrun)
                    text += 'Converted %s checksum: %s\n' % (self._ref_project,
                                                             near1_adler32_1)
                    text += 'Converted PNFS checksum: %s\n' % (pnfs_adler32_1)

                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)
                    statusCode = 1000
                    self._data = '%s:%s;PNFS:%s' % (
                        self._ref_project, near1_adler32_1, pnfs_adler32_1)

            except LookupError:

                # Dropbox copy is gone: fall back to SAM metadata on tape.
                self.warning("Could not find file in the dropbox %s" %
                             out_file)
                self.warning("Gonna go looking on tape %s" % in_file_name)
                samweb = samweb_cli.SAMWebClient(experiment="uboone")
                meta = {}

                try:
                    meta = samweb.getMetadata(filenameorid=in_file_name)
                    checksum_info = meta['checksum'][0].split(':')
                    if checksum_info[0] == 'enstore':
                        self._data = checksum_info[1]
                        statusCode = 0
                    else:
                        statusCode = 10

                except samweb_cli.exceptions.FileNotFound:
                    # Not in SAM either: alert the experts by email.
                    subject = 'Failed to locate file %s at SAM' % in_file
                    text = 'File %s is not found at SAM!' % in_file
                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)
                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=statusCode,
                               data=self._data)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break