示例#1
0
    def check_db( self ):
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs( self._project, 2 ):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' % (run,subrun))

            statusCode = 2
            in_file_name = self._infile_format % ( run, subrun )
            in_file = '%s/%s' % ( self._in_dir, in_file_name )

            # Get status object
            status = self._api.get_status(ds_status(self._project,
                                                    x[0],x[1],x[2]))

            self._data = status._data
            self._data = str( self._data )

            if self._data:
               statusCode = 0
            else:
                subject = 'Checksum of the file %s not in database' % in_file
                text = """File: %s
Checksum is not in database
                """ % ( in_file )

                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = 0,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
示例#2
0
文件: production.py 项目: LArbys/pubs
    def process(self):
        """Execute the production action for runs found in each stage/status.

        Stages (``self._stage_digits``) and statuses (``self.PROD_STATUS``)
        are both scanned in reverse order.  Every run returned for a combined
        ``stage + status`` code is handled at most once per call: the action
        registered in ``self.PROD_ACTION`` for the decoded status is invoked
        and the status code it returns is logged to the DB.  Runs whose
        (run, subrun) exceeds ``self._max_runid`` (when set) are skipped.
        The method returns as soon as ``self._nruns`` runs have been handled.
        """
        ctr = self._nruns
        # Kazu's version of submit jobs
        stage_v = list(self._stage_digits)
        stage_v.reverse()
        status_v = list(self.PROD_STATUS)
        status_v.reverse()
        # temporary fix: record the processed run and only process once per
        # process function call (a set keeps the membership test O(1))
        processed_run = set()
        for istage in stage_v:
            for istatus in status_v:
                fstatus = istage + istatus
                self.debug('Inspecting status %s @ %s' %
                           (fstatus, self.now_str()))
                for x in self.get_runs(self._project, fstatus):

                    run = int(x[0])
                    subrun = int(x[1])
                    runid = (run, subrun)
                    if self._max_runid and runid > self._max_runid:
                        continue
                    if runid in processed_run:
                        continue
                    processed_run.add(runid)

                    self.info('Found run/subrun: %s/%s ... inspecting @ %s' %
                              (run, subrun, self.now_str()))

                    statusCode = self.__decode_status__(fstatus)
                    action = self.PROD_ACTION[statusCode]

                    # Get status object; its data field is exposed to the
                    # action through self._data
                    status = self._api.get_status(
                        ds_status(self._project, x[0], x[1], x[2]))
                    self._data = status._data
                    statusCode = action(statusCode, istage, run, subrun)

                    self.info('Finished executing an action: %s @ %s' %
                              (action.__name__, self.now_str()))

                    # Create a status object to be logged to DB (if necessary)
                    status = ds_status(project=self._project,
                                       run=run,
                                       subrun=subrun,
                                       seq=0,
                                       status=statusCode,
                                       data=self._data)

                    # Log status
                    self.log_status(status)

                    # Counter decreases by 1
                    ctr -= 1
                    # Stop once the counter reached 0; the former "ctr < 0"
                    # test let one run more than self._nruns through.
                    if ctr <= 0:
                        return
示例#3
0
    def process( self ):
        """Run the registered action on runs of every stage/status combination.

        ``self._stage_digits`` and ``self.PROD_STATUS`` are walked from last
        to first.  For each combined code ``stage + status`` the matching
        runs are fetched; a run is skipped when its (run, subrun) is above
        ``self._max_runid`` (if set) or was already handled during this call.
        Otherwise the action found in ``self.PROD_ACTION`` is called and the
        status code it returns is logged.  Returns once ``self._nruns`` runs
        have been handled.
        """
        ctr = self._nruns
        # Kazu's version of submit jobs
        stage_v  = list(self._stage_digits)[::-1]
        status_v = list(self.PROD_STATUS)[::-1]
        # temporary fix: record the processed run and only process once per
        # process function call (set => constant-time lookup)
        processed_run = set()
        for istage in stage_v:
            for istatus in status_v:
                fstatus = istage + istatus
                self.debug('Inspecting status %s @ %s' % (fstatus,self.now_str()))
                for x in self.get_runs( self._project, fstatus ):

                    run    = int(x[0])
                    subrun = int(x[1])
                    runid  = (run,subrun)
                    if self._max_runid and runid > self._max_runid:
                        continue
                    if runid in processed_run:
                        continue
                    processed_run.add(runid)

                    self.info('Found run/subrun: %s/%s ... inspecting @ %s' % (run,subrun,self.now_str()))

                    statusCode = self.__decode_status__( fstatus )
                    action = self.PROD_ACTION[statusCode]

                    # Get status object; the action reads its data via self._data
                    status = self._api.get_status(ds_status(self._project,
                                                            x[0],x[1],x[2]))
                    self._data = status._data
                    statusCode = action( statusCode, istage, run, subrun )

                    self.info('Finished executing an action: %s @ %s' % (action.__name__,self.now_str()))

                    # Create a status object to be logged to DB (if necessary)
                    status = ds_status( project = self._project,
                                        run     = run,
                                        subrun  = subrun,
                                        seq     = 0,
                                        status  = statusCode,
                                        data    = self._data )

                    # Log status
                    self.log_status( status )

                    # Counter decreases by 1
                    ctr -= 1
                    # Return when the counter reached 0 ("ctr < 0" used to
                    # allow one extra run beyond self._nruns)
                    if ctr <= 0:
                        return
示例#4
0
文件: check_tape.py 项目: LArbys/pubs
    def find_checksum( self ):
        """Look up the SAM checksum of each fetched run and log the outcome.

        Runs are taken from ``get_xtable_runs`` for this project (status 2)
        and its parent project (status 0).  The metadata of the run's file is
        requested from SAM: an ``enstore`` checksum is stored in
        ``self._data`` and logged with status 0, any other checksum type is
        logged with status 1, and a file unknown to SAM triggers an e-mail to
        ``self._experts`` and status 100.  Returns None.
        """
        # Without a DB connection there is nothing to do
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Load the resource info on first use
        if self._nruns is None:
            self.get_resource()

        remaining = self._nruns
        for entry in self.get_xtable_runs( [self._project, self._parent_project], [2, 0] ):

            remaining -= 1

            run = int(entry[0])
            subrun = int(entry[1])

            self.info('Checking tape: run=%d, subrun=%d ...' % (run,subrun))

            fname = self._infile_format % ( run, subrun )
            client = samweb_cli.SAMWebClient(experiment="uboone")

            try:
                fields = client.getMetadata(filenameorid=fname)['checksum'][0].split(':')
            except samweb_cli.exceptions.FileNotFound:
                subject = 'Failed to locate file %s at SAM' % fname
                text = 'File %s is not found at SAM!' % fname

                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                code = 100
            else:
                if fields[0] == 'enstore':
                    self._data = fields[1]
                    code = 0
                else:
                    code = 1

            # NOTE(review): self._data is only updated for an enstore
            # checksum, so in the other cases the value logged below appears
            # to be whatever was stored earlier -- confirm this is intended.
            self.log_status( ds_status( project = self._project,
                                        run     = run,
                                        subrun  = subrun,
                                        seq     = 0,
                                        status  = code,
                                        data    = self._data ) )

            # Stop when the requested number of runs has been handled
            if remaining == 0:
                break
示例#5
0
    def validate(self):
        """Compare the md5 checksums of the input and output file of each run.

        Runs of ``self._project`` with status 2 are fetched; the loop stops
        once ``self._nruns`` runs have been handled.  A file name containing
        ``:`` is treated as ``host:path`` and is tested / hashed through
        ``ssh``; any other name is handled locally.  Status 0 is logged when
        both files exist and their md5 digests agree, status 100 otherwise.
        Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project,2):

            # Counter decreases by 1
            ctr -= 1
            (run, subrun) = (int(x[0]), int(x[1]))

            status_code = 1
            in_file = '%s/%s' % (self._in_dir,self._name_pattern % (run,subrun))
            out_file = '%s/%s' % (self._out_dir,self._name_pattern % (run,subrun))
            # Report starting
            self.info('Checking %s transfer' % (out_file))

            fmd5 = []
            for fin in in_file, out_file:
                if ":" in fin:
                    # Split on the first colon only, so a path that itself
                    # contains ':' still yields exactly (host, path).
                    remote = tuple(fin.split(":", 1))
                    exists = not os.system('ssh -x %s "test -f %s"' % remote)
                    md5_cmd = 'ssh -x %s "md5sum -b %s"' % remote
                else:
                    exists = os.path.isfile(fin)
                    md5_cmd = 'md5sum -b %s' % (fin)

                if not exists:
                    self.error('File %s does not exist'%fin)
                    status_code = 100
                    continue

                # Close the pipe explicitly instead of leaking it
                pipe = os.popen(md5_cmd)
                try:
                    fmd5.append(pipe.read())
                finally:
                    pipe.close()

            if status_code == 1:
                # Each md5sum output line is "<digest> <file name>"; an empty
                # or malformed output counts as a failed comparison rather
                # than raising.
                sums = [out.strip().split(None, 1) for out in fmd5]
                if all(len(s) == 2 for s in sums) and sums[0][0] == sums[1][0]:
                    self.info('Cheksum ok! %s %s' % tuple(sums[1]))
                    status_code = 0
                else:
                    self.error('Failed md5sum!')
                    status_code = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = int(x[0]),
                                subrun  = int(x[1]),
                                seq     = int(x[2]),
                                status  = status_code )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
示例#6
0
    def validate(self):
        """Validate each fetched run by inspecting its log file.

        On first use the resource entry of ``self._project`` is read to fill
        ``_nruns``, ``_beamdir``, ``_beamfile``, ``_infodir`` and
        ``_infofile``.  Runs with status 2 are then fetched; the loop stops
        once ``self._nruns`` runs have been handled.  A run passes when its
        log file exists, is not empty and the first token of its last line is
        a positive integer; status 0 is then logged.  For a failed check only
        an error message is emitted and no status is logged.  Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:

            resource = self._api.get_resource(self._project)

            self._nruns    = int(resource['NRUNS'])
            self._beamdir  = resource['BEAMDIR']
            self._beamfile = resource['BEAMFILE']
            self._infodir  = resource['INFODIR']
            self._infofile = resource['INFOFILE']

        ctr = self._nruns
        for x in self.get_runs(self._project,2):

            # Counter decreases by 1
            ctr -= 1

            run    = int(x[0])
            subrun = int(x[1])

            self.info('Parse into log file and check number events')
            logfname = '%s/%s' % (self._logdir,self._logfile % (run,subrun))

            # Collect the reason for a failed check instead of using
            # "continue", which used to bypass the counter test at the end of
            # the loop body.
            problem = None
            if not os.path.isfile(logfname):
                problem = '%s not created' % logfname
            elif os.stat(logfname).st_size == 0:
                problem = '%s is empty' % logfname
            else:
                with open(logfname) as log_file:
                    last_line = log_file.readlines()[-1].split()
                log_num_evts = int(last_line[0])
                if not log_num_evts > 0:
                    problem = 'No events written'

            if problem is None:
                # Create a status object to be logged to DB
                status = ds_status( project = self._project,
                                    run     = run,
                                    subrun  = subrun,
                                    seq     = 0,
                                    status  = 0 )

                # Log status
                self.log_status( status )
            else:
                self.error(problem)

            # Break from loop if counter became 0
            if not ctr: break
示例#7
0
    def process_newruns(self):
        """Prepare the beam-data command for each new run and log status 2.

        On first use the resource entry of ``self._project`` is read to fill
        ``_nruns``, ``_infodir``, ``_infofile``, ``_jsondir`` and
        ``_jsonfile``.  Runs with status 1 are then fetched; the loop stops
        once ``self._nruns`` runs have been handled.  For a run whose json
        file exists, its ``stime`` / ``etime`` entries are parsed, the
        ``bdaq_get`` command line is built and written to the log (it is not
        executed here) and status 2 is logged.  A run without json file is
        only reported as waiting.  Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:

            resource = self._api.get_resource(self._project)

            self._nruns    = int(resource['NRUNS'])
            self._infodir  = resource['INFODIR']
            self._infofile = resource['INFOFILE']
            self._jsondir  = resource['JSONDIR']
            self._jsonfile = resource['JSONFILE']

        ctr = self._nruns
        for x in self.get_runs(self._project,1):

            # Counter decreases by 1
            ctr -= 1

            run    = int(x[0])
            subrun = int(x[1])

            jsonfname = '%s/%s' % (self._jsondir,self._jsonfile % (run,subrun))
            # if/else instead of "continue": a waiting run must still reach
            # the counter test at the end of the loop body.
            if os.path.isfile(jsonfname):
                # The context manager closes the file even if parsing fails
                with open(jsonfname) as json_file:
                    json_data = json.load(json_file)

                tbegin = datetime.datetime.strptime(json_data["stime"], "%a %b %d %H:%M:%S %Z %Y")
                tend = datetime.datetime.strptime(json_data["etime"], "%a %b %d %H:%M:%S %Z %Y")

                # Report starting
                self.info('Getting beam data: run=%d, subrun=%d' % (run,subrun))
                self.info('  t0=%s, t1=%s' % (tbegin,tend))

                # NOTE(review): strftime("%s") is a platform extension, not a
                # directive documented by Python -- confirm target platforms.
                t0 = int(tbegin.strftime("%s"))
                t1 = int(tend.strftime("%s")) + 1
                # The subrun number used to be rendered as "--<n>" (stray
                # "--" in the format string).
                cmd = 'bdaq_get --run-number %i --subrun-number %i --begin-time %i %i --end-time %i %i' % (run,subrun,t0,0,t1,0)
                self.info('Run cmd: %s'%cmd)

                # Create a status object to be logged to DB (if necessary)
                status = ds_status( project = self._project,
                                    run     = run,
                                    subrun  = subrun,
                                    seq     = 0,
                                    status  = 2 )

                # Log status
                self.log_status( status )
            else:
                self.info('Waiting for json file %s'%jsonfname)

            # Break from loop if counter became 0
            if not ctr: break
示例#8
0
文件: dummy_daq.py 项目: LArbys/pubs
    def validate(self):
        """Check that the output file of each fetched run exists.

        On first use ``_nruns``, ``_out_dir`` (below ``$PUB_TOP_DIR``) and
        ``_outfile_format`` are read from the resource entry of
        ``self._project``.  Runs with status 2 are fetched; the loop stops
        once ``self._nruns`` runs have been handled.  Status 0 is logged when
        the run's output file exists, status 1 otherwise.  Returns None.
        """
        # No DB connection -> give up
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Read the resource info on first use
        if self._nruns is None:
            res = self._api.get_resource(self._project)
            self._nruns = int(res['NRUNS'])
            self._out_dir = '%s/%s' % (os.environ['PUB_TOP_DIR'], res['OUTDIR'])
            self._outfile_format = res['OUTFILE_FORMAT']

        remaining = self._nruns
        for entry in self.get_runs(self._project, 2):

            remaining -= 1

            run, subrun = int(entry[0]), int(entry[1])

            target = '%s/%s' % (self._out_dir,
                                self._outfile_format % (run, subrun))
            if not os.path.isfile(target):
                self.error('error on run: run=%d, subrun=%d ...' %
                           (run, subrun))
                code = 1
            else:
                self.info('validated run: run=%d, subrun=%d ...' %
                          (run, subrun))
                code = 0

            # Pretend I'm doing something
            time.sleep(1)

            # Record the outcome in the DB
            self.log_status(ds_status(project=self._project,
                                      run=run,
                                      subrun=subrun,
                                      seq=0,
                                      status=code))

            # Stop when the requested number of runs has been handled
            if remaining == 0:
                break
示例#9
0
    def process_newruns(self):
        """Transfer the file of each new run with rsync and log the outcome.

        Runs are taken from ``get_xtable_runs`` for this project (status 1)
        and its parent project (status 0); the loop stops once
        ``self._nruns`` runs have been handled.  The rsync command is run
        through the shell, with ``--bwlimit`` appended when
        ``self._bwlimit`` is positive.  Status 2 is logged for a zero exit
        code, status 100 (plus the captured stderr lines) otherwise.
        Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1,0]):

            (run, subrun) = (int(x[0]), int(x[1]))
            # Counter decreases by 1
            ctr -= 1

            # Generate input, output file names
            in_file = '%s/%s' % (self._in_dir,self._name_pattern % (run,subrun))
            out_file = '%s/%s' % (self._out_dir,self._name_pattern % (run,subrun))

            # The time stamp becomes the suffix of rsync's backup copies
            dt = time.strftime("%Y%m%d-%H%M%S")
            cmd = ["rsync","-e \"ssh -x\"","-bptgo","--suffix=_%s"%dt,in_file,out_file]
            if self._bwlimit>0:
                cmd += ["--bwlimit=%i"%self._bwlimit]
            cmd_line = ' '.join(cmd)

            # Log the command before launching it, so it is on record even
            # if starting the process fails.
            # NOTE(review): the command is still run through the shell; the
            # file names presumably come from trusted configuration -- confirm.
            self.debug('Executing %s'%cmd_line)
            p = subprocess.Popen(cmd_line, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            (out,err) = p.communicate(None)
            if not p.returncode:
                status_code = 2
                self.info('Transfered %s to %s'%(in_file,out_file))
            else:
                status_code = 100
                self.error("Failed to transfer %s to %s"%(in_file,out_file))
                # splitlines() does not produce the empty trailing entry
                # that split("\n") yields for newline-terminated output
                for line in err.splitlines():
                    self.error(line)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = int(x[0]),
                                subrun  = int(x[1]),
                                seq     = 0,
                                status  = status_code )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
示例#10
0
    def process_newruns(self):
        """Copy the input file of each new run to the output location.

        Runs are taken from ``get_xtable_runs`` for this project (status 1)
        and its parent project (status 0); the loop stops once
        ``self._nruns`` runs have been handled.  If the input file exists it
        is copied with ``shutil.copyfile`` and status 2 is logged; otherwise
        status 100 is logged.  Returns None.
        """
        # No DB connection -> give up
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Read the resource info on first use
        if self._nruns is None:
            self.get_resource()

        remaining = self._nruns
        projects = [self._project, self._parent_project]
        for entry in self.get_xtable_runs(projects, [1, 0]):

            remaining -= 1

            run = int(entry[0])
            subrun = int(entry[1])

            self.info('processing new run: run=%d, subrun=%d ...' %
                      (run, subrun))

            # Source and destination of the copy
            src = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
            dst = '%s/%s' % (self._out_dir, self._outfile_format % (run, subrun))

            if not os.path.isfile(src):
                code = 100
            else:
                shutil.copyfile(src, dst)
                code = 2

            # Pretend I'm doing something
            time.sleep(1)

            # Record the outcome in the DB
            self.log_status(ds_status(project=self._project,
                                      run=int(entry[0]),
                                      subrun=int(entry[1]),
                                      seq=0,
                                      status=code))

            # Stop when the requested number of runs has been handled
            if remaining == 0:
                break
示例#11
0
    def validate_sam( self ):
        """Check whether the file of each fetched run is known to SAM.

        Runs are taken from ``get_xtable_runs`` for this project (status 12)
        and its parent project (status 0); the loop stops once
        ``self._nruns`` runs have been handled.  Status 10 is logged when SAM
        returns metadata for the run's file, status 1 when SAM reports the
        file as not found.  Returns None.
        """
        # No DB connection -> give up
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Read the resource info on first use
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        remaining = self._nruns
        projects = [self._project,self._parent_project]
        for entry in self.get_xtable_runs(projects, [12,0]):

            remaining -= 1

            run = int(entry[0])
            subrun = int(entry[1])

            self.info('Checking the SAM declaration: run=%d, subrun=%d ...' % (run,subrun))

            fname = self._infile_format % ( run, subrun )
            client = samweb_cli.SAMWebClient(experiment="uboone")

            # Ask SAM for the file's metadata to learn whether it is declared
            try:
                client.getMetadata(filenameorid=fname)
            except samweb_cli.exceptions.FileNotFound:
                code = 1
            else:
                code = 10

            # Pretend I'm doing something
            time.sleep(1)

            # Record the outcome in the DB
            self.log_status( ds_status( project = self._project,
                                        run     = int(entry[0]),
                                        subrun  = int(entry[1]),
                                        seq     = 0,
                                        status  = code ) )

            # Stop when the requested number of runs has been handled
            if remaining == 0:
                break
示例#12
0
文件: dummy_daq.py 项目: LArbys/pubs
    def process_newruns(self):
        """Write a dummy data file for each new run and log status 2.

        On first use ``_nruns``, ``_out_dir`` (below ``$PUB_TOP_DIR``) and
        ``_outfile_format`` are read from the resource entry of
        ``self._project``.  Runs with status 1 are fetched; the loop stops
        once ``self._nruns`` runs have been handled.  Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:

            resource = self._api.get_resource(self._project)

            self._nruns = int(resource['NRUNS'])
            self._out_dir = '%s/%s' % (os.environ['PUB_TOP_DIR'],
                                       resource['OUTDIR'])
            self._outfile_format = resource['OUTFILE_FORMAT']

        ctr = self._nruns
        for x in self.get_runs(self._project, 1):

            # Counter decreases by 1
            ctr -= 1

            run = int(x[0])
            subrun = int(x[1])

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' %
                      (run, subrun))

            out_file = '%s/%s' % (self._out_dir,
                                  self._outfile_format % (run, subrun))
            # The context manager closes the file even if the write fails
            with open(out_file, 'w') as f:
                f.write('Dummy data for run %d, subrun %d' % (run, subrun))

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=2)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
示例#13
0
    def validate(self):
        """Check that the ``.json`` output file of each fetched run exists.

        Runs of ``self._project`` with status 2 are fetched; the loop stops
        once ``self._nruns`` runs have been handled.  Status 0 is logged when
        ``<out_dir>/<infile name>.json`` exists, status 100 otherwise.
        Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 2):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('validating run: run=%d, subrun=%d ...' % (run, subrun))

            # Only the output file is inspected; the input file name that
            # used to be built here was never used.
            out_file = '%s/%s.json' % (self._out_dir, self._infile_format %
                                       (run, subrun))

            if os.path.isfile(out_file):
                status_code = 0
            else:
                status_code = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=int(x[2]),
                               status=status_code)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
示例#14
0
    def process_newruns(self):
        """Copy the input file of each new run to the output location.

        Runs are taken from ``get_xtable_runs`` for this project (status 1)
        and its parent project (status 0); the loop stops once
        ``self._nruns`` runs have been handled.  If the input file exists it
        is copied with ``shutil.copyfile`` and status 2 is logged; otherwise
        status 100 is logged.  Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project,self._parent_project],
                                      [1,0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' % (run,subrun))

            # Check input file exists. Otherwise report error
            in_file = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            out_file = '%s/%s' % (self._out_dir,self._outfile_format % (run,subrun))
            if os.path.isfile(in_file):
                shutil.copyfile(in_file,out_file)
                status_code = 2
            else:
                status_code = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = int(x[0]),
                                subrun  = int(x[1]),
                                seq     = 0,
                                status  = status_code )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
示例#15
0
    def validate_dropbox( self ):
        """Check over ssh whether the output file of each run is still listed.

        Runs of ``self._project`` with status 32 are fetched; the loop stops
        once ``self._nruns`` runs have been handled.  ``ls <out_file>`` is
        executed on host ``uboonegpvm06`` through ssh: status 0 is logged
        when the command exits with 0, status 10 otherwise.  Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 32):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Validating a file in dropbox: run=%d, subrun=%d ...' % (run,subrun))

            # Only the output file is inspected; the input file name that
            # used to be built here was never used.
            out_file = '%s/%s' % (self._out_dir,self._outfile_format % (run,subrun))

            res = subprocess.call(['ssh', 'uboonegpvm06', '-x', 'ls', out_file])
            if res:
                # didn't find the file
                status_code = 10
            else:
                status_code = 0

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = int(x[0]),
                                subrun  = int(x[1]),
                                seq     = int(x[2]),
                                status  = status_code )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
示例#16
0
    def error_handle(self):
        """Remove the output file of each failed transfer and log status 1.

        Runs of ``self._project`` with status 100 are fetched; the loop stops
        once ``self._nruns`` runs have been handled.  An output name
        containing ``:`` is treated as ``host:path`` and removed through ssh;
        any other name is removed locally.  In both cases the file is only
        removed after a check that it is a regular file.  Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 100):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            status_code = 1
            out_file = '%s/%s' % (self._out_dir, self._name_pattern %
                                  (run, subrun))
            self.info('Removing failed transfer %s' % (out_file))

            if ":" in out_file:
                # Split on the first colon only, so a path that itself
                # contains ':' still yields exactly (host, path).
                remote = tuple(out_file.split(":", 1))
                #check that out_file is a file before trying to remove
                #(hopefully should avoid unintentional rm with bad out_dir/name_pattern combo)
                if not os.system('ssh -x %s "test -f %s"' % remote):
                    os.system('ssh -x %s "rm %s"' % remote)
            elif os.path.isfile(out_file):
                # Remove exactly the path that was just tested, without
                # handing it to a shell for interpretation.
                try:
                    os.remove(out_file)
                except OSError as exc:
                    self.error('Failed to remove %s: %s' % (out_file, exc))

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=0,
                               status=status_code)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
示例#17
0
文件: ds_clean.py 项目: LArbys/pubs
    def validate(self):
        """Verify that the input file of each fetched run has been deleted.

        Runs are fetched with ``get_runs(self._project, 2, False)``; the loop
        stops once ``self._nruns`` runs have been handled.  A name containing
        ``:`` is treated as ``host:path`` and listed through ssh; any other
        name is matched locally with ``glob``.  Status 100 is logged (after
        an error message) when the file is still found, status 0 otherwise.
        Returns None.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 2, False):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            in_file = '%s/%s' % (self._in_dir, self._name_pattern %
                                 (run, subrun))
            self.info('Check if file %s was deleted.' % in_file)

            tmp_status = 0
            if ":" in in_file:
                # Split on the first colon only, so a path that itself
                # contains ':' still yields exactly (host, path).
                remote = tuple(in_file.split(":", 1))
                # A zero exit code means the remote "ls" succeeded, i.e. the
                # file is still there.
                if not os.system('ssh -x %s "ls %s"' % remote):
                    tmp_status = 100
            elif glob.glob(in_file):
                tmp_status = 100

            if tmp_status == 100:
                self.error('Failed to remove %s' % in_file)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=int(x[2]),
                               status=tmp_status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
示例#18
0
    def validate(self):
        """Check that the ``.json`` output exists for each status-2 run.

        For every entry from ``get_runs(self._project, 2)`` the expected
        output path ``self._out_dir/<infile_format % (run, subrun)>.json`` is
        tested with ``os.path.isfile``.  Status 0 is logged when the file
        exists and status 100 when it does not, using the sequence number of
        the fetched entry.  The loop stops once the counter seeded from
        ``self._nruns`` reaches zero.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 2):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('validating run: run=%d, subrun=%d ...' % (run, subrun))

            out_file = '%s/%s.json' % (self._out_dir,
                                       self._infile_format % (run, subrun))

            # 0 when the output file is present, 100 when it is missing
            if os.path.isfile(out_file):
                status_code = 0
            else:
                status_code = 100

            # Placeholder delay kept from the original implementation
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=int(x[2]),
                               status=status_code)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#19
0
    def validate(self):
        """Verify that the input file of each status-2 run has been deleted.

        For every entry from ``get_runs(self._project, 2, False)`` the path
        ``self._in_dir/self._name_pattern % (run, subrun)`` is looked up, via
        ``ssh ... ls`` when it contains ":" and via ``glob`` otherwise.  If
        the file is still found, an error is reported and status 100 is
        logged; otherwise status 0 is logged.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 2, False):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            in_file = '%s/%s' % (self._in_dir,
                                 self._name_pattern % (run, subrun))
            self.info('Check if file %s was deleted.' % in_file)

            tmp_status = 0
            if ":" in in_file:
                # ls exiting with 0 means the remote file is still there
                if not os.system('ssh -x %s "ls %s"' %
                                 tuple(in_file.split(":"))):
                    tmp_status = 100
            else:
                if len(glob.glob(in_file)) > 0:
                    tmp_status = 100

            if tmp_status == 100:
                self.error('Failed to remove %s' % in_file)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=int(x[2]),
                               status=tmp_status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#20
0
    def error_handle(self):
        """Remove the output file of each run fetched with status 100.

        For every entry from ``get_runs(self._project, 100)`` the output path
        ``self._out_dir/self._name_pattern % (run, subrun)`` is removed if it
        is a regular file, through ssh when the path contains ":" and locally
        otherwise.  Status 1 (seq 0) is then logged for the run.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 100):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            out_file = '%s/%s' % (self._out_dir,
                                  self._name_pattern % (run, subrun))
            self.info('Removing failed transfer %s' % (out_file))

            if ":" in out_file:
                # Split only on the first ":" so that a path containing a
                # further colon still yields exactly (host, path) for the
                # two-slot format strings below.
                host_and_path = tuple(out_file.split(":", 1))
                # check that out_file is a file before trying to remove
                # (hopefully should avoid unintentional rm with bad
                # out_dir/name_pattern combo)
                if not os.system('ssh -x %s "test -f %s"' % host_and_path):
                    os.system('ssh -x %s "rm %s"' % host_and_path)
            else:
                if os.path.isfile(out_file):
                    os.system('rm %s' % out_file)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=1)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#21
0
    def validate(self):
        """Log status 0 for each run fetched with status 2.

        On first use the NRUNS, INFODIR and INFOFILE resource entries are
        read through ``self._api.get_resource``.  For every entry from
        ``get_runs(self._project, 2)`` the info-file path is built and
        reported through ``self.info``; the file itself is not opened here.
        Status 0 (seq 0) is then logged for the run.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:

            resource = self._api.get_resource(self._project)

            self._nruns = int(resource['NRUNS'])
            self._infodir = resource['INFODIR']
            self._infofile = resource['INFOFILE']

        ctr = self._nruns
        for x in self.get_runs(self._project, 2):

            # Counter decreases by 1
            ctr -= 1

            run = int(x[0])
            subrun = int(x[1])

            fname = '%s/%s' % (self._infodir, self._infofile % (run, subrun))

            self.info('Parse info file %s and check created files' % fname)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=0)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#22
0
文件: ds_beamdaq.py 项目: LArbys/pubs
    def validate(self):
        """Mark each status-2 run as status 0 after reporting its info file.

        The NRUNS, INFODIR and INFOFILE resource entries are loaded through
        ``self._api.get_resource`` the first time this runs.  For every entry
        from ``get_runs(self._project, 2)`` the info-file path is only built
        and reported; status 0 (seq 0) is then logged.
        """
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Pull the project configuration once and cache it on the instance.
        if self._nruns is None:
            cfg = self._api.get_resource(self._project)
            self._nruns = int(cfg['NRUNS'])
            self._infodir = cfg['INFODIR']
            self._infofile = cfg['INFOFILE']

        remaining = self._nruns
        for entry in self.get_runs(self._project, 2):
            remaining -= 1

            run, subrun = int(entry[0]), int(entry[1])

            info_name = self._infofile % (run, subrun)
            fname = '%s/%s' % (self._infodir, info_name)
            self.info('Parse info file %s and check created files' % fname)

            # Record status 0 for this run.
            self.log_status(ds_status(project=self._project,
                                      run=run,
                                      subrun=subrun,
                                      seq=0,
                                      status=0))

            # Stop when the per-call quota is used up.
            if not remaining:
                break
示例#23
0
    def error_handle(self):
        """Log status 1 for each run fetched with status 100.

        Every entry from ``get_runs(self._project, 100)`` is reported,
        followed by a one-second placeholder sleep, and then logged with
        status 1 and seq 0.  The loop ends once the counter seeded from
        ``self._nruns`` reaches zero.
        """
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Load the resource configuration on first use.
        if self._nruns is None:
            self.get_resource()

        remaining = self._nruns
        for entry in self.get_runs(self._project, 100):
            remaining -= 1

            run = int(entry[0])
            subrun = int(entry[1])

            message = 'cleaning failed run: run=%d, subrun=%d ...' % (run,
                                                                      subrun)
            self.info(message)

            # Placeholder delay; no real cleanup happens in this handler.
            time.sleep(1)

            new_status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=1)
            self.log_status(new_status)

            if not remaining:
                break
    def error_handle(self):
        """Log status 1 for each run fetched with status 100.

        Each entry from ``get_runs(self._project, 100)`` is reported through
        ``self.info``, followed by a one-second placeholder sleep, and is
        then logged with status 1 and seq 0.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 100):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('cleaning failed run: run=%d, subrun=%d ...' %
                      (run, subrun))

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=1)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#25
0
文件: ds_clean.py 项目: LArbys/pubs
    def error_handle(self):
        """Re-log status 100 for each run fetched with status 100.

        For every entry from ``get_runs(self._project, 100, False)`` the
        input path is built only to be reported in the log message; no file
        operation is performed here.  Status 100 (seq 0) is logged again.
        """
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Load the resource configuration on first use.
        if self._nruns is None:
            self.get_resource()

        remaining = self._nruns
        for entry in self.get_runs(self._project, 100, False):
            remaining -= 1

            run, subrun = int(entry[0]), int(entry[1])

            base_name = self._name_pattern % (run, subrun)
            in_file = '%s/%s' % (self._in_dir, base_name)
            self.info('Will try removing %s again later.' % (in_file))

            # Keep the run at status 100.
            self.log_status(ds_status(project=self._project,
                                      run=run,
                                      subrun=subrun,
                                      seq=0,
                                      status=100))

            if not remaining:
                break
示例#26
0
    def error_handle(self):
        """Re-log status 100 for each run fetched with status 100.

        For every entry from ``get_runs(self._project, 100, False)`` the
        input path is built and reported through ``self.info``; nothing is
        removed here.  Status 100 (seq 0) is logged again for the run.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 100, False):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            in_file = '%s/%s' % (self._in_dir,
                                 self._name_pattern % (run, subrun))
            self.info('Will try removing %s again later.' % (in_file))

            tmp_status = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=tmp_status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#27
0
    def check_db(self):
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 2):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' %
                      (run, subrun))

            statusCode = 2
            in_file_name = self._infile_format % (run, subrun)
            in_file = '%s/%s' % (self._in_dir, in_file_name)

            # Get status object
            status = self._api.get_status(
                ds_status(self._project, x[0], x[1], x[2]))

            self._data = status._data
            self._data = str(self._data)

            if self._data:
                statusCode = 0
            else:
                subject = 'Checksum of the file %s not in database' % in_file
                text = """File: %s
Checksum is not in database
                """ % (in_file)

                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'], self._experts, subject,
                         text)

                statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=statusCode,
                               data=self._data)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
示例#28
0
    def process_newruns(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
	    self.error('Cannot connect to DB! Aborting...')
	    return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Check available space
        if ":" in self._in_dir:
            disk_frac_used=int(os.popen('ssh -x %s "df %s" | tail -n1'%tuple(self._in_dir.split(":"))).read().split()[4].strip("%"))
        else:
            disk_frac_used=int(os.popen('df %s | tail -n1'%(self._in_dir)).read().split()[4].strip("%"))

        self.info("%i%% of disk used. Removing files to get down to %i%%."%(disk_frac_used, self._disk_frac_limit))
        if (disk_frac_used < self._disk_frac_limit):
            self.info('Only %i%% of disk space used (%s), skip cleaning until %i%% is reached.'%(disk_frac_used, self._in_dir, self._disk_frac_limit))
            return

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        #we want the last argument of this list get_xtable_runs call to be False
        #that way the list is old files first to newew files last and clean up that way
        #target_runs = self.get_xtable_runs([self._project, self._parent_project], 
        #                                   [            1,                    0],False)

        target_runs = self.get_xtable_runs(self._project_list, self._project_requirement, False)
        self.info('Found %d runs to be processed (from project %s)...' % (len(target_runs),self._parent_project))
        for x in target_runs:

            # Counter decreases by 1
            ctr -=1

            (run, subrun) = (int(x[0]), int(x[1]))

            tmp_status = 1
            rm_status = 1
            multiple_file_status=0

            # Check input file exists. Otherwise report error
            in_file_holder = '%s/%s' % (self._in_dir,self._name_pattern % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if (len(filelist)>1):
                self.error('ERROR: There is more than one file matching that pattern: %s' % filelist)
                multiple_file_status=200
            if (len(filelist)<1):
                self.info('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                status_code=100
                status = ds_status( project = self._project,
                                    run     = run,
                                    subrun  = subrun,
                                    seq     = 0,
                                    status  = status_code )
                self.log_status( status )     
                errorMessage = "Failed to find file%s"%in_file_holder
                subject = "get_checksum_temp Failed to find file%s"%in_file_holder
                text = """File: %s
Error message:
%s
                """ % ( in_file_holder, errorMessage )
                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )
            else:
                in_file = filelist[0]
                self.info('Removing %s'%in_file)

                if ":" in in_file:
                #check that out_file is a file before trying to remove 
                #(hopefully should avoid unintentional rm with bad out_dir/name_pattern combo)
                    if not os.system('ssh -x %s "test -f %s"'%(tuple(in_file.split(":")))):
                        rm_status=os.system('ssh -x %s "rm -f %s"' % tuple(in_file.split(":")))
                        tmp_status=2
                else:
                    self.info('Looks like the file is local on this node')
                    if not os.path.isfile(in_file):
                        self.info("ERROR: os.path.isfile('%s') returned false?!"%in_file)
                    self.info('Going to remove the file with rm...')
                    rm_status=os.system('rm -f %s' % in_file)
                    tmp_status=2

                if rm_status==0:
                    tmp_status=tmp_status + multiple_file_status
                else:
                    self.info('Failed to remove the file %s' % in_file)
                    tmp_status=4
            # Create a status object to be logged to DB (if necessary)
                status = ds_status( project = self._project,
                                    run     = int(x[0]),
                                    subrun  = int(x[1]),
                                    seq     = 0,
                                    status  = tmp_status )
            
            # Log status
                self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
示例#29
0
文件: ds_beamdaq.py 项目: LArbys/pubs
    def process_newruns(self):
        """Run ``bdaq_get`` for each status-1 run and log status 2.

        On first use the NRUNS, ISTEST, FCLDIR, FCLFILE, INFODIR and INFOFILE
        resource entries are read (plus TIMEDIR and TIMEFILE when ISTEST is
        1).  For every entry from ``get_runs(self._project, 1)`` the begin
        and end times are taken either from a timestamp text file (test mode)
        or from ``self._api.run_timestamp``.  They are then passed to the
        ``bdaq_get`` command line, after which status 2 (seq 0) is logged.
        In test mode a run whose timestamp file is missing or empty is
        skipped without logging.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:

            resource = self._api.get_resource(self._project)

            self._nruns = int(resource['NRUNS'])
            self._istest = int(resource['ISTEST'])
            self._fcldir = resource['FCLDIR']
            self._fclfile = resource['FCLFILE']
            self._infodir = resource['INFODIR']
            self._infofile = resource['INFOFILE']
            if self._istest == 1:
                self._timedir = resource['TIMEDIR']
                self._timefile = resource['TIMEFILE']

        time_format = "%a %b %d %H:%M:%S %Z %Y"

        ctr = self._nruns
        for x in self.get_runs(self._project, 1):

            # Counter decreases by 1
            ctr -= 1

            run = int(x[0])
            subrun = int(x[1])

            # NOTE(review): the ``continue`` paths below bypass the counter
            # check at the end of the loop body, so once ctr has passed zero
            # on a skipped run the loop no longer stops early -- confirm
            # whether that is intended.
            if self._istest == 1:
                # Read timestamp from text file for testing
                tsfname = '%s/%s' % (self._timedir, self._timefile %
                                     (run, subrun))
                if not os.path.isfile(tsfname):
                    self.info('Waiting for time stamp file %s' % tsfname)
                    continue
                if os.stat(tsfname).st_size == 0:
                    self.info('Waiting for time stamp info')
                    continue
                # Close the file deterministically instead of leaking the
                # handle on every processed run.
                with open(tsfname) as ts_file:
                    ts_lines = ts_file.readlines()
                # Begin time is the 2nd '"'-separated field of the first
                # line, end time the 4th field of the last line.
                bgn_line = ts_lines[0].split('"')
                end_line = ts_lines[-1].split('"')
                tbegin = datetime.datetime.strptime(bgn_line[1], time_format)
                tend = datetime.datetime.strptime(end_line[3], time_format)
            else:
                # Read timestamp from DB
                timestamp = self._api.run_timestamp('MainRun', run, subrun)
                tbegin = datetime.datetime.strptime(timestamp[0], time_format)
                tend = datetime.datetime.strptime(timestamp[1], time_format)

            # Report starting
            self.info('Getting beam data: run=%d, subrun=%d' % (run, subrun))
            self.info('  t0=%s, t1=%s' % (tbegin, tend))

            cmd = 'bdaq_get --run-number %i --subrun-number %i --begin-time %i %i --end-time %i %i -f %s/%s' % (
                run, subrun, int(tbegin.strftime("%s")), 0,
                int(tend.strftime("%s")) + 1, 0, self._fcldir, self._fclfile)
            self.info('Run cmd: %s' % cmd)
            subprocess.call(cmd, shell=True)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=2)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#30
0
# Logger plus reader/writer handles on the status database.
logger = pub_logger.get_logger('table')
reader = ds_reader(pubdb_conn_info.reader_info(), logger)
writer = ds_writer(pubdb_conn_info.writer_info(), logger)

# Report an unknown table name; processing still continues below.
if not reader.project_exist(table):
    print('The table you gave me does not exist: %s' % table)

# Update every entry of the requested run; subrun == -1 selects all subruns
# of that run, any other value selects only the matching subrun.
for entry in reader.get_runs(table, old_status):
    if run == entry[0] and (subrun == -1 or subrun == entry[1]):
        entry_run = int(entry[0])
        entry_subrun = int(entry[1])
        logger.info('In table %s, changing status of run %d, subrun %d from old_status=%d to new_status=%d' % (table, entry_run, entry_subrun, old_status, new_status))
        writer.log_status(ds_status(project=table,
                                    run=entry_run,
                                    subrun=entry_subrun,
                                    seq=0,
                                    status=new_status))

print('Finished updating table %s' % table)
    def declare_to_sam(self):
        """Declare the input file of each pending run to SAM.

        Runs come from ``get_xtable_runs([project, parent_project], [1, 0])``.
        For each run the logged status (seq 0) is:

        * 100 when the input file or its ``.json`` metadata file is missing;
        * 101 when ``samweb.getMetadata`` succeeds, i.e. SAM already knows
          the file name;
        * 12 when ``samweb.declareFile`` succeeds;
        * 11 when ``samweb.declareFile`` raises; the traceback is printed.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Declaring a file to SAM: run=%d, subrun=%d ...' % (run, subrun))

            # Check input file exists. Otherwise report error
            in_file_base = self._infile_format % (run, subrun)
            in_file = '%s/%s' % (self._in_dir, in_file_base)
            in_json = '%s/%s.json' % (self._meta_dir, in_file_base)

            if os.path.isfile(in_file) and os.path.isfile(in_json):
                self.info('Found %s' % (in_file))
                self.info('Found %s' % (in_json))
                # Close the metadata file right after parsing it.
                with open(in_json) as json_file:
                    json_dict = json.load(json_file)

                # native SAM python call, instead of a system call
                # make sure you've done get-cert
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                # Check if the file already exists at SAM
                try:
                    samweb.getMetadata(filenameorid=in_file_base)
                    status_code = 101
                except samweb_cli.exceptions.FileNotFound:
                    # metadata already validated in get_assembler_metadata_file.py
                    try:
                        samweb.declareFile(md=json_dict)
                        status_code = 12
                    except Exception:
                        # Exception (not a bare except) lets
                        # KeyboardInterrupt/SystemExit propagate.
                        print("Unexpected error: samweb declareFile problem: ")
                        # print_exc() writes the traceback itself and returns
                        # None, so its result must not be printed.
                        traceback.print_exc()
                        print("Give this file a status 11")
                        status_code = 11

            else:
                status_code = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=status_code)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#32
0
    def error_handle(self):
        """Remove the ``.root`` output of each run fetched with status 100.

        For every entry from ``get_runs(self._project, 100)`` the input file
        is located with ``glob``.  If nothing matches, status 100 is logged
        and the run is skipped.  Otherwise the output path is derived from
        the first match (same base name, ``.root`` extension, in
        ``self._out_dir``), removed if it is a regular file, and status 1 is
        logged.  After the loop ``self._fcl_file_new`` is removed; an
        ``OSError`` from that removal is ignored.
        """
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Load the resource configuration on first use.
        if self._nruns is None:
            self.get_resource()

        remaining = self._nruns
        for entry in self.get_runs(self._project, 100):
            remaining -= 1

            run = int(entry[0])
            subrun = int(entry[1])

            self.info('cleaning failed run: run=%d, subrun=%d ...' %
                      (run, subrun))

            pattern = '%s/%s' % (self._in_dir,
                                 self._infile_format % (run, subrun))
            matches = glob.glob(pattern)
            n_matches = len(matches)

            # No input file: keep the run at status 100 and move on.
            if n_matches < 1:
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.log_status(ds_status(project=self._project,
                                          run=run,
                                          subrun=subrun,
                                          seq=0,
                                          status=100))
                continue

            # Several candidates: report them, then carry on with the first.
            if n_matches > 1:
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % matches)

            stem = os.path.splitext(os.path.basename(matches[0]))[0]
            out_file = '%s/%s' % (self._out_dir, '%s.root' % stem)

            if os.path.isfile(out_file):
                os.system('rm %s' % out_file)

            # Placeholder delay kept from the original implementation.
            time.sleep(1)

            self.log_status(ds_status(project=self._project,
                                      run=run,
                                      subrun=subrun,
                                      seq=0,
                                      status=1))

            if not remaining:
                break

        # Best-effort removal of the generated fcl file.
        try:
            os.remove(self._fcl_file_new)
        except OSError:
            pass
示例#33
0
    def transfer_file( self ):
        """Transfer the input file of each pending run to PNNL and log the result.

        Runs come from ``self.get_xtable_runs`` (this project at status 1, the
        parent project at status 0); the loop stops when the ``self._nruns``
        counter reaches zero.  One status per run is written through
        ``self.log_status``:

        * 0   -- ``self.pnnl_transfer`` returned ``(0, 0)``
        * 1   -- the transfer returned anything else, or raised
        * 100 -- the project name does not contain "pnnl"; nothing is transferred

        Returns:
            None.  Returns early, after logging an error, when the DB
            connection cannot be established.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0],self._sort_new_to_old):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Transferring a file: run=%d, subrun=%d ...' % (run,subrun) )

            in_file = '%s/%s' % ( self._in_dir, self._infile_format % ( run, subrun ) )

            if "pnnl" in self._project:
                self.info('Will  look for  %s' % os.path.basename(in_file) )

                try:
                    (resi, resj) = self.pnnl_transfer(in_file)
                except Exception as e:
                    # Keep the run at status 1, but record why the transfer
                    # failed instead of swallowing the exception silently.
                    self.error('Transfer of %s failed: %s' % (in_file, e))
                    status_code = 1
                else:
                    # Both return codes must be zero for the transfer to count.
                    status_code = 0 if (resi == 0 and resj == 0) else 1

            else:
                status_code = 100
                self.error("Big problem: This project not doing a transfer to PNNL.")

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object and log it to the DB
            self.log_status( ds_status( project = self._project,
                                        run     = run,
                                        subrun  = subrun,
                                        seq     = 0,
                                        status  = status_code ) )

            # Break from loop if counter became 0
            if not ctr: break
示例#34
0
    def validate_outfile( self ):
        """Check that each run's file and JSON companion exist in the output area.

        For runs of this project at status 2 (until the ``self._nruns`` counter
        reaches zero) the input file is located with ``glob``; ``ifdh`` is then
        asked to locate a file of the same base name, and ``<name>.json``,
        under ``self._out_dir``.  One status per run is written through
        ``self.log_status``:

        * 0   -- both ``locateFile`` calls returned without raising
        * 1   -- either ``locateFile`` call raised
        * 100 -- no input file matched the expected name pattern

        Returns:
            None.  Returns early, after logging an error, when the DB
            connection cannot be established.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs( self._project, 2 ):

            # Counter decreases by 1
            ctr -=1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Validating a file in the output directory: run=%d, subrun=%d ...' % (run,subrun))

            in_file_holder = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if not filelist:
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.log_status( ds_status( project = self._project,
                                            run     = run,
                                            subrun  = subrun,
                                            seq     = 0,
                                            status  = 100 ) )
                continue

            if len(filelist) > 1:
                # Ambiguous match: report it and carry on with the first hit.
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            # The output copies carry the same base name as the input file.
            in_file_base = os.path.basename(in_file)
            out_file = '%s/%s' % ( self._out_dir, in_file_base)
            out_json = '%s/%s.json' %( self._out_dir, in_file_base)

            # construct ifdh object
            ih = ifdh.ifdh()

            try:
                ih.locateFile( out_file )
                ih.locateFile( out_json )
                status_code = 0
            except Exception:
                status_code = 1

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object and log it to the DB
            self.log_status( ds_status( project = self._project,
                                        run     = run,
                                        subrun  = subrun,
                                        seq     = int(x[2]),
                                        status  = status_code ) )

            # Break from loop if counter became 0
            if not ctr: break
示例#35
0
    def transfer_file( self ):
        """Copy each pending run's file and its JSON metadata with ``ifdh cp``.

        Runs come from ``self.get_xtable_runs`` (this project at status 1, the
        parent project at status 0).  For every run whose input file and
        ``<input file>.json`` both exist, an ``ifdh cp -D`` subprocess copying
        the pair to ``self._out_dir`` is started.  When ``self._parallelize``
        is false each copy is awaited before the next run is handled;
        otherwise all copies are started first and then polled together.

        Statuses written through ``self.log_status`` (seq is always 0):

        * 3   -- copy started
        * 0   -- copy process exited
        * 1   -- an exception was raised while starting or awaiting the copy
        * 100 -- input file or its JSON companion not found
        * 555 -- serial mode: copy exceeded ``self._max_wait`` seconds and was killed
        * 666 -- serial mode: the kill did not take effect, ``kill -9`` was issued
        * 101 -- parallel mode: copy exceeded ``self._max_wait`` seconds and was killed

        Returns:
            None.  Returns early, after logging an error, when the DB
            connection cannot be established.
        """
        proc_list=[]
        done_list=[]
        run_id=[]

        def log_run_status(run, subrun, code):
            # Write status `code` for (run, subrun) of this project, seq 0.
            self.log_status( ds_status( project = self._project,
                                        run     = run,
                                        subrun  = subrun,
                                        seq     = 0,
                                        status  = code ) )

        def now():
            # Timestamp used in the progress messages.
            return time.strftime('%Y-%m-%d %H:%M:%S')

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Transferring a file: run=%d, subrun=%d ...' % (run,subrun) )

            # Check input file exists. Otherwise report error
            in_file_holder = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if not filelist:
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                log_run_status(run, subrun, 100)
                continue

            if len(filelist) > 1:
                # Ambiguous match: report it and carry on with the first hit.
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_json = '%s.json' % in_file

            if os.path.isfile( in_file ) and os.path.isfile( in_json ):
                self.info('Found %s' % (in_file) )
                self.info('Found %s' % (in_json) )

                try:
                    # The ifdh command is built by hand and run as a subprocess
                    # so that several transfers can be in flight at once.
                    # NOTE(review): stdout/stderr are piped but never read; a
                    # very chatty child could block on a full pipe -- confirm
                    # ifdh output stays small.
                    cmd = ['ifdh', 'cp','-D', in_file, in_json, self._out_dir]
                    proc = subprocess.Popen(cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
                    proc_list.append(proc)
                    done_list.append(False)
                    run_id.append((run,subrun))
                    self.info('Started transfer for (run,subrun)=%s @ %s' % (run_id[-1], now()))
                    log_run_status(run, subrun, 3)

                    if not self._parallelize:
                        # Serial mode: wait for this copy before the next run.
                        timed_out = False
                        time_spent = 0
                        while proc.poll() is None:
                            time.sleep(1)
                            time_spent +=1

                            if time_spent > self._max_wait:
                                timed_out = True
                                self.error('Exceeding the max wait time (%d sec). Terminating the process...' % self._max_wait)
                                proc.kill()
                                log_run_status(run, subrun, 555)
                                time.sleep(5)

                                if proc.poll() is None:
                                    self.error('Process termination failed. Hard-killing it (kill -9 %d)' % proc.pid)
                                    subprocess.call(['kill','-9',str(proc.pid)])
                                    log_run_status(run, subrun, 666)
                                break

                        # A killed copy keeps its 555/666 status; only a copy
                        # that exited by itself is reported as finished.
                        # NOTE(review): the exit code is not inspected, so a
                        # failed `ifdh cp` is also logged as 0 -- confirm.
                        if not timed_out:
                            self.info('Finished copy [%s] @ %s' % (run_id[-1], now()))
                            log_run_status(run, subrun, 0)

                    else:
                        time.sleep(1)

                except Exception as e:
                    self.error('Caught the exception and setting the status back to 1 for (run,subrun) = (%s, %s)' % (x[0],x[1]))
                    self.error('Exception was: %s' % e)
                    log_run_status(run, subrun, 1)

            else:
                self.error('Did not find the files that you told me to look for (run,subrun) = (%s, %s)' % (x[0],x[1]))
                self.error('Not found: %s' % (in_file) )
                self.error('Or not found: %s' % (in_json) )
                log_run_status(run, subrun, 100)

            # Pretend I'm doing something
            time.sleep(1)

            # Break from loop if counter became 0
            if not ctr: break

        # Serial mode has already awaited every copy.
        if not self._parallelize:
            return

        # Parallel mode: poll all started copies once per second.
        finished = False
        time_spent = 0
        while not finished:
            finished = True
            time.sleep(1)
            time_spent += 1
            active_counter = 0
            for idx, proc in enumerate(proc_list):
                if done_list[idx]: continue
                if proc.poll() is not None:
                    self.info('Finished copy [%s] @ %s' % (run_id[idx], now()))
                    log_run_status(run_id[idx][0], run_id[idx][1], 0)
                    done_list[idx] = True
                else:
                    active_counter += 1
                    finished = False

            # Progress report every 10 seconds.
            if time_spent % 10 == 0:
                self.info('Waiting for copy to be done... (%d/%d processes) ... %d [sec]' % (active_counter,len(proc_list),time_spent))

            if not finished and time_spent > self._max_wait:
                self.error('Exceeding the max wait time (%d sec). Terminating the processes...' % self._max_wait)
                # Only touch copies that are still running; finished ones keep
                # the status already logged for them.
                stuck = [idx for idx, done in enumerate(done_list) if not done]
                for idx in stuck:
                    proc_list[idx].kill()
                    log_run_status(run_id[idx][0], run_id[idx][1], 101)

                # hard kill if still alive
                time.sleep(5)
                for idx in stuck:
                    if proc_list[idx].poll() is None:
                        self.error('Process termination failed. Hard-killing it (kill -9 %d)' % proc_list[idx].pid)
                        subprocess.call(['kill','-9',str(proc_list[idx].pid)])
                break
        self.info('All finished @ %s' % now())
示例#36
0
    def process_newruns(self):
        """Extract metadata for each new run's file and write it out as JSON.

        Runs come from ``self.get_xtable_runs`` (this project at status 1, the
        parent project at status 0).  For files whose extension is ``ubdaq``
        the status and metadata come from ``self.get_ubdaq_metadata``; for any
        other file ``extractor_dict.getmetadata`` is used.  Unless that step
        left the status at 100, the metadata is dumped to
        ``<input file>.json`` and a SAMWeb client is constructed (the metadata
        validation call itself is currently disabled).

        Statuses written through ``self.log_status`` (seq is always 0):

        * 2    -- JSON written and the SAMWeb client was constructed
        * 100  -- no input file matched, or constructing the SAMWeb client raised
        * 1000 -- the metadata step left the status at 100

        Returns:
            None.  Returns early, after logging an error, when the DB
            connection cannot be established.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' %
                      (run, subrun))

            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if not filelist:
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.log_status(ds_status(project=self._project,
                                          run=run,
                                          subrun=subrun,
                                          seq=0,
                                          status=100))
                continue

            if len(filelist) > 1:
                # Ambiguous match: report it and carry on with the first hit.
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            out_file = '%s.json' % in_file
            self.info('Found %s' % (in_file))

            if in_file.strip().split('.')[-1] == "ubdaq":
                status_code, jsonData = self.get_ubdaq_metadata(
                    in_file, run, subrun)
            else:
                try:
                    jsonData = extractor_dict.getmetadata(in_file)
                    status_code = 3
                    self.info(
                        'Successfully extract metadata from the swizzled file.'
                    )
                except Exception:
                    status_code = 100
                    self.error(
                        'Failed extracting metadata from the swizzled file.')

            if status_code != 100:
                with open(out_file, 'w') as ofile:
                    json.dump(jsonData,
                              ofile,
                              sort_keys=True,
                              indent=4,
                              ensure_ascii=False)

                # Only the client construction is exercised here; the
                # validateFileMetadata call was disabled because it raises.
                try:
                    samweb_cli.SAMWebClient(experiment="uboone")
                    status_code = 2
                except Exception:
                    # self.error is given one pre-formatted message, as in
                    # every other call in this method.
                    self.error("Problem with samweb metadata: %s" % jsonData)
                    self.error(str(sys.exc_info()[0]))
                    status_code = 100

            else:
                # NOTE(review): this branch is reached when metadata extraction
                # reported 100, although the message talks about a missing
                # input file -- confirm the intended wording.
                status_code = 1000
                self.error('Did not find the input file %s' % in_file)

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object and log it to the DB
            self.log_status(ds_status(project=self._project,
                                      run=run,
                                      subrun=subrun,
                                      seq=0,
                                      status=status_code))

            # Break from loop if counter became 0
            if not ctr: break
示例#37
0
    def validate(self):
        """Check that the copied output file of each status-2 run is present.

        For runs of this project at status 2 (until the ``self._nruns`` counter
        reaches zero) the input file is located with ``glob``, the expected
        output path is built from the part of its base name before the first
        ``-``, and ``ls`` is run on that path.

        Statuses written through ``self.log_status`` (seq is always 0):

        * 0   -- ``ls`` on the output file succeeded
        * 100 -- no input file matched the expected name pattern
        * 102 -- ``ls`` on the output file failed

        The whole loop is abandoned if an input file name contains no ``-``.

        Returns:
            None.
        """
        ctr = self._nruns
        # see if the status=2 files we've processed are indeed where they should be.
        for x in self.get_runs(self._project,2):

            # Counter decreases by 1
            ctr -=1

            run    = int(x[0])
            subrun = int(x[1])

            in_file_holder = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if not filelist:
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.log_status( ds_status( project = self._project,
                                            run     = run,
                                            subrun  = subrun,
                                            seq     = 0,
                                            status  = 100 ) )
            else:
                if len(filelist) > 1:
                    # Ambiguous match: report it and carry on with the first hit.
                    self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run,subrun))
                    self.error('ERROR: List of files found %s' % filelist)

                in_file = filelist[0]
                in_file_segments = os.path.basename(in_file).split('-')
                if len(in_file_segments) < 2:
                    self.error('ERROR: The file %s does not contain the - character' % in_file)
                    self.error('ERROR: So have no idea what to do.')
                    break
                out_file_prefix = in_file_segments[0]

                out_file = '%s/%s' % ( self._out_dir, self._outfile_format % (out_file_prefix,run,subrun) )

                # A non-zero exit code from `ls` means the file is not there.
                res = subprocess.call(['ls', out_file])
                if res:
                    self.error('error on run: run=%d, subrun=%d ...' % (run,subrun))
                    status_code = 102
                else:
                    self.info('validated run: run=%d, subrun=%d ...' % (run,subrun))
                    status_code = 0

                # Log the outcome for both the success and the failure case,
                # so a missing output file is actually recorded as 102.
                self.log_status( ds_status( project = self._project,
                                            run     = run,
                                            subrun  = subrun,
                                            seq     = 0,
                                            status  = status_code ) )

            # Break from loop if counter became 0
            if not ctr: break
示例#38
0
    def process_newruns(self):
        """Copy the input file of each new run to the output directory with ``cp``.

        For runs of this project at status 1 (until the ``self._nruns`` counter
        reaches zero) the input file is located with ``glob``, the output path
        is built from the part of its base name before the first ``-``, and a
        ``cp -v`` subprocess is started.  When ``self._parallelize`` is false
        each copy is awaited before the next run is handled; otherwise all
        copies are started first and then polled together.

        Statuses written through ``self.log_status`` (seq is always 0):

        * 3   -- copy started
        * 2   -- copy process exited
        * 100 -- no input file matched the expected name pattern
        * 101 -- copy exceeded ``self._max_wait`` seconds and was killed

        The run loop is abandoned if an input file name contains no ``-``.

        Returns:
            None.
        """
        ctr = self._nruns
        proc_list=[]
        done_list=[]
        run_id=[]

        def log_run_status(run, subrun, code):
            # Write status `code` for (run, subrun) of this project, seq 0.
            self.log_status( ds_status( project = self._project,
                                        run     = run,
                                        subrun  = subrun,
                                        seq     = 0,
                                        status  = code ) )

        def now():
            # Timestamp used in the progress messages.
            return time.strftime('%Y-%m-%d %H:%M:%S')

        for x in self.get_runs(self._project,1):

            # Counter decreases by 1
            ctr -=1

            run    = int(x[0])
            subrun = int(x[1])

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' % (run,subrun))
            in_file_holder = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if not filelist:
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                log_run_status(run, subrun, 100)
                continue

            if len(filelist) > 1:
                # Ambiguous match: report it and carry on with the first hit.
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_file_segments = os.path.basename(in_file).split('-')
            if len(in_file_segments) < 2:
                self.error('ERROR: The file %s does not contain the - character' % in_file)
                self.error('ERROR: So have no idea what to do.')
                break
            out_file_prefix = in_file_segments[0]
            out_file = '%s/%s' % ( self._out_dir, self._outfile_format % (out_file_prefix,run,subrun) )

            # NOTE(review): stdout/stderr are piped but never read; fine for
            # the single line `cp -v` prints per file.
            cmd = ['cp', '-v', in_file, out_file]
            proc = subprocess.Popen(cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
            proc_list.append(proc)
            done_list.append(False)
            run_id.append((run,subrun))
            self.info('Started copy (run,subrun)=%s @ %s' % (run_id[-1], now()))
            log_run_status(run, subrun, 3)

            if not self._parallelize:
                # Serial mode: wait for this copy before the next run.
                timed_out = False
                time_spent = 0
                while proc.poll() is None:
                    time.sleep(1)
                    time_spent +=1

                    if time_spent > self._max_wait:
                        timed_out = True
                        self.error('Exceeding the max wait time (%d sec). Terminating the process...' % self._max_wait)
                        proc.kill()
                        time.sleep(5)

                        if proc.poll() is None:
                            self.error('Process termination failed. Hard-killing it (kill -9 %d)' % proc.pid)
                            subprocess.call(['kill','-9',str(proc.pid)])
                        break

                if timed_out:
                    # Same code the parallel branch uses for a killed copy,
                    # instead of reporting the aborted copy as finished.
                    log_run_status(run, subrun, 101)
                else:
                    # NOTE(review): the exit code of cp is not inspected, so a
                    # failed copy is also logged as 2 -- confirm.
                    self.info('Finished copy [%s] @ %s' % (run_id[-1], now()))
                    log_run_status(run, subrun, 2)

            # if parallelized, just sleep 5 sec and go next run
            else:
                time.sleep(5)

            if not ctr: break

        # if not parallelized, done
        if not self._parallelize:
            return

        # Parallel mode: poll all started copies once per second.
        finished = False
        time_spent = 0
        while not finished:
            finished = True
            time.sleep(1)
            time_spent += 1
            active_counter = 0
            for idx, proc in enumerate(proc_list):
                if done_list[idx]: continue
                if proc.poll() is not None:
                    self.info('Finished copy [%s] @ %s' % (run_id[idx], now()))
                    log_run_status(run_id[idx][0], run_id[idx][1], 2)
                    done_list[idx] = True
                else:
                    active_counter += 1
                    finished = False

            # Progress report every 10 seconds.
            if time_spent % 10 == 0:
                self.info('Waiting for copy to be done... (%d/%d processes) ... %d [sec]' % (active_counter,len(proc_list),time_spent))

            if not finished and time_spent > self._max_wait:
                self.error('Exceeding the max wait time (%d sec). Terminating the processes...' % self._max_wait)
                # Only touch copies that are still running; finished ones keep
                # the status already logged for them.
                stuck = [idx for idx, done in enumerate(done_list) if not done]
                for idx in stuck:
                    proc_list[idx].kill()
                    log_run_status(run_id[idx][0], run_id[idx][1], 101)

                # hard kill if still alive
                time.sleep(5)
                for idx in stuck:
                    if proc_list[idx].poll() is None:
                        self.error('Process termination failed. Hard-killing it (kill -9 %d)' % proc_list[idx].pid)
                        subprocess.call(['kill','-9',str(proc_list[idx].pid)])
                break
        self.info('All finished @ %s' % now())
示例#39
0
    def declare_to_sam(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns))
        self._project_requirement[0] = 1

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs(self._project_list,
                                      self._project_requirement):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Declaring a file to SAM: run=%d, subrun=%d ...' %
                      (run, subrun))

            status = 1

            # Check input file exists. Otherwise report error
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if (len(filelist) < 1):
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                status_code = 100
                status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=status_code)
                self.log_status(status)
                continue

            if (len(filelist) > 1):
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_json = '%s.json' % in_file

            self.info('Declaring ' + in_file + ' to SAM: using ' + in_json)

            if os.path.isfile(in_file) and os.path.isfile(in_json):
                self.info('Found %s' % (in_file))
                self.info('Found %s' % (in_json))
                json_dict = json.load(open(in_json))

                # native SAM python call, instead of a system call
                # make sure you've done get-cert
                # Perhaps we want a try block for samweb?
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                # Check if the file already exists at SAM
                try:
                    in_file_base = os.path.basename(in_file)
                    samweb.getMetadata(filenameorid=in_file_base)
                    status = 101
                    # Email the experts
                    subject = 'File %s Existing at SAM' % in_file_base
                    text = """
File %s has already exists at SAM!
                    """ % in_file_base

                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)

                except samweb_cli.exceptions.FileNotFound:
                    # metadata already validated in get_assembler_metadata_file.py
                    try:
                        samweb.declareFile(md=json_dict)
                        status = 2
                    except Exception as e:
                        #                        print "Unexpected error: samweb declareFile problem: "
                        self.error(
                            "Unexpected error: samweb declareFile problem: ")
                        self.error("%s" % e)
                        subject = "samweb declareFile problem: %s" % in_file_base
                        text = """
File %s failed to be declared to SAM!
%s
                        """ % (in_file_base, traceback.print_exc())

                        pub_smtp(os.environ['PUB_SMTP_ACCT'],
                                 os.environ['PUB_SMTP_SRVR'],
                                 os.environ['PUB_SMTP_PASS'], self._experts,
                                 subject, text)

                        # print "Give some null properties to this meta data"
                        self.error("Give this file a status 102")
                        status = 102

            else:
                status = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
示例#40
0
    def validate_sam(self):
        """Check whether each pending run's input file is declared at SAM.

        For every (run, subrun) returned by ``get_xtable_runs`` the input
        file is located with ``glob`` and SAM is asked for its metadata.
        The outcome is logged through ``log_status`` with one of:

        * 0   -- ``getMetadata`` succeeded for the file
        * 1   -- ``getMetadata`` raised ``FileNotFound``
        * 100 -- no local file matches the input pattern

        The loop stops once ``self._nruns`` runs have been handled.
        Returns ``None``.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # NOTE(review): presumably the status required of the first project
        # in self._project_list -- confirm against get_xtable_runs.
        self._project_requirement[0] = 2

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs(self._project_list,
                                      self._project_requirement):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Checking the SAM declaration: run=%d, subrun=%d ...' %
                      (run, subrun))

            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if not filelist:
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.log_status(
                    ds_status(project=self._project,
                              run=run,
                              subrun=subrun,
                              seq=0,
                              status=100))
                # This run counts against the limit too; without this check
                # the counter would go negative and the loop would never stop.
                if not ctr:
                    break
                continue

            if len(filelist) > 1:
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.error('ERROR: List of files found %s' % filelist)

            # When several files match, only the first one is checked.
            in_file_base = os.path.basename(filelist[0])
            samweb = samweb_cli.SAMWebClient(experiment="uboone")

            # Check if the file already exists at SAM
            try:
                samweb.getMetadata(filenameorid=in_file_base)
                status_code = 0
            except samweb_cli.exceptions.FileNotFound:
                status_code = 1

            # Pretend I'm doing something
            time.sleep(1)

            # Log the outcome for this (run, subrun)
            self.log_status(
                ds_status(project=self._project,
                          run=run,
                          subrun=subrun,
                          seq=0,
                          status=status_code))

            # Break from loop if counter became 0
            if not ctr:
                break
示例#41
0
    def calculate_checksum(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 1):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))
            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' %
                      (run, subrun))

            statusCode = 1

            in_file_name = self._infile_format % (run, subrun)
            in_file_holder = '%s/%s' % (self._in_dir, in_file_name)
            filelist = glob.glob(in_file_holder)
            if (len(filelist) > 1):
                self.error(
                    'ERROR: There is more than one file matching that pattern: %s'
                    % in_file_name)
            if (len(filelist) < 1):
                errorMessage = "Failed to find file%s" % in_file_holder
                subject = "get_checksum_temp Failed to find file%s" % in_file_holder
                text = """File: %s
Error message:
%s
                """ % (in_file, errorMessage)
                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'], self._experts, subject,
                         text)
                statusCode = 200
            else:
                in_file = filelist[0]
                metadata = {}
                try:
                    metadata[
                        'crc'] = samweb_client.utility.fileEnstoreChecksum(
                            in_file)
                    self._data = metadata['crc']['crc_value']
                    statusCode = 0
                except Exception:
                    errorMessage = traceback.print_exc()
                    subject = 'Failed to obtain the checksum of the file %s' % in_file
                    text = """File: %s
Error message:
%s
                """ % (in_file, errorMessage)

                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)

                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=statusCode,
                               data=self._data)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
示例#42
0
    def process_newruns(self):
        """Extract metadata for each pending run and write it out as JSON.

        For every (run, subrun) returned by ``get_xtable_runs`` the input
        file is checked with ``os.path.isfile``.  Files ending in ``ubdaq``
        go through ``self.get_ubdaq_metadata``; anything else goes through
        ``extractor_dict.getmetadata``.  Unless extraction reported status
        100, the metadata is dumped as JSON to the output file and a SAM
        client is created.  The status finally logged is one of:

        * 2    -- metadata written and SAM client created
        * 100  -- extraction failed, or creating the SAM client raised
        * 1000 -- the input file does not exist

        The loop stops once ``self._nruns`` runs have been handled.
        Returns ``None``.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' % (run, subrun))

            status_code = 1

            in_file = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
            out_file = '%s/%s' % (self._out_dir, self._outfile_format % (run, subrun))

            # Looks fine now, but if there are new troubles: run this project
            # with NRUNS=1
            if os.path.isfile(in_file):
                self.info('Found %s' % (in_file))

                if in_file.strip().split('.')[-1] == "ubdaq":
                    status_code, jsonData = self.get_ubdaq_metadata(
                        in_file, run, subrun)
                else:
                    try:
                        jsonData = extractor_dict.getmetadata(in_file)
                        status_code = 3
                        self.info('Successfully extract metadata from the swizzled file.')
                    except Exception:
                        # Exception rather than a bare except, so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        status_code = 100
                        self.error('Failed extracting metadata from the swizzled file.')

                if not status_code == 100:
                    # Close the JSON file before talking to SAM.
                    with open(out_file, 'w') as ofile:
                        json.dump(jsonData, ofile, sort_keys=True, indent=4,
                                  ensure_ascii=False)

                    # Only client construction is exercised here; metadata
                    # validation (validateFileMetadata) is currently disabled.
                    try:
                        samweb = samweb_cli.SAMWebClient(experiment="uboone")
                        status_code = 2
                    except Exception:
                        self.error('Problem with samweb metadata: %s' % (jsonData,))
                        self.error('%s' % (sys.exc_info()[0],))
                        status_code = 100

            else:
                status_code = 1000
                self.error('Did not find the input file %s' % in_file)

            # Pretend I'm doing something
            time.sleep(1)

            # Log the outcome for this (run, subrun)
            self.log_status(
                ds_status(project=self._project,
                          run=run,
                          subrun=subrun,
                          seq=0,
                          status=status_code))

            # Break from loop if counter became 0
            if not ctr:
                break
示例#43
0
    def validate(self):
        """Validate swizzling of each pending run by inspecting its log file.

        For every (run, subrun) returned by ``get_runs(self._project, 2)``
        the per-run log file ``self._log_file + "<run>_<subrun>.txt"`` is
        searched for the line announcing a clean Art exit.  The number of
        attempts is read from the data field of the run's current status and
        written back with the new status.  The status logged is one of:

        * 0   -- the log file contains the success message
        * 1   -- not validated yet
        * 100 -- no input file matches the input pattern
        * 101 -- the attempt counter exceeded 3

        The attempt counter is incremented only when the log file is missing.
        The loop stops once ``self._nruns`` runs have been handled.
        Returns ``None``.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 2):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))
            self._log_file_local = self._log_file + str(run) + "_" + str(
                subrun) + ".txt"
            # Report starting
            self.info('validating run: run=%d, subrun=%d ...' % (run, subrun))

            status_code = 1

            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if not filelist:
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.log_status(
                    ds_status(project=self._project,
                              run=run,
                              subrun=subrun,
                              seq=0,
                              status=100))
                # This run counts against the limit too; without this check
                # the counter would go negative and the loop would never stop.
                if not ctr:
                    break
                continue

            if len(filelist) > 1:
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.error('ERROR: List of files found %s' % filelist)

            # Get status object
            proj_status = self._api.get_status(
                ds_status(self._project, x[0], x[1], x[2]))
            # get data string for this project for this (run,subrun)
            datastr = proj_status._data
            # number of attempts to run this project on this (run,subrun)
            trial = 0
            if datastr != '':
                try:
                    trial = int(datastr)
                    self.info('Trial number is %i' % trial)
                except (TypeError, ValueError):
                    self.info(
                        'data field in status was neither string nor integer...'
                    )

            if os.path.exists(self._log_file_local):
                with open(self._log_file_local, 'r') as log_file:
                    contents = log_file.read()
                # Use "in" so that a match at offset 0 also counts
                # (str.find() returns 0 there, which "> 0" would reject).
                if 'Art has completed and will exit with status 0' in contents:
                    self.info(
                        'Swizzling successfully completed for: run=%d, subrun=%d ...'
                        % (run, subrun))
                    status_code = 0
            else:
                self.info(
                    'Swizzling has no corresponding logfile for: run=%d, subrun=%d ...'
                    % (run, subrun))
                trial += 1

            # if we tried this (run,subrun) too many times, flag it as bad
            if trial > 3:
                self.info('more than 3 trials...changing status to 101')
                status_code = 101

            # Log the outcome together with the updated attempt counter
            self.log_status(
                ds_status(project=self._project,
                          run=run,
                          subrun=subrun,
                          seq=int(x[2]),
                          status=status_code,
                          data=str(trial)))

            # Break from loop if counter became 0
            if not ctr:
                break
示例#44
0
    def process_newruns(self):
        """Copy each pending run's input file into the output directory.

        For every (run, subrun) returned by ``get_runs(self._project, 1)``
        the input file is located with ``glob`` and copied with ``cp -v`` in
        a subprocess.  The output name is built from ``self._outfile_format``
        and the part of the input file name before its first ``-``.

        When ``self._parallelize`` is false each copy is awaited before the
        next run is started; otherwise all copies are started first and then
        awaited together.  Statuses logged through ``log_status``:

        * 100 -- no input file matches the input pattern
        * 3   -- copy started
        * 2   -- copy process ended
        * 101 -- copy exceeded ``self._max_wait`` seconds and was killed

        The run loop stops once ``self._nruns`` runs have been handled, or
        immediately when an input file name contains no ``-``.
        Returns ``None``.
        """
        ctr = self._nruns
        proc_list = []
        done_list = []
        run_id = []
        for x in self.get_runs(self._project, 1):

            # Counter decreases by 1
            ctr -= 1

            run = int(x[0])
            subrun = int(x[1])

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' %
                      (run, subrun))
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if not filelist:
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.log_status(
                    ds_status(project=self._project,
                              run=run,
                              subrun=subrun,
                              seq=0,
                              status=100))
                # This run counts against the limit too; without this check
                # the counter would go negative and the loop would never stop.
                if not ctr:
                    break
                continue

            if len(filelist) > 1:
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.error('ERROR: List of files found %s' % filelist)

            # When several files match, the first one is copied.
            in_file = filelist[0]
            in_file_segments = os.path.basename(in_file).split('-')
            if len(in_file_segments) < 2:
                self.error(
                    'ERROR: The file %s does not contain the - character' %
                    in_file)
                self.error('ERROR: So have no idea what to do.')
                break
            out_file_prefix = in_file_segments[0]
            out_file = '%s/%s' % (self._out_dir, self._outfile_format %
                                  (out_file_prefix, run, subrun))
            cmd = ['cp', '-v', in_file, out_file]
            proc = subprocess.Popen(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            proc_list.append(proc)
            done_list.append(False)
            run_id.append((run, subrun))
            self.info('Started copy (run,subrun)=%s @ %s' %
                      (run_id[-1], time.strftime('%Y-%m-%d %H:%M:%S')))

            # Record that the copy is under way
            self.log_status(
                ds_status(project=self._project,
                          run=run,
                          subrun=subrun,
                          seq=0,
                          status=3))

            if not self._parallelize:
                # Serial mode: wait until this copy is done (or timed out)
                timed_out = False
                time_spent = 0
                while proc.poll() is None:
                    time.sleep(1)
                    time_spent += 1

                    if time_spent > self._max_wait:
                        timed_out = True
                        self.error(
                            'Exceeding the max wait time (%d sec). Terminating the process...'
                            % self._max_wait)
                        proc.kill()
                        time.sleep(5)

                        if proc.poll() is None:
                            self.error(
                                'Process termination failed. Hard-killing it (kill -9 %d)'
                                % proc.pid)
                            subprocess.call(['kill', '-9', str(proc.pid)])
                            break

                if timed_out:
                    # Same status the parallel branch uses for a killed copy,
                    # so a timed-out copy is not recorded as finished.
                    final_code = 101
                else:
                    # NOTE(review): the cp return code is not inspected, so a
                    # failed copy is also logged as 2 -- confirm intended.
                    self.info('Finished copy [%s] @ %s' %
                              (run_id[-1], time.strftime('%Y-%m-%d %H:%M:%S')))
                    final_code = 2
                self.log_status(
                    ds_status(project=self._project,
                              run=run,
                              subrun=subrun,
                              seq=0,
                              status=final_code))
            else:
                # Parallel mode: just sleep 5 sec and go to the next run
                time.sleep(5)

            if not ctr:
                break

        # if not parallelized, done
        if not self._parallelize:
            return

        finished = False
        time_spent = 0
        while not finished:
            finished = True
            time.sleep(1)
            time_spent += 1
            active_counter = 0
            for idx, proc in enumerate(proc_list):
                if done_list[idx]:
                    continue
                if proc.poll() is not None:
                    self.info('Finished copy [%s] @ %s' %
                              (run_id[idx], time.strftime('%Y-%m-%d %H:%M:%S')))
                    self.log_status(
                        ds_status(project=self._project,
                                  run=run_id[idx][0],
                                  subrun=run_id[idx][1],
                                  seq=0,
                                  status=2))
                    done_list[idx] = True
                else:
                    active_counter += 1
                    finished = False

            # Progress report once every 10 seconds while copies are running
            if not finished and time_spent % 10 == 0:
                self.info(
                    'Waiting for copy to be done... (%d/%d processes) ... %d [sec]'
                    % (active_counter, len(proc_list), time_spent))

            if time_spent > self._max_wait:
                self.error(
                    'Exceeding the max wait time (%d sec). Terminating the processes...'
                    % self._max_wait)
                # Only touch copies that are still running: finished ones
                # already have status 2 logged and must not be overwritten.
                killed = []
                for idx, proc in enumerate(proc_list):
                    if done_list[idx]:
                        continue
                    proc.kill()
                    killed.append(proc)
                    self.log_status(
                        ds_status(project=self._project,
                                  run=run_id[idx][0],
                                  subrun=run_id[idx][1],
                                  seq=0,
                                  status=101))

                if killed:
                    # hard kill whatever is still alive after a grace period
                    time.sleep(5)
                    for proc in killed:
                        if proc.poll() is None:
                            self.error(
                                'Process termination failed. Hard-killing it (kill -9 %d)'
                                % proc.pid)
                            subprocess.call(['kill', '-9', str(proc.pid)])
                break
        self.info('All finished @ %s' % time.strftime('%Y-%m-%d %H:%M:%S'))
示例#45
0
    def process_newruns(self):
        """Launch ``lar`` swizzling jobs for pending runs and wait for them.

        For every (run, subrun) returned by ``get_xtable_runs`` the input
        file is located with ``glob`` and, provided disk, CPU and memory
        checks pass, a ``lar`` shell command is started in a subprocess.
        After the run loop, all jobs tracked in ``self._proc_list`` are
        polled every 5 seconds until none is active; each job's stdout is
        written to its log file.  Statuses logged through ``log_status``:

        * 100 -- no input file matches the pattern, or the command could not
                 be assembled
        * 404 -- the matched path is not a regular file
        * 3   -- job launched
        * 2   -- job ended with return code 0, or was killed after
                 ``self._proc_lifetime`` seconds
        * otherwise the job's non-zero return code

        Raises:
            Exception: when disk usage, CPU usage or available memory is
                beyond the configured limit.

        The run loop stops once ``self._nruns`` runs have been handled.
        Returns ``None``.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):
            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))
            self._log_file_local = self._log_file + str(run) + "_" + str(
                subrun) + ".txt"
            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' %
                      (run, subrun))

            status_code = 1

            # Check input file exists. Otherwise report error
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if not filelist:
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.log_status(
                    ds_status(project=self._project,
                              run=run,
                              subrun=subrun,
                              seq=0,
                              status=100))
                # This run counts against the limit too; without this check
                # the counter would go negative and the loop would never stop.
                if not ctr:
                    break
                continue

            if len(filelist) > 1:
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % ((run, subrun), in_file_holder))
                self.error('ERROR: List of files found %s' % filelist)

            # When several files match, the first one is used.
            in_file = filelist[0]
            in_file_base_no_ext = os.path.splitext(
                os.path.basename(in_file))[0]
            out_file = '%s/%s.root' % (self._out_dir, in_file_base_no_ext)
            # Build the histogram name from the extension-less base name.
            # str.strip(".root") would strip any of the characters '.', 'r',
            # 'o', 't' from BOTH ends rather than removing the suffix.
            hist_file = '%s/%s_hist.root' % (self._out_dir,
                                             in_file_base_no_ext)

            # HACK: run 1409 is the first run taken with the cable swap
            # performed on Thurs Aug 13, 2015. Earlier data must not be
            # swizzled with uboonecode v04_19_00 since there is not yet an
            # interval of validity for the database and channel mapping, so
            # anything before run 1409 is ignored and not processed.
            if not run > 1408:
                # Nothing is launched or logged for this run, so do not let
                # it consume the run limit (a bare "continue" here used to
                # skip the limit check and drive the counter negative).
                ctr += 1
                continue

            if not os.path.isfile(in_file):
                self.error('Could not find file %s. Assigning status 404' %
                           in_file)
                status_code = 404

            else:

                self.info('Found %s' % (in_file))

                try:
                    # form the lar command
                    cmd = ("lar -c " + self._fcl_file_new + " -s " + in_file +
                           " -o " + out_file + " -T " + hist_file + " ")
                    self.info('Launch cmd is ' + cmd)

                except Exception as e:
                    self.error("%s: %s" % (type(e).__name__, e))
                    self.error("Give this file a status 100")
                    status_code = 100

                if not status_code == 100:
                    # NOTE(review): the three resource checks below raise out
                    # of this method, leaving jobs launched earlier in this
                    # call unwatched -- confirm callers handle that.

                    # Check available space
                    if ":" in self._in_dir:
                        df_cmd = 'ssh -x %s "df %s" | tail -n1' % tuple(
                            self._in_dir.split(":"))
                    else:
                        df_cmd = 'df %s | tail -n1' % (self._in_dir)
                    disk_frac_used = int(
                        os.popen(df_cmd).read().split()[4].strip("%"))

                    if disk_frac_used > self._disk_frac_limit:
                        self.info(
                            '%i%% of disk space used (%s), will not swizzle until %i%% is reached.'
                            % (disk_frac_used, self._in_dir,
                               self._disk_frac_limit))
                        raise Exception(
                            " raising Exception: not enough disk space.")

                    # Check available cpu
                    cpu_used = float(
                        os.popen(
                            "top -bn 2 -d 0.01 | grep '^Cpu.s.' | tail -n 1 | gawk '{print $2+$4+$6}'"
                        ).read().strip("\n"))
                    if cpu_used > self._cpu_frac_limit:
                        self.info(
                            '%i of cpu used; will not swizzle until %i is reached.'
                            % (cpu_used, self._cpu_frac_limit))
                        raise Exception(" raising Exception: not enough cpu.")

                    # Check available memory
                    mem_avail = float(
                        os.popen(
                            "free -m | grep buffers | tail -n 1|  gawk '{print $4}'"
                        ).read().strip("\n"))
                    if mem_avail < int(self._available_memory):
                        self.info(
                            '%d Memory available, will not swizzle until %d is reached.'
                            % (mem_avail, int(self._available_memory)))
                        raise Exception(
                            " raising Exception: not enough memory available.")

                    self._proc_list.append(
                        sub.Popen(cmd,
                                  shell=True,
                                  stderr=sub.PIPE,
                                  stdout=sub.PIPE))
                    self._log_file_list.append(self._log_file_local)
                    self._run_list.append(x[0])
                    self._subrun_list.append(x[1])
                    self.info(
                        ' Swizzling (run,subrun,processID) = (%d,%d,%d)...' %
                        (run, subrun, self._proc_list[-1].pid))
                    self._proc_active.append(True)
                    status_code = 3
                    time.sleep(1)

            # Log the outcome for this (run, subrun)
            self.info('logging (run,subrun) = (%i,%i) with status %i' %
                      (run, subrun, status_code))
            self.log_status(
                ds_status(project=self._project,
                          run=run,
                          subrun=subrun,
                          seq=0,
                          status=status_code))

            # Break from run/subrun loop if counter became 0
            if not ctr:
                break

        # NOTE: polling while the output is piped can deadlock, because the
        # pipes are only read once a job has ended. All jobs are needed
        # before proceeding anyway, so keep looping until each has finished.
        # The real time cost seems to be reading (out, err) into local files.
        time_spent = 0
        while 1:
            active_counter = 0
            time.sleep(5)
            time_spent += 5
            for idx, proc in enumerate(self._proc_list):
                if not self._proc_active[idx]:
                    continue

                if proc.poll() is not None:
                    self._proc_active[idx] = False
                    self.info('The return code was %d ' % proc.returncode)
                    self.info('Finished swizzler process %s' % proc.pid)
                    (out, err) = proc.communicate()
                    # Only stdout is kept; stderr is read and discarded.
                    with open(str(self._log_file_list[idx]), 'w') as fout:
                        fout.write(out)

                    if proc.returncode != 0:
                        status_code = proc.returncode
                    else:
                        status_code = 2

                    # Log status
                    self.log_status(
                        ds_status(project=self._project,
                                  run=int(self._run_list[idx]),
                                  subrun=int(self._subrun_list[idx]),
                                  seq=0,
                                  status=status_code))

                else:
                    active_counter += 1
            if not active_counter:
                break
            if time_spent % 20 == 0:
                self.info('Swizzling process %d/%d active... @ %d [sec]' %
                          (active_counter, len(self._proc_list), time_spent))
            else:
                self.debug('Swizzling process %d/%d active... @ %d [sec]' %
                           (active_counter, len(self._proc_list), time_spent))

            if time_spent > self._proc_lifetime:
                # Format with %, as every other log call here does; the
                # original passed the value as a second positional argument.
                self.error(
                    'Exceeding the allowed time span (%d [sec])! Killing lar jobs...'
                    % self._proc_lifetime)
                # Loop over & kill
                for idx, proc in enumerate(self._proc_list):
                    # ignore already finished ones
                    if not self._proc_active[idx]:
                        continue
                    # kill
                    proc.kill()
                    # Log "finished" status
                    self.log_status(
                        ds_status(project=self._project,
                                  run=int(self._run_list[idx]),
                                  subrun=int(self._subrun_list[idx]),
                                  seq=0,
                                  status=2))

                # Wait 30 sec and make sure they are dead
                time.sleep(30)
                for idx, proc in enumerate(self._proc_list):
                    if not self._proc_active[idx]:
                        continue
                    if proc.poll() is None:
                        self.warning(
                            'Process %d not ending 30 sec after SIGINT... kill -9 now...'
                            % proc.pid)
                        sub.call(['kill', '-9', str(proc.pid)])
                # The next pass of the loop reaps the killed jobs and ends
                # the wait once none is active.
示例#46
0
    def error_handle(self):
        """Clean up runs flagged with status 100 and reset them to status 1.

        Runs are taken from ``self.get_runs(self._project, 100)``.  For each
        one the input file is looked up with ``glob``; an existing
        ``<input file>.json`` next to it is deleted and status 1 is logged.
        If no input file matches, status 100 is logged again and the run is
        skipped.

        The loop stops as soon as the ``self._nruns`` counter reaches zero.
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 100):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('cleaning failed run: run=%d, subrun=%d ...' % (run, subrun))

            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
            filelist = glob.glob(in_file_holder)
            if not filelist:
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run, subrun))
                self.log_status(ds_status(project=self._project,
                                          run=run,
                                          subrun=subrun,
                                          seq=0,
                                          status=100))
                # A skipped run still consumes the counter; check it here so
                # that skipping cannot step over zero and disable the limit.
                if not ctr:
                    break
                continue

            if len(filelist) > 1:
                # Ambiguous match: report it, then carry on with the first hit.
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            out_file = '%s.json' % in_file

            if os.path.isfile(out_file):
                # Delete directly rather than through a shell so that unusual
                # characters in the path cannot be interpreted as commands.
                try:
                    os.remove(out_file)
                except OSError as err:
                    self.error('Failed to remove %s: %s' % (out_file, err))

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=1)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#47
0
    def compare_dropbox_checksum( self ):
        """Cross-check the reference project's checksum against the dropbox copy.

        Runs come from ``get_xtable_runs([self._project, self._parent_project],
        [1, 0])``.  For each run the checksum stored in the status ``_data`` of
        ``self._ref_project`` (sequence 0) is converted with
        ``convert_0_adler32_to_1_adler32`` and compared with the value that
        ``get_pnfs_1_adler32_and_size`` reports for the file in
        ``self._out_dir``.  If that lookup raises ``LookupError`` the SAM
        metadata of the file is consulted instead.

        Status codes logged by this method:
            0    -- checksums agree, or SAM holds an 'enstore' checksum
            10   -- SAM checksum is not of type 'enstore'
            100  -- no input file found, or file unknown to SAM (experts are
                    mailed in the latter case)
            1000 -- checksum mismatch (experts are mailed)

        The loop stops as soon as the ``self._nruns`` counter reaches zero.
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs( [self._project, self._parent_project], [1, 0] ):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' % (run,subrun))

            statusCode = 1
            in_file_holder = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if not filelist:
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.log_status( ds_status( project = self._project,
                                            run     = run,
                                            subrun  = subrun,
                                            seq     = 0,
                                            status  = 100 ) )
                # A skipped run still consumes the counter; check it here so
                # that skipping cannot step over zero and disable the limit.
                if not ctr:
                    break
                continue

            if len(filelist) > 1:
                # Ambiguous match: report it, then carry on with the first hit.
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_file_name = os.path.basename(in_file)
            out_file = '%s/%s' % ( self._out_dir, in_file_name )

            #Note that this has the sequence number hard coded as number 0
            RefStatus = self._api.get_status( ds_status(self._ref_project, run, subrun, 0))
            near1_checksum = RefStatus._data

            # NOTE(review): self._data is only assigned on the mismatch and
            # SAM-enstore paths below; on the other paths the value logged at
            # the end is whatever an earlier iteration/call left behind.
            # Confirm whether it should be reset per run.
            try:
                pnfs_adler32_1, pnfs_size = get_pnfs_1_adler32_and_size( out_file )
                near1_adler32_1 = convert_0_adler32_to_1_adler32(near1_checksum, pnfs_size)

                if near1_adler32_1 == pnfs_adler32_1:
                    statusCode = 0
                else:
                    subject = 'Checksum different in run %d, subrun %d between %s and PNFS' % ( run, subrun, self._ref_project )
                    text = '%s\n' % subject
                    text += 'Run %d, subrun %d\n' % ( run, subrun )
                    text += 'Converted %s checksum: %s\n' % ( self._ref_project, near1_adler32_1 )
                    text += 'Converted PNFS checksum: %s\n' % ( pnfs_adler32_1 )

                    pub_smtp( os.environ['PUB_SMTP_ACCT'],
                              os.environ['PUB_SMTP_SRVR'],
                              os.environ['PUB_SMTP_PASS'],
                              self._experts, subject, text )
                    statusCode = 1000
                    self._data = '%s:%s;PNFS:%s' % ( self._ref_project, near1_adler32_1, pnfs_adler32_1 )

            except LookupError:

                self.warning("Could not find file in the dropbox %s" % out_file)
                self.warning("Gonna go looking on tape %s" % in_file_name)
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                try:
                    meta = samweb.getMetadata(filenameorid=in_file_name)
                    # The checksum entry is split on ':' into (type, value).
                    checksum_info = meta['checksum'][0].split(':')
                    if checksum_info[0] == 'enstore':
                        self._data = checksum_info[1]
                        statusCode = 0
                    else:
                        statusCode = 10

                except samweb_cli.exceptions.FileNotFound:
                    subject = 'Failed to locate file %s at SAM' % in_file
                    text = 'File %s is not found at SAM!' % in_file
                    pub_smtp( os.environ['PUB_SMTP_ACCT'],
                              os.environ['PUB_SMTP_SRVR'],
                              os.environ['PUB_SMTP_PASS'],
                              self._experts, subject, text )
                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = 0,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr:
                break
示例#48
0
    def process_newruns(self):
        """Extract metadata for new runs and write it to ``<input file>.json``.

        Runs come from ``get_xtable_runs([self._project, self._parent_project],
        [1, 0])``.  The input file is located with ``glob``.  Files whose
        extension is ``ubdaq`` are handled by ``self.get_ubdaq_metadata``;
        everything else goes through ``extractor_dict.getmetadata``.

        Status codes logged by this method:
            2    -- metadata written to the JSON file
            100  -- no input file found, or creating the SAM client failed
            1000 -- metadata extraction reported status 100

        The loop stops as soon as the ``self._nruns`` counter reaches zero.
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' % (run, subrun))

            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))
            filelist = glob.glob(in_file_holder)
            if not filelist:
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run, subrun))
                self.log_status(ds_status(project=self._project,
                                          run=run,
                                          subrun=subrun,
                                          seq=0,
                                          status=100))
                # A skipped run still consumes the counter; check it here so
                # that skipping cannot step over zero and disable the limit.
                if not ctr:
                    break
                continue

            if len(filelist) > 1:
                # Ambiguous match: report it, then carry on with the first hit.
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            out_file = '%s.json' % in_file
            self.info('Found %s' % (in_file))

            if in_file.strip().split('.')[-1] == "ubdaq":
                status, jsonData = self.get_ubdaq_metadata(in_file, run, subrun)
            else:
                try:
                    jsonData = extractor_dict.getmetadata(in_file)
                    status = 3
                    self.info('Successfully extract metadata from the swizzled file.')
                except Exception:
                    status = 100
                    self.error('Failed extracting metadata from the swizzled file.')

            if status != 100:
                with open(out_file, 'w') as ofile:
                    json.dump(jsonData, ofile, sort_keys=True, indent=4, ensure_ascii=False)

                # To Eric: what are you doing here?
                # Only the SAM client construction is exercised; the metadata
                # validation call is still disabled.
                try:
                    samweb = samweb_cli.SAMWebClient(experiment="uboone")
                    # samweb.validateFileMetadata(json_file) # this throws/raises exception
                    status = 2
                except Exception:
                    self.error("Problem with samweb metadata: %s" % (jsonData,))
                    self.error(sys.exc_info()[0])
                    status = 100

            else:
                # NOTE(review): this branch is reached when extraction
                # reported status 100, although the message talks about a
                # missing input file -- confirm the intended wording.
                status = 1000
                self.error('Did not find the input file %s' % in_file)

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#49
0
    def calculate_checksum( self ):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs( self._project, 1 ):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))
            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' % (run,subrun))

            statusCode = 1

            in_file_name = self._infile_format % ( run, subrun )
            in_file_holder = '%s/%s' % ( self._in_dir, in_file_name )
            filelist = glob.glob( in_file_holder )
            if (len(filelist)>1):
                self.error('ERROR: There is more than one file matching that pattern: %s' % in_file_name)
            if (len(filelist)<1):
                errorMessage = "Failed to find file%s"%in_file_holder
                subject = "get_checksum_temp Failed to find file%s"%in_file_holder
                text = """File: %s
Error message:
%s
                """ % ( in_file, errorMessage )
                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )
                statusCode = 200
            else:
                in_file = filelist[0]
                metadata = {}
                try:
                    metadata['crc'] = samweb_client.utility.fileEnstoreChecksum( in_file )
                    self._data = metadata['crc']['crc_value']
                    statusCode = 0
                except Exception:
                    errorMessage = traceback.print_exc()
                    subject = 'Failed to obtain the checksum of the file %s' % in_file
                    text = """File: %s
Error message:
%s
                """ % ( in_file, errorMessage )
                    
                    pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = 0,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
示例#50
0
文件: ds_beamdaq.py 项目: LArbys/pubs
    def validate(self):
        """Check the info file and beam files of runs in status 2.

        Runs come from ``self.get_runs(self._project, 2)``.  For each run the
        info file ``self._infodir/self._infofile % (run, subrun)`` is read;
        every line containing "events" whose third whitespace-separated field
        is a positive integer must have a matching, non-empty
        ``beam_<first field>_<run>_<subrun>.dat`` file in ``self._infodir``.

        If all checks pass, status 0 is logged for the run.  A missing info
        file or a missing/empty beam file is reported through ``self.error``
        and ends the whole call without logging a status for that run.

        The loop stops as soon as the ``self._nruns`` counter reaches zero.
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:

            resource = self._api.get_resource(self._project)

            self._nruns = int(resource['NRUNS'])
            self._infodir = resource['INFODIR']
            self._infofile = resource['INFOFILE']

        ctr = self._nruns
        for x in self.get_runs(self._project, 2):

            # Counter decreases by 1
            ctr -= 1

            run = int(x[0])
            subrun = int(x[1])

            fname = '%s/%s' % (self._infodir, self._infofile % (run, subrun))

            # check that info was created and look for beam events
            # if beam events, check for beam file
            self.info('Parse info file %s and check created files' % fname)
            if not os.path.isfile(fname):
                # change status?
                self.error('%s not created' % fname)
                # Stop here, as for a missing beam file, rather than letting
                # open() fail on the file just reported as absent.
                return

            # The context manager closes the file on the early returns too.
            with open(fname) as info_file:
                for line in info_file:
                    if "events" not in line:
                        continue
                    wds = line.split()
                    if int(wds[2]) <= 0:
                        continue
                    beamfname = '%s/beam_%s_%07i_%05i.dat' % (
                        self._infodir, wds[0], run, subrun)
                    if not os.path.isfile(beamfname):
                        # change to appropriate status
                        self.error('%s not created' % beamfname)
                        return
                    if os.stat(beamfname).st_size == 0:
                        # change to appropriate status
                        self.error('%s is empty' % beamfname)
                        return

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=0)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
    def process_newruns(self):
        """Declare the metadata of newly produced files to SAM.

        Runs come from ``get_xtable_runs([self._project, self._parent_project],
        [1, 0])``.  For each run whose input file exists, SAM is first asked
        for existing metadata; if that query fails, the metadata is extracted
        with ``extractor_dict.getmetadata`` and declared through
        ``samweb.declareFile``.

        Status codes logged by this method:
            2   -- metadata extracted and declared to SAM
            3   -- metadata extracted, but declaring it failed
            100 -- anything else (input file missing, extraction failed, or
                   SAM already returned metadata for the file)

        The loop stops as soon as the ``self._nruns`` counter reaches zero.
        """

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        # Below picks up successfully swizzled files
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('processing new run: run=%d, subrun=%d ...' % (run, subrun))

            status = 100

            # Check input file exists. Otherwise report error
            in_file = '%s/%s' % (self._in_dir, self._infile_format % (run, subrun))

            print('Looking for %s' % (in_file))
            if os.path.isfile(in_file):
                self.info('Found %s' % (in_file))

                # Check metadata
                # NOTE(review): the full path is passed as filenameorid here;
                # confirm SAM does not expect the bare file name.
                has_metadata = False
                try:
                    samweb = samweb_cli.SAMWebClient(experiment="uboone")
                    md = samweb.getMetadata(filenameorid=in_file)
                    print('Here 0 %s' % (md,))
                    self.info('Weirdly, metadata already registered in SAM for %s.  ... ' % (in_file))
                    has_metadata = True
                except Exception:
                    # A failing query is the expected case for a file that has
                    # not been declared yet, so it is deliberately not reported.
                    pass

                # Should be that metadata is in the artroot file. (But not yet declared it to SAM.)
                # Thus, retrieve metadata from file; use it to declare file with SAM.
                # NOTE(review): when SAM already has metadata the status stays
                # at 100 -- confirm that this is intended.
                if not has_metadata:
                    try:
                        self.info( ' I feel a couple woos comin on, cus ')
                        md = extractor_dict.getmetadata(in_file)
                    except Exception:
                        self.info('Failed extracting artroot metadata for %s.  ... ' % (in_file))
                    else:
                        self.info( ' ... there it was.  ... (Just extracted the meta data from root file.) ')
                        status = 3
                        try:
                            samweb = samweb_cli.SAMWebClient(experiment="uboone")
                            samweb.declareFile(md=md)
                            status = 2
                            self.info('Successful extraction of artroot metadata and declaring it to SAM for %s.  ... ' % (in_file))
                        except Exception:
                            self.info('Failed declaring metadata to SAM for %s.  ... ' % (in_file))

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr:
                break
示例#52
0
# Writer handle through which the updated statuses are logged.
writer = ds_writer(pubdb_conn_info.writer_info(), logger)

if not reader.project_exist(table):
    print('The table you gave me does not exist: %s' % table)

# Walk over all entries currently in old_status and move the selected ones to
# new_status.  A subrun of -1 selects every subrun of the requested run.
for x in reader.get_runs(table, old_status):

    # Entry belongs to a different run: nothing to do.
    if not run == x[0]:
        continue

    # A specific subrun was requested and this entry is not it.
    if not subrun == -1 and not subrun == x[1]:
        continue

    this_run = int(x[0])
    this_subrun = int(x[1])
    logger.info(
        'In table %s, changing status of run %d, subrun %d from old_status=%d to new_status=%d'
        % (table, this_run, this_subrun, old_status, new_status))
    updated_status = ds_status(project=table,
                               run=this_run,
                               subrun=this_subrun,
                               seq=0,
                               status=new_status)
    writer.log_status(updated_status)