Example #1
    def exists(self, pfn):
        """ Checks if the requested file is known by the referred RSE.

            :param pfn: Physical file name

            :returns: True if the file exists, False if it doesn't

            :raises: ServiceUnavailable
        """
        dp = DataPoint(str(pfn), self.cfg)
        fileinfo = arc.FileInfo()

        status = dp.h.Stat(fileinfo)
        if not status:
            if status.GetErrno() == errno.ENOENT:
                return False
            raise ServiceUnavailable(str(status))

        return True
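
A standalone sketch of the same Stat/ENOENT pattern, using only the arc calls already shown in these examples. The DataPoint wrapper class used above is project-specific, so this sketch goes through arc.DataHandle directly (as Example #4 does); the URL and helper name are illustrative:

import errno

import arc

def remote_file_exists(url_str):
    # Hypothetical helper: Stat the URL, map ENOENT to False and other errors to an exception
    usercfg = arc.UserConfig()
    handle = arc.DataHandle(arc.URL(url_str), usercfg)  # keep the handle alive while the DataPoint is in use
    info = arc.FileInfo()
    status = handle.__ref__().Stat(info)
    if not status:
        if status.GetErrno() == errno.ENOENT:
            return False
        raise RuntimeError(str(status))  # stand-in for the ServiceUnavailable used above
    return True

print(remote_file_exists("gsiftp://example.org/some/file"))  # illustrative URL
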
Example #2
    def checkOutputFiles(self, surldict):
        '''
        Check if SURLs are working. Returns a dict of arcjobid:file status
        Do bulk arc.DataPoint.Stat() with max 100 files per request. The list
        of surls passed here all belong to the same SE.
        '''

        if self.arcconf.get(['downtime', 'srmdown']) == 'True':
            self.log.info("SRM down, will validate later")
            return dict((surl['arcjobid'], self.retry) for surls in surldict.values() for surl in surls)

        result = {}
        datapointlist = arc.DataPointList()
        surllist = []
        dummylist = []
        bulklimit = 100
        for surls in surldict.values():
            count = 0
            for surl in surls:
                count += 1
                if not surl['surl']:
                    self.log.error("Missing surl for %s, cannot validate" % surl['arcjobid'])
                    result[surl['arcjobid']] = self.failed
                    continue
                dp = aCTUtils.DataPoint(str(surl['surl']), self.uc)
                if not dp or not dp.h:
                    self.log.warning("URL %s not supported, skipping validation" % str(surl['surl']))
                    result[surl['arcjobid']] = self.ok
                    continue
                datapointlist.append(dp.h)
                dummylist.append(dp) # keep the DataPoint wrappers alive so the entries in datapointlist stay valid
                surllist.append(surl)

                if count % bulklimit != 0 and count != len(surls):
                    continue

                # do bulk call
                (files, status) = dp.h.Stat(datapointlist)
                if not status and status.GetErrno() != errno.EOPNOTSUPP:
                    # If call fails it is generally a server or connection problem
                    # and in most cases should be retryable
                    if status.Retryable():
                        self.log.warning("Failed to query files on %s, will retry later: %s" % (dp.h.GetURL().Host(), str(status)))
                        result.update(dict((k['arcjobid'], self.retry) for k in surllist))
                    else:
                        self.log.error("Failed to query files on %s: %s" % (dp.h.GetURL().Host(), str(status)))
                        result.update(dict((k['arcjobid'], self.failed) for k in surllist))

                else:
                    # files is a list of FileInfo objects. If a file was not found or hit
                    # another error during the listing, its FileInfo object will be invalid
                    for i in range(len(datapointlist)):
                        if status.GetErrno() == errno.EOPNOTSUPP:
                            # Bulk stat was not supported, do non-bulk here
                            f = arc.FileInfo()
                            st = datapointlist[i].Stat(f)
                            if not st or not f:
                                if st.Retryable():
                                    self.log.warning("Failed to query files on %s, will retry later: %s" % (datapointlist[i].GetURL().Host(), str(st)))
                                    result[surllist[i]['arcjobid']] = self.retry
                                else:
                                    self.log.warning("%s: Failed to find info on %s" % (surllist[i]['arcjobid'], datapointlist[i].GetURL().str()))
                                    result[surllist[i]['arcjobid']] = self.failed
                                files.append(None)
                            else:
                                files.append(f)

                        if not files[i]:
                            self.log.warning("%s: Failed to find info on %s" % (surllist[i]['arcjobid'], datapointlist[i].GetURL().str()))
                            result[surllist[i]['arcjobid']] = self.failed
                        else:
                            # compare metadata
                            try:
                                self.log.debug("File %s for %s: expected size %d, checksum %s, actual size %d, checksum %s" %
                                               (datapointlist[i].GetURL().str(), surllist[i]['arcjobid'], int(surllist[i]['fsize']),
                                               surllist[i]['checksum'], int(files[i].GetSize()), files[i].GetCheckSum()))
                            except Exception:
                                self.log.warning("Unhandled issue with file %d" % i)
                                result[surllist[i]['arcjobid']] = self.failed
                                continue
                            if int(surllist[i]['fsize']) != int(files[i].GetSize()):
                                self.log.warning("File %s for %s: size on storage (%d) differs from expected size (%d)" %
                                                 (datapointlist[i].GetURL().str(), surllist[i]['arcjobid'],
                                                  int(files[i].GetSize()), int(surllist[i]['fsize'])))
                                result[surllist[i]['arcjobid']] = self.failed
                                continue
                            if not files[i].CheckCheckSum():
                                self.log.warning("File %s for %s: no checksum information available" %
                                                 (datapointlist[i].GetURL().str(), surllist[i]['arcjobid']))
                            elif surllist[i]['checksum'] != files[i].GetCheckSum():
                                self.log.warning("File %s for %s: checksum on storage (%s) differs from expected checksum (%s)" %
                                                 (datapointlist[i].GetURL().str(), surllist[i]['arcjobid'],
                                                  files[i].GetCheckSum(), surllist[i]['checksum']))
                                result[surllist[i]['arcjobid']] = self.failed
                                continue

                            self.log.info("File %s validated for %s" % (datapointlist[i].GetURL().str(), surllist[i]['arcjobid']))
                            # don't overwrite previous failed file for this job
                            if surllist[i]['arcjobid'] not in result:
                                result[surllist[i]['arcjobid']] = self.ok

                # Clear lists and go to next round
                datapointlist = arc.DataPointList()
                surllist = []
                dummylist = []

        return result
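
The bulk-Stat pattern in Example #2 can be reduced to a short sketch (assuming the arc bindings and URLs reachable with a plain UserConfig; the helper name is illustrative). The DataHandle owners must stay alive for as long as the DataPointList is used, which is what dummylist does above:

import arc

def bulk_stat(urls, usercfg):
    # Hypothetical helper: one bulk Stat call over several URL strings
    handles = []                      # keep DataHandle owners alive
    datapoints = arc.DataPointList()  # list handed to the bulk call
    first = None
    for u in urls:
        h = arc.DataHandle(arc.URL(u), usercfg)
        handles.append(h)
        dp = h.__ref__()
        if first is None:
            first = dp
        datapoints.append(dp)
    if first is None:
        return [], None
    # as in Example #2, the call is made on one DataPoint but stats the whole list
    (files, status) = first.Stat(datapoints)
    return files, status
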
Example #3
    def fetchJobs(self, arcstate, nextarcstate):

        # Get list of jobs in the right state
        jobstofetch = self.db.getArcJobs("arcstate='" + arcstate +
                                         "' and cluster='" + self.cluster +
                                         "'" + " limit 100")

        if not jobstofetch:
            return
        self.log.info("Fetching %i jobs" %
                      sum(len(v) for v in jobstofetch.values()))

        fetched = []
        notfetched = []
        notfetchedretry = []
        for proxyid, jobs in jobstofetch.items():
            self.uc.CredentialString(self.db.getProxy(proxyid))

            # Clean the download dir in case something was left from a previous attempt
            for job in jobs:
                shutil.rmtree(
                    self.conf.get(['tmp', 'dir']) +
                    job[2].JobID[job[2].JobID.rfind('/'):], True)

            # Get list of downloadable files for these jobs
            filestodl = self.db.getArcJobsInfo(
                "arcstate='" + arcstate + "' and cluster='" + self.cluster +
                "' and proxyid='" + str(proxyid) + "'",
                ['id', 'downloadfiles'])
            # id: downloadfiles
            downloadfiles = dict(
                (row['id'], row['downloadfiles']) for row in filestodl)
            # jobs to download all files
            jobs_downloadall = dict(
                (j[0], j[2]) for j in jobs
                if j[0] in downloadfiles and not downloadfiles[j[0]])
            # jobs to download specific files
            jobs_downloadsome = dict(
                (j[0], j[2]) for j in jobs
                if j[0] in downloadfiles and downloadfiles[j[0]])

            # We don't know if a failure from JobSupervisor is retryable or not
            # so always retry
            (f, r) = self.fetchAll(jobs_downloadall)
            fetched.extend(f)
            notfetchedretry.extend(r)

            (f, n, r) = self.fetchSome(jobs_downloadsome, downloadfiles)
            fetched.extend(f)
            notfetched.extend(n)
            notfetchedretry.extend(r)

        # Check for massive failure, and back off before trying again
        # TODO: downtime awareness
        if (len(notfetched) > 10 and len(notfetched) == len(jobstofetch)) or \
           (len(notfetchedretry) > 10 and len(notfetchedretry) == len(jobstofetch)):
            self.log.error(
                "Failed to get any jobs from %s, sleeping for 5 mins" %
                self.cluster)
            time.sleep(300)
            return

        for proxyid, jobs in jobstofetch.items():
            for (id, appjobid, job, created) in jobs:
                if job.JobID in notfetchedretry:
                    self.log.warning("%s: Could not get output from job %s" %
                                     (appjobid, job.JobID))
                    # Remove download directory to allow retry
                    shutil.rmtree(
                        self.conf.get(['tmp', 'dir']) +
                        job.JobID[job.JobID.rfind('/'):], True)
                    # Check if job still exists
                    fileinfo = arc.FileInfo()
                    self.uc.CredentialString(self.db.getProxy(proxyid))
                    dp = aCTUtils.DataPoint(job.JobID, self.uc)
                    status = dp.h.Stat(fileinfo)
                    # TODO Check other permanent errors
                    if not status and status.GetErrno() == errno.ENOENT:
                        self.log.warning("%s: Job %s no longer exists" %
                                         (appjobid, job.JobID))
                        self.db.updateArcJob(
                            id, {
                                "arcstate": "donefailed",
                                "tarcstate": self.db.getTimeStamp()
                            })
                    # Otherwise try again next time
                elif job.JobID in notfetched:
                    self.log.error("%s: Failed to download job %s" %
                                   (appjobid, job.JobID))
                    self.db.updateArcJob(
                        id, {
                            "arcstate": "donefailed",
                            "tarcstate": self.db.getTimeStamp()
                        })
                else:
                    self.log.info("%s: Downloaded job %s" %
                                  (appjobid, job.JobID))
                    self.db.updateArcJob(
                        id, {
                            "arcstate": nextarcstate,
                            "tarcstate": self.db.getTimeStamp()
                        })
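
The per-job download directory used in Example #3 is derived from the ARC job ID; a small sketch of that path handling (helper names are illustrative):

import shutil

def job_download_dir(tmpdir, jobid):
    # Hypothetical helper mirroring the path construction in fetchJobs():
    # <tmp dir> + the last path component of the ARC job ID URL
    return tmpdir + jobid[jobid.rfind('/'):]

def clean_download_dir(tmpdir, jobid):
    # ignore_errors=True, as in the example, so a missing directory is not an error
    shutil.rmtree(job_download_dir(tmpdir, jobid), True)
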
Example #4
import arc
import sys

if len(sys.argv) != 2:
    sys.stdout.write("Usage: python partial_copy.py filename\n")
    sys.exit(1)

desired_size = 512
usercfg = arc.UserConfig()
url = arc.URL(sys.argv[1])
handle = arc.DataHandle(url, usercfg)
point = handle.__ref__()
point.SetSecure(False)  # GridFTP servers generally do not have an encrypted data channel
info = arc.FileInfo("")
point.Stat(info)
sys.stdout.write("Name: %s\n" % str(info.GetName()))
fsize = info.GetSize()
if fsize > desired_size:
    # Range() takes an inclusive byte range: read only the last desired_size bytes
    point.Range(fsize - desired_size, fsize - 1)
databuffer = arc.DataBuffer()
point.StartReading(databuffer)
while True:
    # for_write(True) blocks until a chunk of data is ready; r is False once reading has finished
    (r, n, length, offset, buf) = databuffer.for_write(True)
    if not r:
        break
    sys.stdout.write("BUFFER: %d :  %d  : %s\n" % (offset, length, str(buf)))
    databuffer.is_written(n)  # hand the buffer slot back to the reader
point.StopReading()
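
A variant of Example #4 that writes the fetched tail of the file to a local path instead of printing it. It uses the same arc calls, assumes for_write() returns a bytes-like chunk (as the str(buf) call above suggests), and the URL and output filename are placeholders:

import arc

usercfg = arc.UserConfig()
url = arc.URL("gsiftp://example.org/path/to/file")    # placeholder URL
handle = arc.DataHandle(url, usercfg)
point = handle.__ref__()
point.SetSecure(False)
info = arc.FileInfo("")
point.Stat(info)
fsize = info.GetSize()
tail = 512
if fsize > tail:
    point.Range(fsize - tail, fsize - 1)              # inclusive range: the last 'tail' bytes
databuffer = arc.DataBuffer()
point.StartReading(databuffer)
with open("tail.bin", "wb") as out:                   # illustrative output path
    while True:
        (r, n, length, offset, buf) = databuffer.for_write(True)
        if not r:
            break
        out.write(buf[:length])                       # 'buf' carries 'length' bytes read at file offset 'offset'
        databuffer.is_written(n)                      # return the buffer slot to the reader
point.StopReading()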