Пример #1
0
    def setUp(self):
        """Build a proxy whose ``_urlopen`` is replaced by a recording stub.

        Each call made through the stub is appended to ``self.requests`` and
        answered with ``self.response`` serialised through ``JsonDict``.
        """
        SeecrTestCase.setUp(self)
        self.requests = []
        self.proxy = InternalServerProxy("http://localhost")

        def _urlopen(url, data=None):
            # Calls with a body are recorded as (url, decoded body); calls
            # without one are recorded as the bare url.
            recorded = (url, data.decode()) if data else url
            self.requests.append(recorded)
            return StringIO(JsonDict(self.response).dumps())

        self.proxy._urlopen = _urlopen
 def setUp(self):
     """Build a proxy whose ``_urlopen`` is replaced by a recording stub.

     Each call made through the stub is appended to ``self.requests`` and
     answered with ``self.response`` serialised through ``JsonDict``.
     """
     SeecrTestCase.setUp(self)
     self.requests = []
     self.proxy = InternalServerProxy("http://localhost")

     def _urlopen(url, data=None):
         # Calls with a body are recorded as (url, body); calls without
         # one are recorded as the bare url.
         recorded = (url, data) if data else url
         self.requests.append(recorded)
         return StringIO(JsonDict(self.response).dumps())

     self.proxy._urlopen = _urlopen
Пример #3
0
    def __init__(self):
        """Parse the command line, fetch the server config and build the proxy.

        The parsed options are copied onto the instance as attributes.
        ``_logDir`` and ``_stateDir`` fall back to the ``logPath`` and
        ``statePath`` entries of the configuration fetched from
        ``<serverUrl>/info/config`` when they were not given.
        """
        # Without any arguments, add '-h' so the option parser sees a help
        # request.
        if not argv[1:]:
            argv.append('-h')
        self.parser = OptionParser()
        options = self.parse_args()
        self.__dict__.update(vars(options))

        if not self.domainId:
            self.parser.error("Specify domain")
        if self._concurrency < 1:
            self.parser.error("Concurrency must be at least 1.")

        configUrl = self.serverUrl + '/info/config'
        config = JsonDict.load(urlopen(configUrl))
        for attribute, configKey in (('_logDir', 'logPath'),
                                     ('_stateDir', 'statePath')):
            if getattr(self, attribute) is None:
                setattr(self, attribute, config[configKey])

        self.proxy = InternalServerProxy(self.serverUrl, self.setActionDone)
        if self.repositoryId:
            self.repository = self.proxy.getRepositoryObject(
                identifier=self.repositoryId, domainId=self.domainId)
        else:
            # Keep the falsy repositoryId value itself, as ``and`` would.
            self.repository = self.repositoryId
class InternalServerProxyTest(SeecrTestCase):
    """Tests for InternalServerProxy with its ``_urlopen`` stubbed out."""

    def setUp(self):
        """Install a stub that records requests and replies with ``self.response``."""
        SeecrTestCase.setUp(self)
        self.proxy = InternalServerProxy("http://localhost")
        self.requests = []

        def _urlopen(url, data=None):
            # Calls with a body are recorded as (url, body); calls without
            # one are recorded as the bare url.
            if data:
                self.requests.append((url, data))
            else:
                self.requests.append(url)
            return StringIO(JsonDict(self.response).dumps())

        self.proxy._urlopen = _urlopen

    def testGetRepository(self):
        """getRepository and getRepositoryObject issue the same GET request."""
        self.response = {
            'request': {'verb': 'GetRepository'},
            'response': {'GetRepository': {
                'identifier': 'repo1',
                'use': True,
                'complete': False,
            }}
        }
        repoDict = self.proxy.getRepository(identifier='repo1', domainId='domainId')
        self.assertEqual('http://localhost/get?verb=GetRepository&identifier=repo1&domainId=domainId', self.requests[0])
        repo = self.proxy.getRepositoryObject(identifier='repo1', domainId='domainId')
        self.assertEqual(self.requests[0], self.requests[-1])
        self.assertEqual({'complete': False, 'identifier': 'repo1', 'use': True}, repoDict)
        self.assertEqual('repo1', repo.id)
        self.assertFalse(repo.complete)
        self.assertTrue(repo.use)

    def testGetStatus(self):
        """getStatus requests the GetStatus verb for the given domain."""
        self.response = {'response': {'GetStatus': '?'}}
        self.proxy.getStatus(domainId='domainId')
        self.assertEqual('http://localhost/get?verb=GetStatus&domainId=domainId', self.requests[0])

    def testErrorInResponse(self):
        """An 'error' entry in the response surfaces as a ValueError."""
        self.response = {'request': {'verb': 'getUnknown'}, 'error': {'code': 'badVerb', 'message': 'Bad verb'}}
        try:
            self.proxy.getStatus(domainId='domainId')
            self.fail()
        # 'except X as e' is valid on Python 2.6+ and Python 3; the older
        # 'except X, e' form is a SyntaxError on Python 3.
        except ValueError as e:
            self.assertEqual('Bad verb', str(e))
Пример #5
0
class StartHarvester(object):
    """Command-line driver that harvests the repositories of one domain.

    Without ``--child`` the instance acts as a parent: it spawns one child
    process per repository (this same command line plus ``--child`` and
    ``--repository=<id>``) and relays the children's stdout/stderr. With
    ``--child`` it processes the single configured repository itself.
    """

    def __init__(self):
        """Parse the command line, fetch the server config and build the proxy.

        The parsed options are copied onto the instance as attributes.
        ``_logDir`` and ``_stateDir`` fall back to the ``logPath`` and
        ``statePath`` entries of the configuration fetched from
        ``<serverUrl>/info/config`` when they were not given.
        """
        # Without any arguments, add '-h' so the option parser sees a help
        # request.
        if not argv[1:]:
            argv.append('-h')
        self.parser = OptionParser()
        args = self.parse_args()
        self.__dict__.update(args.__dict__)

        if not self.domainId:
            self.parser.error("Specify domain")
        if self._concurrency < 1:
            self.parser.error("Concurrency must be at least 1.")

        config = JsonDict.load(urlopen(self.serverUrl + '/info/config'))
        if self._logDir is None:
            self._logDir = config['logPath']
        if self._stateDir is None:
            self._stateDir = config['statePath']

        self.proxy = InternalServerProxy(self.serverUrl, self.setActionDone)
        # Falsy repositoryId (no --repository given) leaves self.repository
        # falsy as well, which selects "all repositories" further on.
        self.repository = self.repositoryId and self.proxy.getRepositoryObject(
            identifier=self.repositoryId, domainId=self.domainId)

    def parse_args(self):
        """Register all command-line options and parse ``sys.argv``.

        Returns:
            The optparse options object.

        Raises:
            ValueError: if ``serverUrl`` ends up empty.
        """
        self.parser.add_option("-d",
                               "--domain",
                               dest="domainId",
                               help="Mandatory argument denoting the domain.",
                               metavar="DOMAIN")
        self.parser.add_option("-u",
                               "--url",
                               dest="serverUrl",
                               help="The url of the Meresco Harvester Server",
                               default="http://localhost:8888")
        self.parser.add_option(
            "-r",
            "--repository",
            dest="repositoryId",
            help=
            "Process a single repository within the given domain. Defaults to all repositories from the domain.",
            metavar="REPOSITORY")
        self.parser.add_option("",
                               "--gustosId",
                               dest="gustosId",
                               help="Name this harvester sends to Gustos")
        self.parser.add_option("",
                               "--gustosHost",
                               dest="gustosHost",
                               help="Hostname for the gustos server")
        self.parser.add_option(
            "",
            "--gustosPort",
            dest="gustosPort",
            help="Portnumber of gustos on the gustos server",
            default=8001,
            type="int")
        self.parser.add_option(
            "-t",
            "--set-process-timeout",
            dest="processTimeout",
            type="int",
            default=60 * 60,
            metavar="TIMEOUT",
            help="Subprocess will be timed out after amount of seconds.")
        self.parser.add_option(
            "--logDir",
            "",
            dest="_logDir",
            help="Override the logDir in the apache configuration.",
            metavar="DIRECTORY",
            default=None)
        self.parser.add_option(
            "--stateDir",
            dest="_stateDir",
            help="Override the stateDir in the apache configuration.",
            metavar="DIRECTORY",
            default=None)
        self.parser.add_option(
            "--concurrency",
            dest="_concurrency",
            type="int",
            default=1,
            help=
            "Number of repositories to be concurrently harvested. Defaults to 1 (no concurrency).",
            metavar="NUMBER")
        self.parser.add_option("--force-target",
                               "",
                               dest="forceTarget",
                               help="Overrides the repository's target",
                               metavar="TARGETID")
        self.parser.add_option("--force-mapping",
                               "",
                               dest="forceMapping",
                               help="Overrides the repository's mapping",
                               metavar="MAPPINGID")
        self.parser.add_option("--no-action-done",
                               "",
                               action="store_false",
                               dest="setActionDone",
                               default=True,
                               help="Do not set SAHARA's actions",
                               metavar="TARGETID")
        self.parser.add_option(
            "--runOnce",
            "",
            dest="runOnce",
            action="store_true",
            default=False,
            help=
            "Prevent harvester from looping (if combined with --repository)")
        self.parser.add_option("--child",
                               "",
                               action="store_true",
                               dest="child",
                               default=False,
                               help=SUPPRESS_HELP)
        self.parser.add_option("--sleepTime",
                               "",
                               dest="sleepTime",
                               type='int',
                               default=1,
                               help=SUPPRESS_HELP)

        (options, args) = self.parser.parse_args()
        for opt in ['serverUrl']:
            if not getattr(options, opt, None):
                raise ValueError('Missing option: %s' % repr(opt))
        return options

    def start(self):
        """Run as a child (one repository) or as the parent that spawns children."""
        if self.child:
            self._startRepository()
        else:
            self._startChildProcesses()

    def _startChildProcesses(self):
        """Spawn child processes and relay their output until none remain.

        At most ``self._concurrency`` children run at once. ``processes``
        maps both the stdout and the stderr file descriptor of a child to
        its ``(TimedProcess, process, repositoryId)`` triple. On any
        exception all known TimedProcess objects are terminated and the
        exception is re-raised.
        """
        running = set()
        if self.repository:
            waiting = [self.repositoryId]
        else:
            waiting = self.proxy.getRepositoryIds(self.domainId)
        processes = {}
        try:
            while running or waiting:
                while waiting and (len(running) < self._concurrency):
                    repositoryId = waiting.pop(0)
                    self._createProcess(processes, repositoryId)
                    running.add(repositoryId)
                try:
                    readers, _, _ = select(list(processes.keys()), [], [])
                except error as e:
                    # An interrupted select() yields no readers; retry
                    # instead of iterating an unbound or stale list.
                    if e.args and e.args[0] == EINTR:
                        continue
                    raise
                for reader in readers:
                    # Both descriptors of a finished child are removed at
                    # once, so the second one may already be gone.
                    if reader not in processes:
                        continue

                    t, process, repositoryId = processes[reader]
                    try:
                        pipeContent = read(reader, 4096)
                    except OSError as e:
                        if e.errno == EAGAIN:
                            continue
                        raise

                    poFileno = process.stdout.fileno()
                    peFileno = process.stderr.fileno()

                    strm = stdout if reader == poFileno else stderr
                    if isinstance(pipeContent, bytes):
                        # A fixed-size read may end in the middle of a
                        # multi-byte character; replace undecodable bytes
                        # rather than crash the parent and all children.
                        pipeContent = pipeContent.decode(errors="replace")
                    strm.write(pipeContent)
                    strm.flush()

                    if process.poll() is not None:
                        exitstatus = t.stopScript(process)
                        running.remove(repositoryId)
                        del processes[poFileno]
                        del processes[peFileno]
                        if exitstatus == AGAIN_EXITCODE:
                            # The child asked to be run again right away.
                            waiting.insert(0, repositoryId)
                        else:
                            if exitstatus != 0:
                                stderr.write(
                                    "Process (for repository %s) exited with exitstatus %s.\n"
                                    % (repositoryId, exitstatus))
                                stderr.flush()
                            if not self.runOnce:
                                waiting.append(repositoryId)
                        self._updateWaiting(waiting, running)

        except BaseException:
            # Each TimedProcess is stored under two descriptors; the set
            # makes sure it is terminated only once.
            for t in {t for t, process, repositoryId in processes.values()}:
                t.terminate()
            raise

    def _createProcess(self, processes, repositoryId):
        """Start a child for ``repositoryId`` and register both of its pipes."""
        t = TimedProcess(signal=SIGTERM)
        process = t.executeScript(self._createArgs(repositoryId),
                                  self.processTimeout)
        processes[process.stdout.fileno()] = t, process, repositoryId
        processes[process.stderr.fileno()] = t, process, repositoryId

    def _createArgs(self, repositoryId):
        """Return the child command line: ``argv`` plus ``--child`` and the repository.

        ``--repository=<id>`` is only appended when that exact argument is
        not already present in ``argv``.
        """
        args = argv + ["--child"]
        extraArg = '--repository=%s' % repositoryId
        if extraArg not in argv:
            args += [extraArg]
        return args

    def _updateWaiting(self, waiting, running):
        """Synchronise ``waiting`` (in place) with the server's repository ids.

        Does nothing when running once or for a single repository. Ids no
        longer reported by the server are dropped from ``waiting``; ids that
        are neither waiting nor running are appended.
        """
        if self.runOnce or self.repository:
            return
        repositoryIds = self.proxy.getRepositoryIds(self.domainId)
        # Iterate over a copy because entries are removed while looping.
        for repoId in waiting[:]:
            if repoId not in repositoryIds:
                waiting.remove(repoId)
        for repoId in repositoryIds:
            if repoId not in waiting and repoId not in running:
                waiting.append(repoId)

    def _startRepository(self):
        """Process the configured repository once (child-process mode).

        Applies the ``--force-target`` / ``--force-mapping`` overrides, runs
        ``self.repository.do(...)``, sleeps ``self.sleepTime`` seconds and
        exits with ``AGAIN_EXITCODE`` when the repository reports ``again``.
        """
        if self.forceTarget:
            self.repository.targetId = self.forceTarget
        if self.forceMapping:
            self.repository.mappingId = self.forceMapping

        self._generalHarvestLog = CompositeLogger([
            (['*'], StreamEventLogger(stdout)),
            (['ERROR', 'WARN'], StreamEventLogger(stderr)),
        ])

        # A Gustos client is only created when a gustosId was supplied.
        gustosClient = GustosClient(id=self.gustosId,
                                    gustosHost=self.gustosHost,
                                    gustosPort=self.gustosPort,
                                    threaded=False) if self.gustosId else None

        messageIgnored, again = self.repository.do(
            stateDir=join(self._stateDir, self.domainId),
            logDir=join(self._logDir, self.domainId),
            generalHarvestLog=self._generalHarvestLog,
            gustosClient=gustosClient)
        sleep(self.sleepTime)
        if again:
            exit(AGAIN_EXITCODE)
Пример #6
0
class InternalServerProxyTest(SeecrTestCase):
    """Tests for InternalServerProxy with its ``_urlopen`` stubbed out."""

    def setUp(self):
        """Install a stub that records requests and replies with ``self.response``."""
        SeecrTestCase.setUp(self)
        self.requests = []
        self.proxy = InternalServerProxy("http://localhost")

        def _urlopen(url, data=None):
            # Calls with a body are recorded as (url, decoded body); calls
            # without one are recorded as the bare url.
            recorded = (url, data.decode()) if data else url
            self.requests.append(recorded)
            return StringIO(JsonDict(self.response).dumps())

        self.proxy._urlopen = _urlopen

    def testGetRepository(self):
        """getRepository and getRepositoryObject issue the same GET request."""
        self.response = {
            'request': {'verb': 'GetRepository'},
            'response': {
                'GetRepository': {
                    'identifier': 'repo1',
                    'use': True,
                    'complete': False,
                },
            },
        }
        expectedUrl = 'http://localhost/get?verb=GetRepository&identifier=repo1&domainId=domainId'
        expectedDict = {'complete': False, 'identifier': 'repo1', 'use': True}

        repoDict = self.proxy.getRepository(identifier='repo1',
                                            domainId='domainId')
        self.assertEqual(expectedUrl, self.requests[0])

        repo = self.proxy.getRepositoryObject(identifier='repo1',
                                              domainId='domainId')
        self.assertEqual(self.requests[0], self.requests[-1])
        self.assertEqual(expectedDict, repoDict)
        self.assertEqual('repo1', repo.id)
        self.assertFalse(repo.complete)
        self.assertTrue(repo.use)

    def testGetStatus(self):
        """getStatus requests the GetStatus verb for the given domain."""
        self.response = {'response': {'GetStatus': '?'}}
        self.proxy.getStatus(domainId='domainId')
        expectedUrl = 'http://localhost/get?verb=GetStatus&domainId=domainId'
        self.assertEqual(expectedUrl, self.requests[0])

    def testErrorInResponse(self):
        """An 'error' entry in the response surfaces as a ValueError."""
        self.response = {
            'request': {'verb': 'getUnknown'},
            'error': {'code': 'badVerb', 'message': 'Bad verb'},
        }
        with self.assertRaises(ValueError) as raised:
            self.proxy.getStatus(domainId='domainId')
        self.assertEqual('Bad verb', str(raised.exception))

    def testSetActionDone(self):
        """repositoryActionDone sends domain and identifier as the request body."""
        self.response = {}
        self.proxy.repositoryActionDone(domainId='adomain',
                                        repositoryId='repo1')
        expectedRequest = ('http://localhost/action/repositoryDone',
                           'domainId=adomain&identifier=repo1')
        self.assertEqual(expectedRequest, self.requests[0])