Пример #1
0
    def test_multi_request(self):
        ''' Check how ``MultiRESTClient`` handles credentials embedded in
        service URLs and how it reports failures.

        Three things are verified:
          * for every URL the matching entry of ``client.clients`` carries
            the expected ``user`` and ``password``; when credentials were
            embedded, the client's ``service_url`` differs from the URL
            that was passed in;
          * ``client.request`` raises an exception whose text contains
            'Could not make request to path';
          * constructing a client from a URL that has a user name but no
            password raises an exception.
        '''
        # (url, expected user, expected password)
        urls = (('http://irgendwas.com', None, None),
                ('http://heinz:secret@irgendwas.com', 'heinz', 'secret'))
        service_urls = [url[0] for url in urls]
        client = MultiRESTClient(service_urls)

        for i, (service_url, user, passwd) in enumerate(urls):
            c = client.clients[i]
            if user:
                assert service_url != c.service_url
            assert c.user == user
            assert c.password == passwd

        # The "must have raised" check lives in ``else`` so that its own
        # AssertionError is not swallowed by ``except Exception``.
        try:
            client.request('irgendwas')
        except Exception as e:
            assert 'Could not make request to path' in str(e)
        else:
            assert False, 'request was expected to raise an exception'

        try:
            # user name without a password
            urls = ('https://heinz@irgendwas.com', )
            client = MultiRESTClient(urls)
        except Exception as e:
            print('!!! previous exception is OK, we expected that')
            assert 'if set, user AND pwd required' in e.args  # not tested (SV)
        else:
            assert False, 'must raise an assertion error'
Пример #2
0
 def __init__(self,
              url=WEBLYZARD_API_URL,
              usr=WEBLYZARD_API_USER,
              pwd=WEBLYZARD_API_PASS,
              default_timeout=None):
     '''
     Initialise the client by delegating to ``MultiRESTClient.__init__``.

     :param url: URL of the jeremia web service (passed as ``service_urls``)
     :param usr: optional user name (passed as ``user``)
     :param pwd: optional password (passed as ``password``)
     :param default_timeout: passed through unchanged as ``default_timeout``
     '''
     MultiRESTClient.__init__(
         self,
         service_urls=url,
         user=usr,
         password=pwd,
         default_timeout=default_timeout,
     )
Пример #3
0
 def __init__(self,
              url=WEBLYZARD_API_URL,
              usr=WEBLYZARD_API_USER,
              pwd=WEBLYZARD_API_PASS,
              default_timeout=None):
     '''
     Hand all connection settings over to ``MultiRESTClient.__init__``.

     :param url: URL of the jeremia web service (passed as ``service_urls``)
     :param usr: optional user name (passed as ``user``)
     :param pwd: optional password (passed as ``password``)
     :param default_timeout: passed through unchanged as ``default_timeout``
     '''
     MultiRESTClient.__init__(
         self,
         service_urls=url,
         user=usr,
         password=pwd,
         default_timeout=default_timeout,
     )
 def __init__(self, url=WEBLYZARD_API_URL, usr=WEBLYZARD_API_USER,
              pwd=WEBLYZARD_API_PASS, default_timeout=None):
     '''
     Initialise the client by delegating to ``MultiRESTClient.__init__``.

     :param url: service URL (passed as ``service_urls``)
     :param usr: user name (passed as ``user``)
     :param pwd: password (passed as ``password``)
     :param default_timeout: passed through unchanged as ``default_timeout``
     '''
     MultiRESTClient.__init__(self, service_urls=url, user=usr, password=pwd,
                              default_timeout=default_timeout)
Пример #5
0
    def test_randomize_urls(self):
        ''' Check that ``use_random_server=True`` keeps all service URLs
        but changes their order.

        A single URL string must end up as a one-element list in
        ``client._service_urls``. For 1000 URLs the stored list must have
        the same length as the input but must not be equal to it.

        This test might fail if the random ordering happens to equal the
        input order, but this is very unlikely.
        '''
        client = MultiRESTClient(service_urls='http://test.url',
                                 use_random_server=True)

        assert isinstance(client._service_urls, list)
        assert len(client._service_urls) == 1

        service_urls = ['http://test.url%s' % i for i in range(1000)]

        client = MultiRESTClient(service_urls=service_urls,
                                 use_random_server=True)

        assert len(client._service_urls) == len(service_urls)
        # ``!=`` replaces the obsolete ``<>`` spelling (same semantics).
        assert service_urls != client._service_urls
Пример #6
0
    def test_multi_request(self):
        ''' Check how ``MultiRESTClient`` handles credentials embedded in
        service URLs and how it reports failures.

        Three things are verified:
          * for every URL the matching entry of ``client.clients`` carries
            the expected ``user`` and ``password``; when credentials were
            embedded, the client's ``service_url`` differs from the URL
            that was passed in;
          * ``client.request`` raises an exception whose text contains
            'Could not make request to path';
          * constructing a client from a URL that has a user name but no
            password raises an exception.
        '''
        # (url, expected user, expected password)
        urls = (('http://irgendwas.com', None, None),
                ('http://heinz:secret@irgendwas.com', 'heinz', 'secret'))
        service_urls = [url[0] for url in urls]
        client = MultiRESTClient(service_urls)

        for i, (service_url, user, passwd) in enumerate(urls):
            c = client.clients[i]
            if user:
                assert service_url != c.service_url
            assert c.user == user
            assert c.password == passwd

        # The "must have raised" check lives in ``else`` so that its own
        # AssertionError is not swallowed by ``except Exception``.
        try:
            client.request('irgendwas')
        except Exception as e:
            assert 'Could not make request to path' in str(e)
        else:
            assert False, 'request was expected to raise an exception'

        try:
            # user name without a password
            urls = ('https://heinz@irgendwas.com', )
            client = MultiRESTClient(urls)
        except Exception as e:
            print('!!! previous exception is OK, we expected that')
            assert 'if set, user AND pwd required' in e.args  # not tested (SV)
        else:
            assert False, 'must raise an assertion error'
Пример #7
0
 def __init__(self,
              url=WEBLYZARD_API_URL,
              usr=WEBLYZARD_API_USER,
              pwd=WEBLYZARD_API_PASS,
              default_timeout=None):
     '''
     Initialise the client by delegating to ``MultiRESTClient.__init__``.

     :param url: service URL (passed as ``service_urls``)
     :param usr: user name (passed as ``user``)
     :param pwd: password (passed as ``password``)
     :param default_timeout: passed through unchanged as ``default_timeout``
     '''
     MultiRESTClient.__init__(
         self,
         service_urls=url,
         user=usr,
         password=pwd,
         default_timeout=default_timeout,
     )
Пример #8
0
 def __init__(self, url, default_timeout=None):
     ''' Store the connection settings and create the REST client.

     :param url: service URL; kept as ``self.url`` and handed to
         ``MultiRESTClient``
     :param default_timeout: optional timeout; kept as
         ``self.default_timeout`` and, when not ``None``, forwarded to
         ``MultiRESTClient`` as ``default_timeout``
     '''
     self.url = url
     self.default_timeout = default_timeout
     if default_timeout is None:
         # No timeout requested: let MultiRESTClient use its own default.
         self.multiRestclient = MultiRESTClient(self.url)
     else:
         # Previously the timeout was stored but never reached the client.
         self.multiRestclient = MultiRESTClient(
             self.url, default_timeout=default_timeout)
Пример #9
0
class Joanna(object):
    """
    Joanna Nilsimsa web service client.

    Request paths built by this client (relative to the service URL):
        load/:portalName/:sourceId/:daysBack
            - GET: load the nilsimsa hashes for a portal with sourceId
              and days back to load
            - Python client function: reload_source_nilsimsa
        is_similar/:portalName/:sourceId/:nilsimsaHash/:daysBack/:threshold
            - Returns true or false for a given nilsimsa hash
              with a sourceId
            - Python client function: similar_document
        get_hashes/:sourceId/:portalName
            - GET: return the list of hashes for a given sourceId
            - Python client function: get_hashes
        hashes_size/:sourceId/:portalName
            - Python client function: get_hash_size
        clean_hashes
            - GET: cleans cached hash lists by removing outdated
              elements and duplicates
            - Python client function: clean_hashes
        version
            - GET: return the current version of the API
            - Python client function: version
        status
            - GET: return the status of the API.
              If functioning it will return "ONLINE"
            - Python client function: status
        batchIsSimilar/:portalName/:sourceId/:daysBack/:threshold
            - POST: make a batch of nilsimsa. If the sourceId isn't
              present it will make a /load request instead.
              The client will try again to return the batch request.
            - Returns:
                Dictionary of hash and similarity
                {hash:similarity-bool}
                Similarity: False means it is not similar to
                anything with that sourceId
            - Python client function: similar_documents
    Example usage:
        jo = Joanna(url="http://localhost:8080")
    """

    def __init__(self, url, default_timeout=None):
        ''' Store the connection settings and create the REST client.

        :param url: service URL; kept as ``self.url`` (also used to build
            the POST URL in ``similar_documents``)
        :param default_timeout: optional timeout; kept as
            ``self.default_timeout`` and, when not ``None``, forwarded to
            ``MultiRESTClient`` as ``default_timeout``
        '''
        self.url = url
        self.default_timeout = default_timeout
        if default_timeout is None:
            # No timeout requested: let MultiRESTClient use its own default.
            self.multiRestclient = MultiRESTClient(self.url)
        else:
            # Previously the timeout was stored but never reached the client.
            self.multiRestclient = MultiRESTClient(
                self.url, default_timeout=default_timeout)

    def get_hashes(self, sourceId, portalName):
        ''' Return the hashes for a specific source and portal
        (request path ``get_hashes/<sourceId>/<portalName>``).
        '''
        request_url = "get_hashes/{}/{}".format(sourceId, portalName)
        return self.multiRestclient.request(request_url)

    def get_hash_size(self, sourceId, portalName):
        ''' Return the service's answer to
        ``hashes_size/<sourceId>/<portalName>``.
        '''
        request_url = "hashes_size/{}/{}".format(sourceId, portalName)
        return self.multiRestclient.request(request_url)

    def clean_hashes(self):
        ''' Make a request to clean old nilsimsa hashes
        '''
        return self.multiRestclient.request("clean_hashes")

    def similar_document(self, sourceId, nilsimsa, portalName,
                         daysBack=None, nilsimsa_threshold=5):
        ''' Get the similarity of a single document.

        :param sourceId: source identifier used in the request path
        :param nilsimsa: nilsimsa hash of the document
        :param portalName: portal name used in the request path
        :param daysBack: days to look back; ``None`` means 20
        :param nilsimsa_threshold: threshold appended to the request path
        :returns: the plain response of the service (expected: a boolean
            answer). If the first answer is "LOADED" the request is sent
            a second time and that second answer is returned.
        '''
        if daysBack is None:
            daysBack = 20
        request_url = "is_similar/{}/{}/{}/{}/{}".format(
            portalName, sourceId, nilsimsa, daysBack, nilsimsa_threshold)

        result = self.multiRestclient.request(
            request_url, return_plain=True)

        if result == "LOADED":
            # The service only loaded its hashes; ask again for the real
            # answer. (The retry result used to be discarded, so callers
            # received None.)
            result = self.multiRestclient.request(
                request_url, return_plain=True)
        return result

    def similar_documents(self, sourceId, portalName, contentIds_nilsimsa_dict,
                          daysBack=20, nilsimsa_threshold=5):
        """ Check a batch of nilsimsa hashes for similarity.

        Uses PostRequest instead of the eWRT MultiRESTClient
        for finer control of the connection codes for retries.

        :param sourceId: source identifier used in the request path
        :param portalName: portal name used in the request path
        :param contentIds_nilsimsa_dict: mapping content id -> nilsimsa
            hash; sent as the POST payload
        :param daysBack: days to look back; ``None`` means
            ``DAYS_BACK_DEFAULT``
        :param nilsimsa_threshold: converted with ``int()`` and appended
            to the request path
        :returns: ``{hash: boolean, ..}`` decoded from the JSON response;
            hashes of the input that are missing in the response are
            added with the value ``'true'``. ``None`` if both
            ``sourceId`` and the dict are empty, if the service answers
            with status 204, or if all retry attempts are used up.
        :raises ValueError: if ``contentIds_nilsimsa_dict`` is a string
            or a list
        """
        max_retry_delay = DEFAULT_MAX_RETRY_DELAY
        max_retry_attempts = DEFAULT_MAX_RETRY_ATTEMPTS
        nilsimsa_threshold = int(nilsimsa_threshold)
        if daysBack is None:
            daysBack = DAYS_BACK_DEFAULT

        # NOTE(review): this only triggers when BOTH values are falsy;
        # confirm whether "either one missing" was intended.
        if not (sourceId or contentIds_nilsimsa_dict):
            logger.error("Arguments missing")
            return None
        if isinstance(contentIds_nilsimsa_dict, basestring):
            logger.error("Expected dict. Please use single_document")
            raise ValueError('Expected a dictionary, got a string')
        if isinstance(contentIds_nilsimsa_dict, list):
            logger.error("Expected dict. Got a list.")
            raise ValueError('Expected a dictionary, got a list.')

        request_url = "batchIsSimilar/{}/{}/{}/{}".format(
            portalName, sourceId, daysBack, nilsimsa_threshold)

        req = PostRequest(self.url + '/' + request_url,
                          contentIds_nilsimsa_dict)

        attempts = 0
        conn_code = -1

        # Retry until the attempts are used up; a 204 answer ends the loop.
        while attempts < max_retry_attempts and conn_code != 204:
            conn = req.request()
            conn_code = conn.code
            if conn.code == 200:
                logger.info('successful request')
                data = conn.read()
                if data == "LOADED":
                    logger.info("Nilsimsas loaded from db. "
                                "Sending request again for results..")
                elif data == "LOADING":
                    logger.info("Nilsimsas loading from db. "
                                "Sending request again for results..")
                    sleep(2)
                else:
                    json_data = json.loads(data)
                    # Hashes the service did not report are marked 'true'.
                    for nilsimsa_hash in contentIds_nilsimsa_dict.values():
                        if nilsimsa_hash not in json_data:
                            json_data[nilsimsa_hash] = 'true'
                    return json_data
            elif conn.code == 204:
                data = conn.read()
                logger.info('No content found attempts %d %s', attempts, data)
            elif conn.code == 400:
                logger.error('Bad request.. 400 error')
                data = conn.read()
                logger.error('Err: %s', data)
            elif conn.code == 500:
                data = conn.read()
                logger.error(
                    'Server failure: attempts %d %s', attempts, data)
            # Random back-off of at most max_retry_delay before retrying.
            sleep(max_retry_delay * random())
            attempts += 1

        # No usable answer was received.
        return None

    def reload_source_nilsimsa(self, sourceId, portal_db, daysBack=20):
        ''' Request ``load/<portal_db>/<sourceId>/<daysBack>`` and return
        the plain response. ``daysBack=None`` means ``DAYS_BACK_DEFAULT``.
        '''
        if daysBack is None:
            daysBack = DAYS_BACK_DEFAULT
        request = "load/{}/{}/{}".format(portal_db, sourceId, daysBack)
        return self.multiRestclient.request(request, return_plain=True)

    def status(self):
        ''' Return the plain response of the ``status`` request. '''
        return self.multiRestclient.request('status', return_plain=True)

    def version(self):
        ''' Return the plain response of the ``version`` request. '''
        return self.multiRestclient.request('version', return_plain=True)

    def rand_strings(self, num_docs):
        ''' Return a list of ``num_docs`` random strings, each the hex
        encoding of 16 bytes read from ``os.urandom``.
        '''
        import os
        import binascii
        return [str(binascii.b2a_hex(os.urandom(16)))
                for _ in xrange(num_docs)]