def Bind(self, configuration):
    """Bind to LDAP, retrying if necessary.

    Args:
      configuration: mapping of source settings.  Always reads 'retry_max'
        and 'uri'; reads 'retry_delay' between failed attempts; reads
        'use_sasl', 'sasl_mech' and 'sasl_authzid' for SASL binds, or
        'bind_dn' and 'bind_password' for simple binds.

    Raises:
      ConfigurationError: SASL is enabled but the mechanism is not GSSAPI.
      SourceUnavailable: the server was still down after 'retry_max'
        attempts.
    """
    # If the server is unavailable, we are going to find out now, as this
    # actually initiates the network connection.
    #
    # NOTE: when 'retry_max' is 0 the loop body never runs, so no bind is
    # attempted and nothing is raised.
    retry_count = 0
    while retry_count < configuration['retry_max']:
        self.log.debug('opening ldap connection and binding to %s',
                       configuration['uri'])
        try:
            if configuration.get('use_sasl'):
                sasl_mech = configuration.get('sasl_mech')
                if sasl_mech and sasl_mech.lower() == 'gssapi':
                    sasl = ldap.sasl.gssapi(configuration['sasl_authzid'])
                else:
                    # TODO: Add other sasl mechs
                    raise error.ConfigurationError(
                        'SASL mechanism not supported')
                self.conn.sasl_interactive_bind_s('', sasl)
            else:
                self.conn.simple_bind_s(
                    who=configuration['bind_dn'],
                    cred=str(configuration['bind_password']))
            # Bound successfully; stop retrying.
            break
        except ldap.SERVER_DOWN as e:
            retry_count += 1
            self.log.warning('Failed LDAP connection: attempt #%s.',
                             retry_count)
            self.log.debug('ldap error is %r', e)
            if retry_count == configuration['retry_max']:
                self.log.debug('max retries hit')
                raise error.SourceUnavailable(e)
            self.log.debug('sleeping %d seconds',
                           configuration['retry_delay'])
            time.sleep(configuration['retry_delay'])
def HandleCurlError(e, logger=None):
    """Handle a curl exception.

    See http://curl.haxx.se/libcurl/c/libcurl-errors.html for a list of codes.

    Args:
      e: pycurl.error, whose args are (code, message)
      logger: logger object; the logging module itself is used when omitted

    Returns:
      None, when the error code is one of the possibly-transient ones (the
      caller may try again).

    Raises:
      ConfigurationError: unsupported protocol, malformed URL, or an SSL
        engine / CA certificate file problem.
      SourceUnavailable: the peer certificate was rejected.
      Error: any other curl error code.
    """
    if not logger:
        logger = logging

    # Read the (code, message) pair through .args: indexing the exception
    # object itself only works on Python 2.
    code = e.args[0]
    msg = e.args[1]

    # Config errors
    if code in (pycurl.E_UNSUPPORTED_PROTOCOL, pycurl.E_URL_MALFORMAT,
                pycurl.E_SSL_ENGINE_NOTFOUND, pycurl.E_SSL_ENGINE_SETFAILED,
                pycurl.E_SSL_CACERT_BADFILE):
        raise error.ConfigurationError(msg)

    # Possibly transient errors, try again
    if code in (pycurl.E_FAILED_INIT, pycurl.E_COULDNT_CONNECT,
                pycurl.E_PARTIAL_FILE, pycurl.E_WRITE_ERROR,
                pycurl.E_READ_ERROR, pycurl.E_OPERATION_TIMEOUTED,
                pycurl.E_SSL_CONNECT_ERROR, pycurl.E_COULDNT_RESOLVE_PROXY,
                pycurl.E_COULDNT_RESOLVE_HOST, pycurl.E_GOT_NOTHING):
        logger.debug('Possibly transient error: %s', msg)
        return

    # SSL issues
    if code in (pycurl.E_SSL_PEER_CERTIFICATE,):
        raise error.SourceUnavailable(msg)

    # Anything else
    raise error.Error(msg)
def GetUpdates(self, s3_client, bucket, obj, since):
    """Get updates from a source.

    Args:
      s3_client: initialized s3 client
      bucket: s3 bucket
      obj: object with the data
      since: a timestamp representing the last change (None to force-get)

    Returns:
      The map built by self.GetMap from the object body, with its modify
      timestamp set from the object's LastModified value; or an empty list
      when S3 reports the object as not modified (304).

    Raises:
      SourceUnavailable: the object could not be downloaded from S3.
      ValueError: presumably propagated from GetMap when an object in the
        source map is malformed (not visible in this method).
    """
    request = {'Bucket': bucket, 'Key': obj}
    if since is not None:
        # Conditional GET: only fetch when changed after 'since'.
        request['IfModifiedSince'] = self.FromTimestampToDateTime(since)

    try:
        response = s3_client.get_object(**request)
        body = response['Body']
        last_modified_ts = self.FromDateTimeToTimestamp(
            response['LastModified'])
    except ClientError as e:
        # Error codes are not always numeric (e.g. 'NoSuchKey'), so compare
        # as text; int() here would raise ValueError and hide the real
        # failure.
        error_code = str(e.response['Error']['Code'])
        if error_code == '304':
            return []
        self.log.error('error getting S3 object ({}): {}'.format(obj, e))
        raise error.SourceUnavailable('unable to download object from S3')

    data_map = self.GetMap(cache_info=body)
    data_map.SetModifyTimestamp(last_modified_ts)
    return data_map
def GetUpdates(self, source, url, since):
    """Get updates from a source.

    Args:
      source: A data source
      url: url to the data we want
      since: a timestamp representing the last change (None to force-get)

    Returns:
      The map built by self.GetMap from the (possibly bzip2-compressed)
      response body, with its modify timestamp set when a Last-Modified
      value is available; or an empty list when the server answers 304.

    Raises:
      ConfigurationError: the url scheme is neither http nor https.
      SourceUnavailable: no 200/304 response within 'retry_max' attempts.
      ValueError: presumably propagated from GetMap when an object in the
        source map is malformed (not visible in this method).
    """
    proto = url.split(':')[0]
    # Newer libcurl allow you to disable protocols there. Unfortunately
    # it's not in dapper or hardy.
    if proto not in ('http', 'https'):
        raise error.ConfigurationError('Unsupported protocol %s' % proto)

    conn = source.conn
    conn.setopt(pycurl.OPT_FILETIME, 1)
    conn.setopt(pycurl.ENCODING, 'bzip2, gzip')
    if since is not None:
        conn.setopt(pycurl.TIMEVALUE, int(since))
        conn.setopt(pycurl.TIMECONDITION, pycurl.TIMECONDITION_IFMODSINCE)

    retry_max = source.conf['retry_max']
    retry_count = 0
    resp_code = 500
    headers = body = None
    while retry_count < retry_max:
        try:
            source.log.debug('fetching %s', url)
            (resp_code, headers, body) = curl.CurlFetch(url, conn, self.log)
            self.log.debug('response code: %s', resp_code)
        finally:
            # Retry bookkeeping runs whether CurlFetch returned or raised.
            # NOTE(review): when CurlFetch raises, this still logs and
            # sleeps before the exception propagates (or replaces it with
            # SourceUnavailable on the last attempt) -- kept as-is; confirm
            # whether that is intended.
            if resp_code not in (200, 304):
                retry_count += 1
                self.log.warning('Failed connection: attempt #%s.',
                                 retry_count)
                if retry_count == retry_max:
                    self.log.debug('max retries hit')
                    raise error.SourceUnavailable('Max retries exceeded.')
                time.sleep(source.conf['retry_delay'])
        # Not modified-since
        if resp_code == 304:
            return []
        if resp_code == 200:
            break
    else:
        # The loop ended without a usable response (e.g. retry_max <= 0, so
        # no request was ever made); there are no headers or body to parse.
        self.log.debug('max retries hit')
        raise error.SourceUnavailable('Max retries exceeded.')

    headers = headers.split('\r\n')
    last_modified = conn.getinfo(pycurl.INFO_FILETIME)
    self.log.debug('last modified: %s', last_modified)
    if last_modified == -1:
        # curl reported -1 for the file time; fall back to the
        # Last-Modified response header, if the server sent one.
        for header in headers:
            if header.lower().startswith('last-modified'):
                self.log.debug('%s', header)
                http_ts_string = header[header.find(':') + 1:].strip()
                last_modified = self.FromHttpToTimestamp(http_ts_string)
                break
        else:
            http_ts_string = ''
    else:
        http_ts_string = self.FromTimestampToHttp(last_modified)

    self.log.debug('Last-modified is: %s', http_ts_string)

    # curl (on Ubuntu hardy at least) will handle gzip, but not bzip2
    try:
        response = cStringIO.StringIO(bz2.decompress(body))
        self.log.debug('bzip encoding found')
    except IOError:
        # Not bzip2 data; use the body as received.
        response = cStringIO.StringIO(body)

    data_map = self.GetMap(cache_info=response)
    if http_ts_string:
        http_ts = self.FromHttpToTimestamp(http_ts_string)
        self.log.debug('setting last modified to: %s', http_ts)
        data_map.SetModifyTimestamp(http_ts)

    return data_map