Exemplo n.º 1
0
def _get_qvalue(accept_types, content_type_string):
    """
    Return the quality value that C{accept_types} assigns to a media type.

    The most specific matching entry wins: an entry equal to the media type
    takes precedence over a wildcard entry with the same major type, which in
    turn takes precedence over the universal wildcard. When several wildcard
    entries of the same kind match, the last one in C{accept_types} is used.

    @param accept_types: iterable of 3-item entries; the first item is a
    content type object, the second its quality value, the third is ignored
    @param content_type_string: media type to look up; it is parsed with
    C{httpheader.content_type}
    @return: the quality value of the most specific matching entry, or 0 if
    no entry matches
    """
    content_type = httpheader.content_type(content_type_string)
    class_qvalue = None
    universal_wildcard_qvalue = None
    for accept_content_type, qvalue, _ in accept_types:
        if accept_content_type == content_type:
            # An exact match cannot be beaten by any wildcard.
            return qvalue
        if accept_content_type.is_wildcard() and accept_content_type.major == content_type.major:
            class_qvalue = qvalue
        elif accept_content_type.is_universal_wildcard():
            universal_wildcard_qvalue = qvalue

    # No exact match: prefer the same-major wildcard over the universal one.
    # (Previously the universal wildcard overwrote the more specific value.)
    if class_qvalue is not None:
        return class_qvalue
    if universal_wildcard_qvalue is not None:
        return universal_wildcard_qvalue
    return 0
Exemplo n.º 2
0
    def __init__(self, name, additional_headers={}):
        """
        Open the URL and store the response together with data derived from
        its headers.

        The following attributes are set: C{data} (the opened response),
        C{headers}, C{content_type}, C{charset}, C{location},
        C{expiration_date} and C{last_modified_date}.

        @param name: URL to be opened
        @keyword additional_headers: additional HTTP request headers to be added to the call
        @raise HTTPError: pyRdfa's own HTTPError, raised when urllib2 reports an HTTP error
        """
        # NOTE: the default dictionary is shared between calls, but it is only
        # read here, never modified.
        try:
            req = urllib2.Request(url=name)

            for key in additional_headers:
                req.add_header(key, additional_headers[key])
            if 'Accept' not in additional_headers:
                req.add_header('Accept', 'text/html, application/xhtml+xml')

            self.data = urllib2.urlopen(req)
            self.headers = self.data.info()

            if URIOpener.CONTENT_TYPE in self.headers:
                # The call below will remove the possible media type parameters, like charset settings
                ct = httpheader.content_type(
                    self.headers[URIOpener.CONTENT_TYPE])
                self.content_type = ct.media_type
                self.charset = ct.parmdict['charset'] if 'charset' in ct.parmdict else None
            else:
                # check if the suffix can be used for the content type; this may be important
                # for file:// type URI or if the server is not properly set up to return the right
                # mime type
                self.charset = None
                self.content_type = ""
                for suffix in preferred_suffixes:
                    if name.endswith(suffix):
                        self.content_type = preferred_suffixes[suffix]
                        break

            if URIOpener.CONTENT_LOCATION in self.headers:
                # The header value is resolved against the URL reported by the response
                self.location = urlparse.urljoin(
                    self.data.geturl(),
                    self.headers[URIOpener.CONTENT_LOCATION])
            else:
                self.location = name

            # Default expiration: one day from now, unless a usable Expires header says otherwise
            self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(days=1)
            if URIOpener.EXPIRES in self.headers:
                try:
                    # Thanks to Deron Meranda for the HTTP date conversion method...
                    self.expiration_date = httpheader.parse_http_datetime(
                        self.headers[URIOpener.EXPIRES])
                except Exception:
                    # The Expires date format was wrong, sorry, forget it...
                    # (KeyboardInterrupt and SystemExit are deliberately not swallowed)
                    pass

            self.last_modified_date = None
            if URIOpener.LAST_MODIFIED in self.headers:
                try:
                    # Thanks to Deron Meranda for the HTTP date conversion method...
                    self.last_modified_date = httpheader.parse_http_datetime(
                        self.headers[URIOpener.LAST_MODIFIED])
                except Exception:
                    # The last modified date format was wrong, sorry, forget it...
                    pass

        except urllib2.HTTPError as e:
            # Re-raise as the package's own exception type, keeping the HTTP status code
            from pyRdfa import HTTPError
            raise HTTPError('%s' % e, e.code)
Exemplo n.º 3
0
	def __init__(self, name, additional_headers = {}) :
		"""
		Open the URL and store the response together with data derived from
		its headers.

		The following attributes are set: C{data} (the opened response),
		C{headers}, C{content_type}, C{charset}, C{location},
		C{expiration_date} and C{last_modified_date}.

		@param name: URL to be opened
		@keyword additional_headers: additional HTTP request headers to be added to the call
		@raise HTTPError: pyRdfa's own HTTPError, raised when urllib2 reports an HTTP error
		"""
		# NOTE: the default dictionary is shared between calls, but it is only
		# read here, never modified.
		try :
			req = urllib2.Request(url=name)

			for key in additional_headers :
				req.add_header(key, additional_headers[key])
			if 'Accept' not in additional_headers :
				req.add_header('Accept', 'text/html, application/xhtml+xml')

			self.data		= urllib2.urlopen(req)
			self.headers	= self.data.info()

			if URIOpener.CONTENT_TYPE in self.headers :
				# The call below will remove the possible media type parameters, like charset settings
				ct = httpheader.content_type(self.headers[URIOpener.CONTENT_TYPE])
				self.content_type = ct.media_type
				self.charset = ct.parmdict['charset'] if 'charset' in ct.parmdict else None
			else :
				# check if the suffix can be used for the content type; this may be important
				# for file:// type URI or if the server is not properly set up to return the right
				# mime type
				self.charset = None
				self.content_type = ""
				for suffix in preferred_suffixes :
					if name.endswith(suffix) :
						self.content_type = preferred_suffixes[suffix]
						break

			if URIOpener.CONTENT_LOCATION in self.headers :
				# The header value is resolved against the URL reported by the response
				self.location = urlparse.urljoin(self.data.geturl(),self.headers[URIOpener.CONTENT_LOCATION])
			else :
				self.location = name

			# Default expiration: one day from now, unless a usable Expires header says otherwise
			self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(days=1)
			if URIOpener.EXPIRES in self.headers :
				try :
					# Thanks to Deron Meranda for the HTTP date conversion method...
					self.expiration_date = httpheader.parse_http_datetime(self.headers[URIOpener.EXPIRES])
				except Exception :
					# The Expires date format was wrong, sorry, forget it...
					# (KeyboardInterrupt and SystemExit are deliberately not swallowed)
					pass

			self.last_modified_date = None
			if URIOpener.LAST_MODIFIED in self.headers :
				try :
					# Thanks to Deron Meranda for the HTTP date conversion method...
					self.last_modified_date = httpheader.parse_http_datetime(self.headers[URIOpener.LAST_MODIFIED])
				except Exception :
					# The last modified date format was wrong, sorry, forget it...
					pass

		except urllib2.HTTPError as e :
			# Re-raise as the package's own exception type, keeping the HTTP status code
			from pyRdfa import HTTPError
			raise HTTPError('%s' % e, e.code)