def test_item_inheritance(self):
    """A subclass of an Item keeps the base's fields and Meta query.

    ``ChildItem`` only overrides ``name``; it must still be findable via
    the ``find_query`` declared on ``BaseItem.Meta`` and still expose the
    ``age`` field.
    """
    class BaseItem(Item):
        class Meta:
            find_query = '//player'

        name = StringField('firstname')
        age = IntegerField('age')

    class ChildItem(BaseItem):
        # Re-declares ``name`` against a different element.
        name = StringField('lastname')

    expected_field_names = set(['name', 'age'])

    # Base class: both declared fields resolve on the first match.
    first_base = list(BaseItem.find(xml_fromstring(XML)))[0]
    self.assertEqual(first_base.name, 'Ardeshir')
    self.assertEqual(first_base.age, 19)
    self.assertEqual(expected_field_names, set(first_base._fields.keys()))

    # Subclass: the inherited field is still present and populated.
    first_child = list(ChildItem.find(xml_fromstring(XML)))[0]
    self.assertEqual(first_child.age, 19)
    self.assertEqual(expected_field_names, set(first_child._fields.keys()))
def get_item(self):
    """Return a ``Player`` built from ``xml_fromstring(XML)``."""
    root = xml_fromstring(XML)
    return Player(root)
def test_stringfield_multiple(self):
    """A ``StringField`` with ``multiple=True`` yields a list of matches."""
    class GameItem(Item):
        names = StringField('//game/@name', multiple=True)

    root = xml_fromstring(XML)
    game = GameItem(root)
    self.assertEqual(['quake1', 'quake2'], game.names)
def test_find(self):
    """``GameItem.find`` yields one item per game, in order."""
    root = xml_fromstring(XML)
    found_names = [game.name for game in GameItem.find(root)]
    self.assertEqual(['quake1', 'quake2'], found_names)
def get_data(self, url, force_type=False):
    """Fetch ``url`` and convert the body according to its response type.

    The response type is derived from the Content-Type header ("json" or
    "xml", otherwise "unknown").  A truthy ``force_type`` that differs
    from the detected type replaces it; this is the only way the "feed"
    type is selected.  "xml" bodies are passed to ``xml_fromstring``,
    "feed" bodies to ``feedparser.parse`` and "json" bodies to ``loads``;
    any other type is returned as the raw body.

    Args:
        url: Address to request, using ``self.headers`` as request headers.
        force_type: Response type to use instead of the detected one, or a
            falsy value to rely on detection.

    Returns:
        A ``(data, response_type)`` tuple.  ``data`` is ``False`` when the
        URL cannot be opened, the status is not 200, the body cannot be
        read or is empty, or the conversion fails.
    """
    prefix = self.__class__.__name__
    response_type = "unknown"
    req = urllib2.Request(url=url, headers=self.headers)

    log(prefix + ": Trying to open %s." % url)
    try:
        response = urllib2.urlopen(req)
    except (urllib2.URLError, urllib2.HTTPError) as e:
        if DEBUG:
            log(prefix + ": %s" % str(e))
        log(prefix + ": Error while opening %s, Fatal." % url)
        return (False, response_type)

    # Release the connection on every exit path, including early returns.
    try:
        status = response.getcode()
        if status != 200:
            # %s rather than %i: formatting must not raise if no status
            # code is available.
            log(prefix + ": Did not get a 200 ok response, got %s." % status)
            return (False, response_type)

        response_info = response.info()
        if "Content-Type" in response_info:
            content_type = response_info["Content-Type"]
            lowered = content_type.lower()
            if "json" in lowered:
                response_type = "json"
            elif "xml" in lowered:
                response_type = "xml"
            else:
                log(prefix + ": Unrecognised Content-Type '%s' from %s."
                    % (content_type, url))

        if force_type and force_type != response_type:
            response_type = force_type
            log(prefix + ": Response-type data forced to '%s' from %s."
                % (response_type, url))
        else:
            log(prefix + ": Response-type data '%s' from %s."
                % (response_type, url))

        # The header is only reported, never parsed, so a malformed value
        # cannot abort the request.
        if "Content-Length" in response_info:
            log(prefix + ": Getting %s bytes from %s"
                % (response_info["Content-Length"], url))
        else:
            log(prefix + ": Getting ? bytes from %s." % (url))

        try:
            data = response.read()
        except Exception:
            log(prefix + ": Error while reading data.")
            return (False, response_type)
        log(prefix + ": Got %i bytes from %s." % (len(data), url))
    finally:
        response.close()

    if not data:
        log(prefix + ": Data size to small (%i bytes) from %s."
            % (len(data), url))
        return (False, response_type)

    if response_type == "xml":
        try:
            data = xml_fromstring(data)
        except Exception:
            log(prefix + ": Error while converting raw data to xml.")
            return (False, response_type)
        log(prefix + ": Converted raw data to xmletree object.")
        # Element dump is diagnostic only: keep it out of the parse
        # try-block so an output problem cannot reject valid XML.
        if DEBUG:
            for item in data.iter():
                log(prefix + ": %r %r %r" % (item, item.tag, item.text))

    if response_type == "feed":
        try:
            data = feedparser.parse(data)
            # Drop the parsed timestamps from the feed and from every
            # entry.  NOTE(review): presumably because the time structs
            # are unwanted downstream -- confirm.  Using a default keeps
            # one entry without the key from skipping the remaining ones.
            feed_meta = data.get("feed")
            if feed_meta is not None:
                feed_meta.pop("updated_parsed", None)
            for entry in data.get("entries", []):
                entry.pop("updated_parsed", None)
            log(prefix + ": Converted raw data to feed object.")
        except Exception:
            log(prefix + ": Error while converting raw data to feed.")
            return (False, response_type)

    if response_type == "json":
        try:
            data = loads(data)
            log(prefix + ": Converted raw data to json.")
        except Exception:
            log(prefix + ": Error while converting raw data to json.")
            return (False, response_type)

    return (data, response_type)
def get_data(self, url, force_type=False):
    """Fetch ``url`` and convert the body according to its response type.

    The response type is derived from the Content-Type header ("json" or
    "xml", otherwise "unknown").  A truthy ``force_type`` that differs
    from the detected type replaces it; this is the only way the "feed"
    type is selected.  "xml" bodies are passed to ``xml_fromstring``,
    "feed" bodies to ``feedparser.parse`` and "json" bodies to ``loads``;
    any other type is returned as the raw body.

    Args:
        url: Address to request, using ``self.headers`` as request headers.
        force_type: Response type to use instead of the detected one, or a
            falsy value to rely on detection.

    Returns:
        A ``(data, response_type)`` tuple.  ``data`` is ``False`` when the
        URL cannot be opened, the status is not 200, the body cannot be
        read or is empty, or the conversion fails.
    """
    prefix = self.__class__.__name__
    response_type = "unknown"
    req = urllib2.Request(url=url, headers=self.headers)

    log(prefix + ": Trying to open %s." % url)
    try:
        response = urllib2.urlopen(req)
    except (urllib2.URLError, urllib2.HTTPError) as e:
        if DEBUG:
            log(prefix + ": %s" % str(e))
        log(prefix + ": Error while opening %s, Fatal." % url)
        return (False, response_type)

    # Release the connection on every exit path, including early returns.
    try:
        status = response.getcode()
        if status != 200:
            # %s rather than %i: formatting must not raise if no status
            # code is available.
            log(prefix + ": Did not get a 200 ok response, got %s." % status)
            return (False, response_type)

        response_info = response.info()
        if "Content-Type" in response_info:
            content_type = response_info["Content-Type"]
            lowered = content_type.lower()
            if "json" in lowered:
                response_type = "json"
            elif "xml" in lowered:
                response_type = "xml"
            else:
                log(prefix + ": Unrecognised Content-Type '%s' from %s."
                    % (content_type, url))

        if force_type and force_type != response_type:
            response_type = force_type
            log(prefix + ": Response-type data forced to '%s' from %s."
                % (response_type, url))
        else:
            log(prefix + ": Response-type data '%s' from %s."
                % (response_type, url))

        # The header is only reported, never parsed, so a malformed value
        # cannot abort the request.
        if "Content-Length" in response_info:
            log(prefix + ": Getting %s bytes from %s"
                % (response_info["Content-Length"], url))
        else:
            log(prefix + ": Getting ? bytes from %s." % (url))

        try:
            data = response.read()
        except Exception:
            log(prefix + ": Error while reading data.")
            return (False, response_type)
        log(prefix + ": Got %i bytes from %s." % (len(data), url))
    finally:
        response.close()

    if not data:
        log(prefix + ": Data size to small (%i bytes) from %s."
            % (len(data), url))
        return (False, response_type)

    if response_type == "xml":
        try:
            data = xml_fromstring(data)
        except Exception:
            log(prefix + ": Error while converting raw data to xml.")
            return (False, response_type)
        log(prefix + ": Converted raw data to xmletree object.")
        # Element dump is diagnostic only: keep it out of the parse
        # try-block so an output problem cannot reject valid XML.
        if DEBUG:
            for item in data.iter():
                log(prefix + ": %r %r %r" % (item, item.tag, item.text))

    if response_type == "feed":
        try:
            data = feedparser.parse(data)
            # Drop the feed-level parsed timestamp; its absence is not an
            # error.  NOTE(review): presumably because the time struct is
            # unwanted downstream -- confirm.
            feed_meta = data.get("feed")
            if feed_meta is not None:
                feed_meta.pop("updated_parsed", None)
            log(prefix + ": Converted raw data to feed object.")
        except Exception:
            log(prefix + ": Error while converting raw data to feed.")
            return (False, response_type)

    if response_type == "json":
        try:
            data = loads(data)
            log(prefix + ": Converted raw data to json.")
        except Exception:
            log(prefix + ": Error while converting raw data to json.")
            return (False, response_type)

    return (data, response_type)