def processSection(self, url):
    """Walk the numbered pages below *url* and store the items of each one.

    Pages are requested as ``url + "/" + str(n)`` starting with n = 1.
    For every page that yields data, the item list from PageParser is
    handed to ``processData``, the entries "source" (the page URL),
    "section" and "lang" are added through ``addItemToFinalList``, and
    ``storeToFile`` is called with the page number.

    The walk ends when the page number equals the value obtained from
    ``PageParser.getSectionLenght()``, or when a fetch returns None
    before any section length is known.

    :param url: base address; the page number is appended after a "/".
    """
    pageNumber = 1
    sectionLength = None
    while True:
        pageUrl = url + "/" + str(pageNumber)
        page = getDataFrom(pageUrl)

        if page is None:
            if sectionLength is None:
                # Nothing fetched and no known length: give up on the section.
                print("none data, return")
                return
            # NOTE(review): the same page is requested again immediately,
            # with no delay and no retry limit, so a permanently missing
            # page keeps this loop running. Kept as-is because the retry
            # may be deliberate -- confirm the intent.
            print("none data, continue")
            continue

        pageParser = PageParser(page)
        # Queried again on later pages for as long as the value is falsy.
        if not sectionLength:
            sectionLength = pageParser.getSectionLenght()

        self.processData(pageParser.getListOfItems())
        self.addItemToFinalList("source", pageUrl)
        self.addItemToFinalList("section", "netdoktor")
        self.addItemToFinalList("lang", "de")
        self.storeToFile(pageNumber)

        if pageNumber == sectionLength:
            return
        pageNumber += 1
def processSection(self, url):
    """Walk the numbered pages below *url* and store the items of each one.

    Pages are requested as ``url + "/" + str(n)`` starting with n = 1.
    For every page that yields data, the item list from PageParser is
    handed to ``processData``, the entries "source" (the page URL),
    "section" and "lang" are added through ``addItemToFinalList``, and
    ``storeToFile`` is called with the page number.

    The walk ends when the page number equals the value obtained from
    ``PageParser.getSectionLenght()``, or when a fetch returns None
    before any section length is known.

    :param url: base address; the page number is appended after a "/".
    """
    pageNumber = 1
    sectionLength = None
    while True:
        pageUrl = url + "/" + str(pageNumber)
        page = getDataFrom(pageUrl)

        if page is None:
            if sectionLength is None:
                # Nothing fetched and no known length: give up on the section.
                print("none data, return")
                return
            # NOTE(review): the same page is requested again immediately,
            # with no delay and no retry limit, so a permanently missing
            # page keeps this loop running. Kept as-is because the retry
            # may be deliberate -- confirm the intent.
            print("none data, continue")
            continue

        pageParser = PageParser(page)
        # Queried again on later pages for as long as the value is falsy.
        if not sectionLength:
            sectionLength = pageParser.getSectionLenght()

        self.processData(pageParser.getListOfItems())
        self.addItemToFinalList("source", pageUrl)
        self.addItemToFinalList("section", "netdoktor")
        self.addItemToFinalList("lang", "de")
        self.storeToFile(pageNumber)

        if pageNumber == sectionLength:
            return
        pageNumber += 1
def updateContent(self, url):
    """Fetch *url* and queue the entries selected by the cache for storing.

    The page is parsed with PageParser and its entries list is passed to
    ``self._cache.cache(url, entries)``.  Every entry returned by that call
    gets its "link" key set to *url* and is then appended to
    ``self.dataToStore``.  Nothing is done when the fetch returns None.

    :param url: address to fetch; also recorded as each entry's "link".
    """
    logging.debug("UPDATING data from - %s", url)
    page = getDataFrom(url)
    if page is None:
        return

    parsedEntries = PageParser(page).getEntriesList()
    # presumably cache() hands back only entries it has not stored before
    # -- TODO confirm against the cache implementation
    newEntries = self._cache.cache(url, parsedEntries)
    for entry in newEntries:
        entry["link"] = url

    logging.debug("ADD %d new entries", len(newEntries))
    self.dataToStore.extend(newEntries)
def updateContent(self, url):
    """Fetch *url* and queue the entries selected by the cache for storing.

    The page is parsed with PageParser and its entries list is passed to
    ``self._cache.cache(url, entries)``.  Every entry returned by that call
    gets its "link" key set to *url* and is then appended to
    ``self.dataToStore``.  Nothing is done when the fetch returns None.

    :param url: address to fetch; also recorded as each entry's "link".
    """
    logging.debug("UPDATING data from - %s", url)
    page = getDataFrom(url)
    if page is None:
        return

    parsedEntries = PageParser(page).getEntriesList()
    # presumably cache() hands back only entries it has not stored before
    # -- TODO confirm against the cache implementation
    newEntries = self._cache.cache(url, parsedEntries)
    for entry in newEntries:
        entry["link"] = url

    logging.debug("ADD %d new entries", len(newEntries))
    self.dataToStore.extend(newEntries)