def get_target(self, url, target_prefix=""):
    """Fetch ``url`` and parse the ``data`` member of its JSON response.

    Args:
        url: Address handed straight to ``self.get_html``.
        target_prefix: Unused by this implementation; kept so the signature
            stays compatible with existing callers.

    Returns:
        Whatever ``parser`` returns for the decoded ``data`` value, or ``-1``
        when ``self.get_html`` reports ``-1``, the response is not a JSON
        object with a ``data`` member, ``data`` is empty, or ``parser``
        raises.
    """
    # get_html yields a pair; only the first element is needed here.
    content, _ = self.get_html(url)
    if content == -1:
        return -1

    # A malformed or unexpected response must follow the same -1 convention
    # as every other failure instead of escaping as an exception.
    try:
        data = json.loads(content)['data']
    except (ValueError, KeyError, TypeError) as e:
        print(e)
        return -1

    if not data:
        return -1

    # parser is opaque from here, so keep the original catch-all boundary.
    try:
        return parser(data)
    except Exception as e:
        print(e)
        return -1
def get_target(self, url, target_prefix=""):
    """Fetch ``url`` and parse the HTML fragment embedded in its response.

    The response body is scanned line by line. The first line that starts
    with ``target_prefix`` and contains the ``html":"`` marker is used: the
    text after the marker, minus the last 12 characters of the line, has its
    escaped whitespace sequences and backslashes removed and is then handed
    to ``parser``.

    Args:
        url: Address handed straight to ``self.get_html``.
        target_prefix: Prefix identifying the line that carries the payload.
            The default ``""`` matches every line.

    Returns:
        Whatever ``parser`` returns for the extracted fragment, or ``-1``
        when ``self.get_html`` reports ``-1``, ``parser`` returns ``None``,
        or no usable line is found (in which case the raw content is also
        written out via ``save_to_disk("tmp.html", ...)``).
    """
    marker = 'html":"'
    # Number of trailing characters dropped from the matching line.
    # NOTE(review): presumably the closing wrapper around the embedded
    # payload -- confirm against the real response format.
    trailer_len = 12
    # Order matters: the escaped whitespace sequences go first so the final
    # pass only removes the backslashes that are left over.
    escapes = ("\\t", "\\n", "\\r", "\\")

    # get_html yields a pair; only the first element is needed here.
    content, _ = self.get_html(url)
    if content == -1:
        return -1

    for line in content.splitlines():
        if not line.startswith(target_prefix):
            continue
        start = line.find(marker)
        if start == -1:
            # Without this check find()'s -1 would turn the slice below into
            # line[6:-12] and feed unrelated text to the parser.
            continue
        target = line[start + len(marker):-trailer_len]
        for escape in escapes:
            target = target.replace(escape, "")
        ps = parser(target)
        if ps is None:
            # TODO: report through the project logger instead of stdout.
            print("why here...")
            return -1
        return ps

    # TODO: report through the project logger instead of stdout.
    print("bad content, please check the resource")
    # Keep the unparseable response around for inspection.
    save_to_disk("tmp.html", content)
    return -1
def get_target(self, url, target_prefix=""):
    """Fetch ``url`` and parse the HTML fragment embedded in its response.

    Each line of the response is inspected in order. The first one that
    begins with ``target_prefix`` and contains the ``html":"`` marker
    supplies the payload: everything after the marker except the final 12
    characters of the line. Escaped tab/newline/carriage-return sequences
    and any remaining backslashes are removed before the text is passed to
    ``parser``.

    Args:
        url: Address handed straight to ``self.get_html``.
        target_prefix: Prefix a line must start with to be considered.
            The default ``""`` matches every line.

    Returns:
        The value produced by ``parser`` for the extracted fragment, or
        ``-1`` when ``self.get_html`` reports ``-1``, ``parser`` returns
        ``None``, or no line yields a payload (the raw content is then also
        written out via ``save_to_disk("tmp.html", ...)``).
    """
    marker = 'html":"'
    # Number of trailing characters cut from the matching line.
    # NOTE(review): presumably the closing wrapper around the embedded
    # payload -- confirm against the real response format.
    trailer_len = 12
    # Order matters: escaped whitespace sequences are removed before the
    # catch-all backslash pass.
    escapes = ("\\t", "\\n", "\\r", "\\")

    # get_html yields a pair; only the first element is needed here.
    content, _ = self.get_html(url)
    if content == -1:
        return -1

    for line in content.splitlines():
        if not line.startswith(target_prefix):
            continue
        start = line.find(marker)
        if start == -1:
            # An unchecked miss would make the slice below start at offset 6
            # and pass unrelated text to the parser.
            continue
        target = line[start + len(marker):-trailer_len]
        for escape in escapes:
            target = target.replace(escape, "")
        ps = parser(target)
        if ps is None:
            # TODO: report through the project logger instead of stdout.
            print("why here...")
            return -1
        return ps

    # TODO: report through the project logger instead of stdout.
    print("bad content, please check the resource")
    # Keep the unparseable response around for inspection.
    save_to_disk("tmp.html", content)
    return -1