def list(self, marker):
    """Yield a ``task.Task`` for every usable line of the URL list file.

    Each line of ``self._url_list_file`` is split on whitespace. A line with
    one column is treated as a bare URL; with more columns, the first is
    the check value and the second is the URL. Any further columns are
    ignored.

    For each URL a HEAD request (5 second timeout) is issued. When the
    response status is 200 the ``content-length`` header value is passed on
    as the file size; for any other status the size stays ``None``. If the
    HEAD attempt raises, the error is logged and the line is skipped.

    Args:
        marker: Accepted but not used by this method.

    Yields:
        ``task.Task(store_path_without_first_char, file_size, url,
        check_value)`` for every accepted line.
    """
    with open(self._url_list_file, 'r') as url_file:
        for line in url_file:
            try:
                columns = line.split()
                if not columns:
                    # Blank / whitespace-only line.
                    logger.warn("{} is invalid".format(line))
                    continue

                if len(columns) == 1:
                    check_value, url_path = None, columns[0]
                else:
                    check_value, url_path = columns[0].strip(), columns[1]

                parsed = urlparse.urlparse(url_path)
                if not parsed.path:
                    logger.warn("{} is invalid, No path".format(line))
                    continue

                # Ask the server for the object size without downloading it.
                file_size = None
                try:
                    head = requests.head(url_path, timeout=5)
                    if head.status_code == 200:
                        file_size = head.headers['content-length']
                except Exception as err:
                    logger.exception("HEAD object failed with " + str(err))
                    continue

                store_path = parsed.path.strip()
                clean_url = url_path.strip()
                logger.info("yield new object: {}".format(
                    str({'store_path': store_path, 'url_path': clean_url})))
                # Drop the first character of the path (normally the
                # leading "/") to form the first Task argument.
                yield task.Task(store_path[1:], file_size, clean_url,
                                check_value)
            except Exception:
                logger.warn("{} is invalid".format(line))
def list(self):
    """Yield a ``task.Task`` for every usable line of the URL list file.

    Each line of ``self._url_list_file`` is split on tab characters. A line
    with one column is treated as a bare URL; with more columns, the first
    is the check value and the second is the URL. Any further columns are
    ignored.

    Lines that are blank, or whose URL has no path component, are logged
    as invalid and skipped instead of producing a task with an empty path.

    Yields:
        ``task.Task(store_path_without_first_char, check_value, url)`` for
        every accepted line; ``check_value`` is ``None`` for bare-URL lines.
    """
    with open(self._url_list_file, 'r') as f:
        for line in f:
            try:
                # str.split('\t') always returns at least one element, so a
                # length check on the result can never catch a blank line;
                # test the line itself instead.
                if not line.strip():
                    logger.warn("{} is invalid".format(line))
                    continue

                field = line.split('\t')
                check_value = None
                if len(field) == 1:
                    url_path = field[0]
                else:
                    check_value = field[0].strip()
                    url_path = field[1]

                ret = urlparse.urlparse(url_path)
                store_path = ret.path.strip()
                if store_path == '':
                    logger.warn("{} is invalid, No path".format(line))
                    # Skip the line: without a path there is nothing
                    # meaningful to build the task from.
                    continue

                clean_url = url_path.strip()
                logger.info("yield new object: {}".format(
                    str({
                        'store_path': store_path,
                        'url_path': clean_url
                    })))
                # Drop the first character of the path (normally the
                # leading "/") to form the first Task argument.
                yield task.Task(store_path[1:], check_value, clean_url)
            except Exception:
                logger.warn("{} is invalid".format(line))