def save_video(ts_file_list, file_path):
    """Download every TS segment in order and concatenate them into one file.

    :param ts_file_list: iterable of segment URLs; segments are written in
        iteration order
    :param file_path: destination path of the merged file
    :return: True when every segment was fetched and written; False as soon
        as one request does not report return code 1 (segments written so
        far are left on disk, as before)
    """
    file_path = tool.change_path_encoding(file_path)
    # The context manager closes the handle on the early "return False" and
    # on exceptions; the previous open()/close() pair leaked it in both cases.
    with open(file_path, "wb") as file_handle:
        for ts_file_url in ts_file_list:
            ts_file_return_code, ts_file_data = tool.http_request(ts_file_url)[:2]
            if ts_file_return_code != 1:
                return False
            file_handle.write(ts_file_data)
    return True
def save_net_file(file_url, file_path, need_content_type=False, header_list=None, cookies_list=None):
    """Download file_url to file_path, making at most five attempts.

    :param file_url: URL to download
    :param file_path: destination path; its directory is created if needed
    :param need_content_type: when true, replace the file suffix with the
        subtype of the response's Content-Type header
    :param header_list: forwarded to http_request
    :param cookies_list: forwarded to http_request
    :return: False when the destination directory cannot be created.
        Otherwise a dict:
        {"status": 1, "code": 0, "file_path": path} on success, or
        {"status": 0, "code": c} on failure, where c is
        -1 for HTTP_RETURN_CODE_URL_INVALID,
        -2 for HTTP_RETURN_CODE_RETRY,
        -3 when all five attempts were used up,
        or the unexpected HTTP status itself.
        Any file written by a failed attempt is removed.
    """
    file_path = tool.change_path_encoding(file_path)
    # Make sure the destination directory exists
    if not tool.make_dir(os.path.dirname(file_path), 0):
        return False
    create_file = False
    failure_code = -3  # reported when every attempt has been used up
    for _ in range(5):
        response = http_request(file_url, header_list=header_list, cookies_list=cookies_list, read_timeout=60)
        if response.status == HTTP_RETURN_CODE_SUCCEED:
            # Use the Content-Type of the response as the file suffix
            if need_content_type:
                content_type = response.getheader("Content-Type")
                if content_type is not None:
                    # Keep only the subtype: "image/jpeg; charset=x" -> "jpeg".
                    # The old check compared the whole header value with
                    # "octet-stream", so "application/octet-stream" was never
                    # skipped and parameters leaked into the suffix.
                    extension = content_type.split(";")[0].split("/")[-1].strip()
                    if extension and extension != "octet-stream":
                        file_path = os.path.splitext(file_path)[0] + "." + extension
            # Download
            with open(file_path, "wb") as file_handle:
                file_handle.write(response.data)
            create_file = True
            # Compare the size on disk with the Content-Length of the response;
            # without that header there is nothing to verify against
            content_length = response.getheader("Content-Length")
            if content_length is None:
                return {"status": 1, "code": 0, "file_path": file_path}
            file_size = os.path.getsize(file_path)
            if int(content_length) == file_size:
                return {"status": 1, "code": 0, "file_path": file_path}
            # Sizes differ: report it (local path with local size, URL with
            # Content-Length; these were swapped before) and try again
            tool.print_msg("本地文件%s:%s和网络文件%s:%s不一致" % (file_path, file_size, file_url, content_length))
        elif response.status == HTTP_RETURN_CODE_URL_INVALID:
            failure_code = -1
            break
        # Retry count exceeded (per the original comment), give up immediately
        elif response.status == HTTP_RETURN_CODE_RETRY:
            failure_code = -2
            break
        # 5xx-type server errors: try again
        elif response.status in [500, 502, 503, 504]:
            continue
        # Any other HTTP code: give up
        else:
            failure_code = response.status
            break
    # Do not leave a file from a failed attempt behind
    if create_file:
        os.remove(file_path)
    return {"status": 0, "code": failure_code}
def save_net_file_list(file_url_list, file_path, header_list=None):
    """Download several URLs in order and concatenate them into one file.

    :param file_url_list: iterable of URLs; bodies are written in iteration
        order
    :param file_path: destination path; its directory is created if needed
    :param header_list: forwarded to http_request
    :return: False when the destination directory cannot be created.
        Otherwise a dict: {"status": 1, "code": 0} on success, or
        {"status": 0, "code": c} where c is -1 for HTTP_RETURN_CODE_RETRY or
        the unexpected HTTP status itself. The partial file is removed on
        failure.
    """
    file_path = tool.change_path_encoding(file_path)
    # Make sure the destination directory exists
    if not tool.make_dir(os.path.dirname(file_path), 0):
        return False
    # NOTE: the former "for retry_count in range(0, 5)" wrapper returned on
    # its first pass in every case, so it has been dropped; no retry happened.
    failure = None
    # Download
    with open(file_path, "wb") as file_handle:
        for file_url in file_url_list:
            response = http_request(file_url, header_list=header_list, read_timeout=60)
            if response.status == HTTP_RETURN_CODE_SUCCEED:
                file_handle.write(response.data)
            # Retry count exceeded (per the original comment), give up
            elif response.status == HTTP_RETURN_CODE_RETRY:
                failure = {"status": 0, "code": -1}
                break
            # Any other HTTP code: give up
            else:
                failure = {"status": 0, "code": response.status}
                break
    if failure is not None:
        # Remove only after the handle is closed: deleting a file that is
        # still open fails on Windows, which is what the old code attempted.
        os.remove(file_path)
        return failure
    return {"status": 1, "code": 0}
def read_save_data(save_data_path, key_index, default_value_list):
    """Load a tab-separated save file into a dict keyed by one column.

    :param save_data_path: path of the save file; a missing file yields {}
    :param key_index: index of the column whose value becomes the dict key
    :param default_value_list: per-column defaults applied to missing or
        empty columns. A default starting with "_" refers to another column:
        e.g. ["", "_0"] means column 1 falls back to the value of column 0.
    :return: dict mapping the key column value to the list of column values
        of that line (later lines overwrite earlier ones with the same key)
    """
    records = {}
    if not os.path.exists(tool.change_path_encoding(save_data_path)):
        return records
    # presumably mode 2 makes read_file return the file as a list of lines
    for raw_line in tool.read_file(save_data_path, 2):
        # Drop the UTF-8 BOM bytes and every line terminator
        line = raw_line
        for junk in ("\xef\xbb\xbf", "\n", "\r"):
            line = line.replace(junk, "")
        if not line:
            continue
        columns = line.split("\t")
        # Fill in defaults for columns that are absent or empty
        for position, fallback in enumerate(default_value_list):
            # A leading "_" means: take the current value of the column whose
            # index remains once the underscores are stripped
            if fallback != "" and fallback[0] == "_":
                fallback = columns[int(fallback.replace("_", ""))]
            if len(columns) <= position:
                columns.append(fallback)
            elif columns[position] == "":
                columns[position] = fallback
        records[columns[key_index]] = columns
    return records
def save_image(image_byte, image_path):
    """Write raw image bytes to image_path, replacing any existing file.

    :param image_byte: binary content to store
    :param image_path: destination path
    """
    image_path = tool.change_path_encoding(image_path)
    # "with" closes the handle even when write() raises; the previous
    # open()/close() pair leaked it in that case.
    with open(image_path, "wb") as image_file:
        image_file.write(image_byte)
if special_attribute: special_attribute = special_attribute[special_attribute.find(">") + 1:] special_attribute = special_attribute.replace("</span>", "").replace('<span class="d3-color-magic">', "").replace('<span class="value">', "") special_attribute = special_attribute.replace("'", "’") item_introduction = tool.find_sub_string(item_detail, '<div class="item-flavor d3-color-orange serif">', "</div>").strip() item_introduction = item_introduction.replace("'", "’") print item_position, item_name, special_attribute, item_introduction item_attribute_list[item_path].append([item_name, special_attribute, item_introduction]) else: print "error get" + item_url else: print "error get" + item_index_url pagination = tool.find_sub_string(item_index_page, '<ul class="ui-pagination">', '</ul>') if pagination: pagination = re.findall('<a href="#page=([\d]*)">', pagination) max_page = 1 for page in pagination: max_page = max(max_page, int(page)) if page_count < max_page: page_count += 1 continue break tool.make_dir("data", 0) for item_path in item_attribute_list: file_handle = open(tool.change_path_encoding("data\%s.txt" % item_list[item_path]), 'w') for item in item_attribute_list[item_path]: file_handle.write("\t".join(item) + "\n") file_handle.close()
def read_config(config_path):
    """Parse an INI-style configuration file.

    The file is decoded with the "UTF-8-SIG" codec, so a leading byte order
    mark is not treated as part of the first line.

    :param config_path: path of the configuration file
    :return: the populated ConfigParser.SafeConfigParser instance
    """
    parser = ConfigParser.SafeConfigParser()
    encoded_path = tool.change_path_encoding(config_path)
    with codecs.open(encoded_path, encoding="UTF-8-SIG") as config_stream:
        parser.readfp(config_stream)
    return parser