示例#1
0
def read_file(file_path, read_type=READ_FILE_TYPE_FULL):
    """Read a local text file, either as one string or as a list of lines.

    One trailing newline is removed from the full content, and from every
    line in line mode. A file that does not exist yields an empty result
    ("" or []) instead of raising.

    :param file_path:
        the path of file

    :param read_type:
        READ_FILE_TYPE_FULL     read full file
        READ_FILE_TYPE_LINE     read each line of file

    :return:
        READ_FILE_TYPE_FULL     type of string
        READ_FILE_TYPE_LINE     type of list
    """
    # NOTE(review): the mode is compared with the literal 1, which presumably
    # equals READ_FILE_TYPE_FULL -- confirm against the constant definition.
    read_full = read_type == 1
    file_path = path.change_path_encoding(file_path)
    if not os.path.exists(file_path):
        return "" if read_full else []
    with open(file_path, "r") as file_handle:
        if read_full:
            result = file_handle.read()
            # endswith() is safe on an empty file, where indexing result[-1]
            # would raise IndexError.
            if result.endswith("\n"):
                result = result[:-1]
        else:
            result = [
                line[:-1] if line.endswith("\n") else line
                for line in file_handle
            ]
    return result
示例#2
0
def read_save_data(save_data_path, key_index, default_value_list):
    """Load a tab-separated save file into a dict keyed by one column.

    Every non-empty line is split on tabs and each field is stripped of
    surrounding whitespace. Missing or empty fields are then filled from
    default_value_list. If two lines share the same key the problem is
    reported and tool.process_exit() is called.

    :param save_data_path:
        path of the save file

    :param key_index:
        index of the column whose value is used as the dictionary key

    :param default_value_list:
        default value for each column, by position; a value that starts with
        "_" means "copy the value of the column with that index", e.g.
        ["", "_0"] makes column 1 default to the value of column 0

    :return:
        dict mapping key column value -> list of column values; empty when
        the file does not exist
    """
    result_list = {}
    if not os.path.exists(path.change_path_encoding(save_data_path)):
        return result_list
    for single_save_data in tool.read_file(save_data_path, tool.READ_FILE_TYPE_LINE):
        # Drop the UTF-8 BOM bytes and any line-ending characters
        single_save_data = single_save_data.replace("\xef\xbb\xbf", "").replace("\n", "").replace("\r", "")
        if len(single_save_data) == 0:
            continue

        # Strip the fields BEFORE the duplicate check: the dict is keyed by
        # the stripped value, so checking the raw value would let keys that
        # differ only by whitespace overwrite each other silently. A list
        # (not a lazy map object) is required because it is appended to and
        # indexed below.
        single_save_list = [value.strip() for value in single_save_data.split("\t")]

        save_key = single_save_list[key_index]
        if save_key in result_list:
            output.print_msg("存档中存在重复行 %s" % save_key)
            tool.process_exit()

        # Fill absent or empty columns from default_value_list
        for index, default_value in enumerate(default_value_list):
            # A leading "_" refers to another column of the same line
            if default_value != "" and default_value[0] == "_":
                default_value = single_save_list[int(default_value.replace("_", ""))]
            if len(single_save_list) <= index:
                single_save_list.append(default_value)
            if single_save_list[index] == "":
                single_save_list[index] = default_value
        # Read the key again: the default filling above may have changed it
        result_list[single_save_list[key_index]] = single_save_list
    return result_list
示例#3
0
def read_config(config_path):
    """Read the "setting" section of a config file into a plain dict.

    The file is opened with the UTF-8-SIG codec and parsed with
    SafeConfigParser; every option name and value of the "setting" section
    is returned encoded as UTF-8.

    :param config_path:
        path of the config file

    :return:
        dict of option name -> value for the "setting" section
    """
    encoded_path = path.change_path_encoding(config_path)
    with codecs.open(encoded_path, encoding="UTF-8-SIG") as config_handle:
        parser = ConfigParser.SafeConfigParser()
        parser.readfp(config_handle)
        return {
            option.encode("UTF-8"): option_value.encode("UTF-8")
            for option, option_value in parser.items("setting")
        }
示例#4
0
def get_file_md5(file_path):
    """Return the hexadecimal MD5 digest of a file.

    The file is hashed in 1 MiB chunks so it is never loaded into memory as
    a whole.

    :param file_path:
        path of the file to hash

    :return:
        hex digest string, or None when the file does not exist
    """
    encoded_path = path.change_path_encoding(file_path)
    if not os.path.exists(encoded_path):
        return None
    chunk_size = 1 << 20  # 1 MiB per read
    digest = hashlib.md5()
    with open(encoded_path, "rb") as source:
        # iter() with a sentinel stops at the empty read that marks EOF
        for chunk in iter(lambda: source.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
示例#5
0
def write_file(msg, file_path, append_type=WRITE_FILE_TYPE_APPEND):
    """Write one message, followed by a newline, to a local file.

    The parent directory is created through path.create_dir() first.

    :param msg:
        text to write; unicode text is encoded as UTF-8

    :param file_path:
        path of the file to write

    :param append_type:
        WRITE_FILE_TYPE_APPEND      append to the end of the file
        WRITE_FILE_TYPE_REPLACE     replace the content of the file

    :return:
        True when the message was written, False when the directory could
        not be created or append_type is not one of the two known values
    """
    file_path = path.change_path_encoding(file_path)
    if not path.create_dir(os.path.dirname(file_path)):
        return False
    if append_type == WRITE_FILE_TYPE_APPEND:
        open_type = "a"
    elif append_type == WRITE_FILE_TYPE_REPLACE:
        open_type = "w"
    else:
        return False
    # Build the complete line before opening the file, so that a message
    # that cannot be encoded or concatenated does not leave the file
    # truncated in replace mode.
    if isinstance(msg, unicode):
        msg = msg.encode("UTF-8")
    line = msg + "\n"
    with open(file_path, open_type) as file_handle:
        file_handle.write(line)
    # Report success explicitly; the failure paths above return False, and
    # falling off the end would return None, which is falsy as well.
    return True
示例#6
0
def save_net_file_list(file_url_list,
                       file_path,
                       header_list=None,
                       cookies_list=None):
    """Visit web and save to local(multiple remote resource, single local file)

    The response bodies are written one after another, in list order, into
    a single local file. As soon as one request does not succeed the partial
    file is deleted and a failure result is returned.

    :param file_url_list:
        the list of remote resource URL which you want to save

    :param file_path:
        the local file path which you want to save remote resource

    :param header_list:
        customize header dictionary

    :param cookies_list:
        customize cookies dictionary, will replaced header_list["Cookie"]

    :return:
        False when the target directory could not be created, otherwise a
        dictionary:
        status      0 download failure, 1 download successful
        code        failure reason: 0 on success, -1 when a request returned
                    HTTP_RETURN_CODE_RETRY, -2 when the loop body never ran
                    (HTTP_DOWNLOAD_RETRY_COUNT < 1), otherwise the response
                    status of the failed request
    """
    file_path = path.change_path_encoding(file_path)
    # Make sure the target directory exists
    if not path.create_dir(os.path.dirname(file_path)):
        return False
    # NOTE(review): every pass through this loop returns, so the body runs
    # at most once and nothing is retried here -- presumably http_request()
    # does its own retrying; confirm.
    for _ in range(0, HTTP_DOWNLOAD_RETRY_COUNT):
        failure = None
        # Download
        with open(file_path, "wb") as file_handle:
            for file_url in file_url_list:
                response = http_request(
                    file_url,
                    header_list=header_list,
                    cookies_list=cookies_list,
                    connection_timeout=HTTP_DOWNLOAD_CONNECTION_TIMEOUT,
                    read_timeout=HTTP_DOWNLOAD_READ_TIMEOUT)
                if response.status == HTTP_RETURN_CODE_SUCCEED:
                    file_handle.write(response.data)
                    continue
                if response.status == HTTP_RETURN_CODE_RETRY:
                    # Retry count exceeded, give up
                    failure = {"status": 0, "code": -1}
                else:
                    # Any other http code, give up
                    failure = {"status": 0, "code": response.status}
                break
        if failure is None:
            return {"status": 1, "code": 0}
        # Delete the partial file only after the handle has been closed:
        # removing a file that is still open fails on Windows.
        path.delete_dir_or_file(file_path)
        return failure
    return {"status": 0, "code": -2}
示例#7
0
                        "</div>").strip()
                    item_introduction = item_introduction.replace("'", "’")
                    output.print_msg("%s %s %s %s" %
                                     (item_position, item_name,
                                      special_attribute, item_introduction))
                    item_attribute_list[item_path].append(
                        [item_name, special_attribute, item_introduction])
                else:
                    output.print_msg("error get" + item_url)
        else:
            output.print_msg("error get" + item_index_url)
        pagination = tool.find_sub_string(item_index_response.data,
                                          '<ul class="ui-pagination">',
                                          "</ul>")
        if pagination:
            pagination = re.findall('<a href="#page=([\d]*)">', pagination)
            max_page = 1
            for page in pagination:
                max_page = max(max_page, int(page))
            if page_count < max_page:
                page_count += 1
                continue
        break

import os

# Write one tab-separated text file per key of item_attribute_list into the
# "data" directory; the file name comes from item_list[item_path].
path.create_dir("data")
for item_path in item_attribute_list:
    # os.path.join replaces the hard-coded "data\%s.txt": "\%" is not a valid
    # escape sequence, and a literal backslash only acts as a directory
    # separator on Windows, so elsewhere the file was created outside "data".
    file_name = "%s.txt" % item_list[item_path]
    file_path = path.change_path_encoding(os.path.join("data", file_name))
    with open(file_path, "w") as file_handle:
        for item in item_attribute_list[item_path]:
            file_handle.write("\t".join(item) + "\n")
示例#8
0
def save_net_file(file_url, file_path, need_content_type=False, header_list=None, cookies_list=None, head_check=True):
    """Visit web and save to local

    :param file_url:
        the remote resource URL which you want to save

    :param file_path:
        the local file path which you want to save remote resource

    :param need_content_type:
        is auto rename file according to "Content-Type" in response headers

    :param header_list:
        customize header dictionary

    :param cookies_list:
        customize cookies dictionary, will replaced header_list["Cookie"]

    :param head_check:
        "HEAD" method request to check response status and file size before download file

    :return:
        False when the target directory could not be created, otherwise a
        dictionary:
        status      0 download failure, 1 download successful
        code        failure reason: 0 on success, -1 for
                    HTTP_RETURN_CODE_URL_INVALID, -2 for
                    HTTP_RETURN_CODE_RETRY, -3 when every attempt failed,
                    -4 when Content-Length exceeds HTTP_DOWNLOAD_MAX_SIZE,
                    otherwise the response status
        file_path   finally local file path(when need_content_type is True, will rename it);
                    only present on success
    """
    file_path = path.change_path_encoding(file_path)
    # Make sure the target directory exists
    if not path.create_dir(os.path.dirname(file_path)):
        return False
    create_file = False
    # The method of the first request does not change between attempts
    request_method = "HEAD" if head_check else "GET"
    for retry_count in range(0, HTTP_DOWNLOAD_RETRY_COUNT):
        # Fetch the headers (and, without head_check, the body as well)
        response = http_request(file_url, request_method, header_list=header_list, cookies_list=cookies_list,
                                connection_timeout=HTTP_CONNECTION_TIMEOUT, read_timeout=HTTP_READ_TIMEOUT)
        if response.status != HTTP_RETURN_CODE_SUCCEED:
            if response.status == HTTP_RETURN_CODE_URL_INVALID:
                failure_code = -1
            elif response.status == HTTP_RETURN_CODE_RETRY:
                # Retry count exceeded, give up
                failure_code = -2
            else:
                # Any other http code, give up
                failure_code = response.status
            # Remove whatever an earlier attempt left behind
            if create_file:
                path.delete_dir_or_file(file_path)
            return {"status": 0, "code": failure_code}

        # todo: download in segments
        # Refuse files that are too large
        content_length = response.getheader("Content-Length")
        if content_length is not None and int(content_length) > HTTP_DOWNLOAD_MAX_SIZE:
            return {"status": 0, "code": -4}
        # Use the Content-Type of the response as the file extension
        if need_content_type:
            content_type = response.getheader("Content-Type")
            if content_type is not None:
                # Drop parameters such as "; charset=UTF-8" so they do not
                # end up in the file extension
                mime_type = content_type.split(";")[0].strip()
                sub_type = mime_type.split("/")[-1]
                if mime_type == "video/quicktime":
                    new_file_type = "mov"
                elif sub_type and sub_type != "octet-stream":
                    new_file_type = sub_type
                else:
                    # The generic binary type is sent as
                    # "application/octet-stream", so the subtype has to be
                    # compared; it says nothing about the real file type and
                    # the original extension is kept.
                    new_file_type = None
                if new_file_type is not None:
                    file_path = os.path.splitext(file_path)[0] + "." + new_file_type

        # After a HEAD request the full data still has to be fetched
        if head_check:
            response = http_request(file_url, method="GET", header_list=header_list, cookies_list=cookies_list,
                                    connection_timeout=HTTP_DOWNLOAD_CONNECTION_TIMEOUT, read_timeout=HTTP_DOWNLOAD_READ_TIMEOUT)
            if response.status != HTTP_RETURN_CODE_SUCCEED:
                continue

        # Download
        with open(file_path, "wb") as file_handle:
            file_handle.write(response.data)
        create_file = True
        # Without a Content-Length there is nothing to verify against
        if content_length is None:
            return {"status": 1, "code": 0, "file_path": file_path}
        # Compare the size on disk with the Content-Length of the response
        file_size = os.path.getsize(file_path)
        if int(content_length) == file_size:
            return {"status": 1, "code": 0, "file_path": file_path}
        # Sizes differ: report it and try again. The local size belongs to
        # the local path and the Content-Length to the URL.
        output.print_msg("本地文件%s:%s和网络文件%s:%s不一致" % (file_path.encode("UTF-8"), file_size, str(file_url), content_length))
    if create_file:
        path.delete_dir_or_file(file_path)
    return {"status": 0, "code": -3}
示例#9
0
def read_config(config_path):
    """Load a config file into a SafeConfigParser and return the parser.

    The file is opened with the UTF-8-SIG codec.

    :param config_path:
        path of the config file

    :return:
        the SafeConfigParser instance holding the parsed file
    """
    encoded_path = path.change_path_encoding(config_path)
    parser = ConfigParser.SafeConfigParser()
    with codecs.open(encoded_path, encoding="UTF-8-SIG") as config_handle:
        parser.readfp(config_handle)
    return parser