示例#1
0
def clear_un_img():
    """Remove images under ``collect/img`` that no collected record lists.

    The ``collect`` directory and every directory below it are read with
    ``collect.read_weibo(..., isreadimg=True)``; the flattened ``"img"``
    values of the returned records form the keep-set.  Every entry returned
    by ``FileUtil.listdir`` for the image directory that is not in the
    keep-set is passed to ``os.remove``.

    If ``FileUtil.isempty`` is true for the ``collect`` directory, the image
    directory is handed to ``FileUtil.empty`` instead and nothing is read.
    """
    # Directory where the images are stored.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # Records under this directory name the images that must be kept.
    leave_img_url = os.path.join(RESOURCE_BASE_URL, "collect")

    if FileUtil.isempty(leave_img_url):
        FileUtil.empty(all_img_url)
    else:
        all_imgs = FileUtil.listdir(all_img_url)

        # The root itself plus every directory name os.walk reports below it.
        dirs = [leave_img_url]
        for parent, dirnames, _ in os.walk(leave_img_url):
            dirs.extend(os.path.join(parent, dirname) for dirname in dirnames)

        # A set keeps the membership test in the removal loop constant-time.
        leave_imgs = set()
        for dir_ in dirs:
            records = collect.read_weibo(dir_, isreadimg=True)
            leave_imgs.update(
                flatten([rec.get("img") for rec in records if rec.get("img")]))

        # Delete the surplus images.  An explicit loop is used because map()
        # is lazy on Python 3 and would never call os.remove there.
        for img_path in all_imgs:
            if img_path not in leave_imgs:
                os.remove(img_path)
    def read_train(self, path):
        """Read training records from the files listed under ``path``.

        ``path`` is joined onto ``RESOURCE_BASE_URL`` unless it already starts
        with it.  The files come from ``FileUtil.listdir(path,
        isrecursion=False)``; each one is handed to
        ``CommonUtil.read_from_file`` together with the line parser below, and
        the per-file results are passed through ``flatten``.

        The parser produces one dict per line starting with ``"sentence"``.
        Following lines starting with ``"img"`` or ``"label"`` fill the
        ``"img"`` (list of non-empty comma-separated items) and ``"label"``
        keys of the most recent record.
        """
        def handle_read(datas):
            records = []
            # "img"/"label" lines seen before the first "sentence" line are
            # written to this dict, which is never added to the result.
            current = dict()
            for data in datas:
                # Text after the first ":"; the whole line if there is none
                # (find() returns -1, so the slice starts at 0).
                value = data[data.find(":") + 1:]
                if data.startswith("sentence"):
                    current = dict()
                    current["sentence"] = value
                    records.append(current)
                elif data.startswith("img"):
                    # A real list (not a lazy filter object on Python 3), so
                    # the value can be iterated repeatedly and is falsy when
                    # empty.
                    current["img"] = [item for item in value.split(",") if item]
                elif data.startswith("label"):
                    current["label"] = value
            return records

        if not path.startswith(RESOURCE_BASE_URL):
            path = os.path.join(RESOURCE_BASE_URL, path)
        filenames = FileUtil.listdir(path, isrecursion=False)
        return flatten([CommonUtil.read_from_file(filename, handle_read)
                        for filename in filenames])
示例#3
0
def read_weibo(path, isreadimg=False):
    """Read post records from the files listed under ``path``.

    ``path`` is joined onto ``RESOURCE_BASE_URL`` unless it already starts
    with it.  The files come from ``FileUtil.listdir(path,
    isrecursion=False)``; each one is handed to ``read_from_file`` together
    with the line parser below, and the per-file results are passed through
    ``flatten``.

    Args:
        path: Directory to read, absolute or relative to ``RESOURCE_BASE_URL``.
        isreadimg: When false (the default), lines starting with ``"img"``
            are ignored and records carry only the ``"sentence"`` key.

    The parser produces one dict per line starting with ``"sentence"``; with
    ``isreadimg`` set, a following ``"img"`` line adds an ``"img"`` list of
    the non-empty comma-separated items to the most recent record.
    """
    def handle_read(datas):
        records = []
        # An "img" line seen before the first "sentence" line is written to
        # this dict, which is never added to the result.
        current = dict()
        for data in datas:
            # Text after the first ":"; the whole line if there is none
            # (find() returns -1, so the slice starts at 0).
            value = data[data.find(":") + 1:]
            if data.startswith("sentence"):
                current = dict()
                current["sentence"] = value
                records.append(current)
            elif isreadimg and data.startswith("img"):
                # A real list (not a lazy filter object on Python 3), so the
                # value can be iterated repeatedly and is falsy when empty.
                current["img"] = [item for item in value.split(",") if item]
        return records

    if not path.startswith(RESOURCE_BASE_URL):
        path = os.path.join(RESOURCE_BASE_URL, path)
    filenames = FileUtil.listdir(path, isrecursion=False)
    return flatten([read_from_file(filename, handle_read)
                    for filename in filenames])
示例#4
0
    def read_train(self, path):
        """Read training records from the files listed under ``path``.

        ``path`` is joined onto ``RESOURCE_BASE_URL`` unless it already starts
        with it.  The files come from ``FileUtil.listdir(path,
        isrecursion=False)``; each one is handed to
        ``CommonUtil.read_from_file`` together with the line parser below, and
        the per-file results are passed through ``flatten``.

        The parser produces one dict per line starting with ``"sentence"``.
        Following lines starting with ``"img"`` or ``"label"`` fill the
        ``"img"`` (list of non-empty comma-separated items) and ``"label"``
        keys of the most recent record.
        """
        def handle_read(datas):
            records = []
            # "img"/"label" lines seen before the first "sentence" line are
            # written to this dict, which is never added to the result.
            current = dict()
            for data in datas:
                # Text after the first ":"; the whole line if there is none
                # (find() returns -1, so the slice starts at 0).
                value = data[data.find(":") + 1:]
                if data.startswith("sentence"):
                    current = dict()
                    current["sentence"] = value
                    records.append(current)
                elif data.startswith("img"):
                    # A real list (not a lazy filter object on Python 3), so
                    # the value can be iterated repeatedly and is falsy when
                    # empty.
                    current["img"] = [item for item in value.split(",") if item]
                elif data.startswith("label"):
                    current["label"] = value
            return records

        if not path.startswith(RESOURCE_BASE_URL):
            path = os.path.join(RESOURCE_BASE_URL, path)
        filenames = FileUtil.listdir(path, isrecursion=False)
        return flatten([
            CommonUtil.read_from_file(filename, handle_read)
            for filename in filenames
        ])
示例#5
0
def read_weibo(path, isreadimg=False):
    """Read post records from the files listed under ``path``.

    ``path`` is joined onto ``RESOURCE_BASE_URL`` unless it already starts
    with it.  The files come from ``FileUtil.listdir(path,
    isrecursion=False)``; each one is handed to ``read_from_file`` together
    with the line parser below, and the per-file results are passed through
    ``flatten``.

    Args:
        path: Directory to read, absolute or relative to ``RESOURCE_BASE_URL``.
        isreadimg: When false (the default), lines starting with ``"img"``
            are ignored and records carry only the ``"sentence"`` key.

    The parser produces one dict per line starting with ``"sentence"``; with
    ``isreadimg`` set, a following ``"img"`` line adds an ``"img"`` list of
    the non-empty comma-separated items to the most recent record.
    """
    def handle_read(datas):
        records = []
        # An "img" line seen before the first "sentence" line is written to
        # this dict, which is never added to the result.
        current = dict()
        for data in datas:
            # Text after the first ":"; the whole line if there is none
            # (find() returns -1, so the slice starts at 0).
            value = data[data.find(":") + 1:]
            if data.startswith("sentence"):
                current = dict()
                current["sentence"] = value
                records.append(current)
            elif isreadimg and data.startswith("img"):
                # A real list (not a lazy filter object on Python 3), so the
                # value can be iterated repeatedly and is falsy when empty.
                current["img"] = [item for item in value.split(",") if item]
        return records

    if not path.startswith(RESOURCE_BASE_URL):
        path = os.path.join(RESOURCE_BASE_URL, path)
    filenames = FileUtil.listdir(path, isrecursion=False)
    return flatten(
        [read_from_file(filename, handle_read) for filename in filenames])
def clear_un_img():
    """Remove images under ``collect/img`` that no collected record lists.

    The ``collect`` directory and every directory below it are read with
    ``collect.read_weibo(..., isreadimg=True)``; the flattened ``"img"``
    values of the returned records form the keep-set.  Every entry returned
    by ``FileUtil.listdir`` for the image directory that is not in the
    keep-set is passed to ``os.remove``.

    If ``FileUtil.isempty`` is true for the ``collect`` directory, the image
    directory is handed to ``FileUtil.empty`` instead and nothing is read.
    """
    # Directory where the images are stored.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # Records under this directory name the images that must be kept.
    leave_img_url = os.path.join(RESOURCE_BASE_URL, "collect")

    if FileUtil.isempty(leave_img_url):
        FileUtil.empty(all_img_url)
    else:
        all_imgs = FileUtil.listdir(all_img_url)

        # The root itself plus every directory name os.walk reports below it.
        dirs = [leave_img_url]
        for parent, dirnames, _ in os.walk(leave_img_url):
            dirs.extend(os.path.join(parent, dirname) for dirname in dirnames)

        # A set keeps the membership test in the removal loop constant-time.
        leave_imgs = set()
        for dir_ in dirs:
            records = collect.read_weibo(dir_, isreadimg=True)
            leave_imgs.update(
                flatten([rec.get("img") for rec in records if rec.get("img")]))

        # Delete the surplus images.  An explicit loop is used because map()
        # is lazy on Python 3 and would never call os.remove there.
        for img_path in all_imgs:
            if img_path not in leave_imgs:
                os.remove(img_path)
def count_img():
    """Print how many entries ``FileUtil.listdir`` returns for ``collect/img``."""
    # Directory where the images are stored.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # A single parenthesised argument prints the same text as the Python 2
    # print statement and is also valid syntax on Python 3.
    print("It's have %d images" % len(FileUtil.listdir(all_img_url)))
示例#8
0
def count_img():
    """Print how many entries ``FileUtil.listdir`` returns for ``collect/img``."""
    # Directory where the images are stored.
    all_img_url = os.path.join(RESOURCE_BASE_URL, "collect/img")
    # A single parenthesised argument prints the same text as the Python 2
    # print statement and is also valid syntax on Python 3.
    print("It's have %d images" % len(FileUtil.listdir(all_img_url)))