Exemplo n.º 1
0
    def process_item(self, item, spider):
        """Export ``item`` to a JSON file and an Excel file, then return it.

        Both outputs share the base path ``<data_dir>/<name>/<name>``, where
        ``<name>`` is ``item['name']`` passed through
        ``skutils.escape_filename``. The JSON file is written to
        ``<base path>.json``; the Excel file is produced by
        ``excelutils.write_designer_excel``.

        NOTE(review): the ``<data_dir>/<name>`` directory is not created
        here -- presumably an earlier step creates it; confirm.

        Args:
            item: the item to export; ``item['name']`` is read.
            spider: not used by this method.

        Returns:
            The ``item`` that was passed in.
        """
        safe_name = skutils.escape_filename(item['name'])
        # The file base name repeats the directory name: <data_dir>/<name>/<name>
        base_path = os.path.join(GlobalState.data_dir, safe_name, safe_name)

        # JSON export (the exporter is handed a binary file object)
        with open(base_path + '.json', 'w+b') as json_file:
            json_exporter = JsonItemExporter(json_file)
            json_exporter.start_exporting()
            json_exporter.export_item(item)
            json_exporter.finish_exporting()

        # Excel export
        excelutils.write_designer_excel(item, base_path, safe_name)
        return item
Exemplo n.º 2
0
    def process_item(self, item, spider):
        """Move the item's downloaded image files into its designer directory.

        A directory named after the escaped ``item['name']`` is created under
        ``GlobalState.data_dir`` (an existing directory is reused). Every
        entry of ``item['files']`` whose file exists under
        ``GlobalState.files_store`` is moved there and renamed to
        ``[<product uid>_]picture<N><ext>``. The uid prefix is added only when
        the file's URL appears in some product's ``img_url`` list, and ``N``
        counts the successfully moved files, starting at 1. Missing files are
        logged and skipped.

        The new file names are then recorded on the item:
        ``item['img_names']`` for the item's own ``img_url`` (empty string
        when no file was moved for it), and ``p['img_names']`` for every
        product (``'DownloadFailed'`` for each URL without a moved file).

        Args:
            item: the scraped item; the keys 'name', 'files', 'products' and
                'img_url' are read. A missing or empty 'products' is treated
                as "no products".
            spider: not used by this method.

        Returns:
            The same ``item``, updated in place.
        """
        # Function-scope import so this method does not rely on a module-level
        # import that may not exist in the file.
        import shutil

        designer_dir_name = skutils.escape_filename(item['name'])
        designer_dir_path = os.path.join(GlobalState.data_dir, designer_dir_name)
        # os.makedirs raises if the directory already exists (e.g. on a
        # re-run), so only create it when it is missing.
        if not os.path.isdir(designer_dir_path):
            os.makedirs(designer_dir_path)

        products = item.get('products') or []
        # Map every product image URL to the uid of the product it belongs to.
        # You can try this in a REPL with:
        #   products = [{'img_url': ['url1', 'url2'], 'uid': 'uid1'}]
        #   -> {'url1': 'uid1', 'url2': 'uid1'}
        image_file_to_product_id_map = {
            img_url: p['uid'] for p in products for img_url in p['img_url']
        }

        # Move the image files into the designer directory.
        image_file_to_name_map = {}
        index = 1
        for f in item['files']:
            url = f['url']
            file_path = os.path.join(GlobalState.files_store, f['path'])
            if url in image_file_to_product_id_map:
                product_id = image_file_to_product_id_map[url]
                prefix = product_id + "_"
            else:
                product_id = prefix = ""

            if not os.path.isfile(file_path):
                # Log the bare product id, not the filename prefix.
                self.logger.warning(u"move image file failed, file not exist, product id[%s], file[%s]" % (product_id, file_path))
                continue

            file_extension = os.path.splitext(file_path)[1]
            new_filename = "%spicture%d%s" % (prefix, index, file_extension)
            # shutil.move also works when the files store and the data
            # directory are on different filesystems, where os.rename fails.
            shutil.move(file_path, os.path.join(designer_dir_path, new_filename))
            image_file_to_name_map[url] = new_filename
            # Only successfully moved files consume a picture number.
            index += 1

        # Record the new image names on the item and on each product.
        item['img_names'] = image_file_to_name_map.get(item.get('img_url'), '')
        for p in products:
            p['img_names'] = [image_file_to_name_map.get(img_url, 'DownloadFailed') for img_url in p['img_url']]

        return item