Пример #1
0
def translate_movie_info(info: MovieInfo):
    """Translate the title and plot of ``info`` as requested by the configuration.

    The title is translated when ``info.title`` is non-empty and
    ``cfg.Translate.translate_title`` is set; the plot is handled the same way
    with ``info.plot`` and ``cfg.Translate.translate_plot``. Each text is
    passed to ``translate`` together with ``cfg.Translate.engine`` and
    ``info.actress``. ``info`` is modified in place.

    Returns:
        True when every requested translation succeeded (or none was
        requested). False as soon as one translation result carries no
        ``'trans'`` key; the error is logged and the remaining work is
        skipped. A title that was already translated stays translated even if
        the plot translation fails afterwards.
    """
    # --- title ---
    if info.title and cfg.Translate.translate_title:
        outcome = translate(info.title, cfg.Translate.engine, info.actress)
        if 'trans' not in outcome:
            logger.error('翻译标题时出错: ' + outcome['error'])
            return False
        # Keep the untranslated title before overwriting it.
        info.ori_title = info.title
        info.title = outcome['trans']
        # Attach the sentence-break information when the result provides it.
        for result_key, target_attr in (('orig_break', 'ori_title_break'),
                                        ('trans_break', 'title_break')):
            if result_key in outcome:
                setattr(info, target_attr, outcome[result_key])

    # --- plot ---
    if info.plot and cfg.Translate.translate_plot:
        outcome = translate(info.plot, cfg.Translate.engine, info.actress)
        if 'trans' not in outcome:
            logger.error('翻译简介时出错: ' + outcome['error'])
            return False
        # Only movies whose plot was translated can need ``ori_plot``, so the
        # attribute is added dynamically here instead of being declared on
        # the type.
        info.ori_plot = info.plot
        info.plot = outcome['trans']

    return True
Пример #2
0
def info_summary(movie: Movie, all_info):
    """Merge the data collected from several online sources into one result.

    ``all_info`` is iterated as a mapping of source name -> scraped info
    object. According to the original author's note it is already ordered by
    the configured source priority, so for each field the first non-empty
    value encountered is the one that is kept.

    Returns:
        True after the merged info has been stored in ``movie.info``; False
        (``movie.info`` is not assigned) when any field listed in
        ``cfg.Crawler.required_keys`` is still empty after merging.
    """
    final_info = MovieInfo(movie)

    # ----- Fields with a dedicated selection rule are handled first -----
    # genre: seeded from javdb whenever that source is present
    if 'javdb' in all_info:
        final_info.genre = all_info['javdb'].genre

    # ----- Generic pass: fill every field that is still empty -----
    attrs = [field for field in dir(final_info) if not field.startswith('_')]
    covers, big_covers = [], []
    # Cover fields are accumulated (without duplicates) across all sources
    # instead of being filled only once.
    collectors = {'cover': covers, 'big_cover': big_covers}
    for name, data in all_info.items():
        absorbed = []
        for attr in attrs:
            incoming = getattr(data, attr)
            bucket = collectors.get(attr)
            if bucket is None:
                # Ordinary field: adopt the scraped value only when the
                # merged value is still empty.
                current = getattr(final_info, attr)
                if not current and incoming:
                    setattr(final_info, attr, incoming)
                    absorbed.append(attr)
            elif incoming and incoming not in bucket:
                bucket.append(incoming)
                absorbed.append(attr)
        if absorbed:
            logger.debug(f"从'{name}'中获取了字段: " + ' '.join(absorbed))

    final_info.covers = covers
    final_info.big_covers = big_covers
    # Assign cover / big_cover as well so the required-field check below does
    # not fail on them.
    if covers:
        final_info.cover = covers[0]
    if big_covers:
        final_info.big_cover = big_covers[0]

    # ----- Fields that are checked last -----
    # title: prefer the airav title when Chinese titles are configured first
    if cfg.Crawler.title__chinese_first and 'airav' in all_info:
        airav_title = all_info['airav'].title
        if airav_title and final_info.title != airav_title:
            final_info.ori_title = final_info.title
            final_info.title = airav_title

    # Make sure every required field has received a value.
    for attr in cfg.Crawler.required_keys:
        if getattr(final_info, attr, None):
            continue
        logger.error(f"所有爬虫均未获取到字段: '{attr}',抓取失败")
        return False

    # All required fields are present: attach the merged data to the movie.
    movie.info = final_info
    return True
Пример #3
0
def parse_clean_data(movie: MovieInfo):
    """Parse the data for the given movie and clean up the result in place.

    Calls ``parse_data(movie)``; when that reports failure nothing else is
    done. Otherwise ``movie.genre_norm`` is set from
    ``genre_map.map(movie.genre_id)`` and, if
    ``cfg.Crawler.title__remove_actor`` is enabled, the title is passed
    through ``remove_trail_actor_in_title`` (the previous title is kept in
    ``movie.ori_title`` when the title changes).

    Returns:
        None in every case.
    """
    if not parse_data(movie):
        return

    movie.genre_norm = genre_map.map(movie.genre_id)
    # Nothing else needs the genre ids any more; clearing them marks the
    # conversion as done.
    movie.genre_id = None

    # This clean-up lives in each crawler to keep the data consistent and
    # avoid affecting the conversion (per the original note, the info written
    # to the nfo is a summary of several crawlers and its sources are not
    # consistent).
    if not cfg.Crawler.title__remove_actor:
        return
    trimmed_title = remove_trail_actor_in_title(movie.title, movie.actress)
    if trimmed_title != movie.title:
        movie.ori_title = movie.title
        movie.title = trimmed_title