Example #1
def make_pgen(args, ids, listparser, sub_pid, key_id, key_type, kidattr):
    """作品情報作成ジェネレータの作成"""
    if args.from_tsv:
        # Import the list from a TSV file
        verbose('Import from_tsv()')
        p_gen = libssw.from_tsv(args.keyword)
    elif args.from_wiki:
        # Import the list from wiki text
        verbose('Import from_wiki()')
        p_gen = libssw.from_wiki(args.keyword)
    elif args.cid or args.cid_l:
        # Explicit product IDs were given
        if '{}' in ids[0]:
            # 'pid pattern' start-number end-number step
            p_gen = from_sequence(ids, args.service, sub_pid)
        else:
            # Build a URL from each cid
            p_gen = (makeproditem(c, args.service, sub_pid) for c in ids)
    else:
        # Search for / fetch the list from DMM
        verbose('Call from_dmm()')
        if args.retrieval in {'url', 'keyword'}:
            priurls = args.keyword
        else:
            priurls = libssw.join_priurls(args.retrieval,
                                          *ids,
                                          service=args.service)
        p_gen = libssw.from_dmm(listparser,
                                priurls,
                                pages_last=args.pages_last,
                                key_id=key_id,
                                key_type=key_type,
                                idattr=kidattr)

    return p_gen
Example #2
def main(argv=None):

    args = get_args(argv or sys.argv[1:])

    make = MakeType(actress=args.table != 1, table=args.table)
    outfile = parse_outfile(args, make)
    verbose('outfile: ', outfile)

    ids = list(libssw.extr_ids(args.keyword, args.cid))  # list, not tuple: entries may be removed below
    verbose('ids: ', ids)

    if not args.retrieval:
        args.retrieval = libssw.extr_ids.retrieval
    emsg('I', '対象: {}'.format(args.retrieval))

    # --not-in-series is meaningless except with -L, -K and -U
    if args.retrieval not in {'label', 'maker', 'url'}:
        args.n_i_s = False
        verbose('force disabled n_i_s')

    if args.retrieval == 'actress':
        for i in [i for i in ids if i in libssw.HIDE_NAMES]:
            emsg('W', '削除依頼が出されている女優です: {}'.format(libssw.HIDE_NAMES[i]))
            ids.remove(i)

    # Exclusion targets
    no_omits = libssw.gen_no_omits(args.no_omit)
    verbose('non omit target: ', no_omits)

    # Compile the pattern used to generate product IDs
    sub_pid = (re.compile(args.pid_regex[0], re.I),
               args.pid_regex[1]) if args.pid_regex else None
    # Compile the pattern used to generate subtitles
    re_subtitle = (re.compile(args.subtitle_regex[0], re.I),
                   args.subtitle_regex[1]) if args.subtitle_regex else None

    # Filter patterns
    filter_id, fidattr = det_filterpatn(args)

    re_filter_pid_s = args.filter_pid_s and re.compile(args.filter_pid_s, re.I)
    re_filter_ttl = args.filter_title and re.compile(args.filter_title, re.I)

    # Product ID from which to start building
    key_id, key_type, kidattr = det_keyinfo(args)
    not_key_id = libssw.NotKeyIdYet(key_id, key_type, kidattr)

    listparser = libssw.DMMTitleListParser(no_omits=no_omits, patn_pid=sub_pid)
    seriesparser = libssw.DMMTitleListParser(patn_pid=sub_pid, show_info=False)

    # Build the iterable that yields product information
    p_gen = make_pgen(args, ids, listparser, sub_pid, key_id, key_type,
                      kidattr)

    # Ingest the product information
    # (newest first)
    products = OrderedDict((u, p) for u, p in p_gen)
    emsg('I', '一覧取得完了')

    total = len(products)
    if not total:
        emsg('E', '検索結果は0件でした。')

    if not args.service:
        # Determine the service from the product info when input came from TSV or wiki text
        args.service = libssw.resolve_service(next(iter(products)))

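    # Populate join_d with data from any --join-tsv/--join-wiki/--join-html
    # sources, keyed by product URL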
    join_d = dict()
    libssw.ret_joindata(join_d, args)

    if (args.join_tsv or args.join_wiki or args.join_html) and not join_d:
        emsg('E', '--join-* オプションで読み込んだデータが0件でした。')

    if args.existings_html:
        # Collect products already published on an existing list page
        verbose('existings html')
        existings = set(k[0] for k in libssw.from_html(args.existings_html,
                                                       service=args.service))
        if not existings:
            emsg('E', '--existings-* オプションで読み込んだデータが0件でした。')
    else:
        existings = set()

    # Build the product entries
    verbose('Start building product info')
    if not VERBOSE and args.wikitext:
        print('作成中...', file=sys.stderr, flush=True)

    wikitexts = []
    title_list = []
    nis_series_names = set()  # series names found (for n_i_s)
    nis_series_urls = set()  # URLs of series list pages found (for n_i_s)
    rest = total
    omitted = listparser.omitted
    before = bool(key_id)

    dmmparser = libssw.DMMParser(no_omits=no_omits,
                                 patn_pid=sub_pid,
                                 start_date=args.start_date,
                                 start_pid_s=args.start_pid_s,
                                 filter_pid_s=re_filter_pid_s,
                                 pass_bd=args.pass_bd,
                                 n_i_s=args.n_i_s,
                                 longtitle=args.longtitle,
                                 check_rental=args.check_rental,
                                 check_rltd=args.check_rltd)

    if args.retrieval in {'maker', 'label', 'series'}:
        keyiter = libssw.sort_by_id(products)
    else:
        keyiter = iter(products)

    for url in keyiter:
        props = products[url]

        # Generate the product ID
        if not props.pid:
            props.pid, props.cid = libssw.gen_pid(props.url, sub_pid)

        # Handle the start-ID options (--{start,last}-{p,c}id)
        if before:
            # Skip until the specified product ID is found
            if not_key_id(getattr(props, kidattr)):
                emsg(
                    'I', '作品を除外しました: {}={} (id not met yet)'.format(
                        kidattr, getattr(props, kidattr)))
                omitted += 1
                rest -= 1
                continue
            else:
                before = False
                if key_type == 'start':
                    emsg('I',
                         '開始IDが見つかりました: {}'.format(getattr(props, kidattr)))
                else:
                    emsg('I',
                         '最終IDが見つかりました: {}'.format(getattr(props, kidattr)))
                    continue

        # Skip products whose pid/cid does not match the given pattern (--filter-{p,c}id)
        if filter_id and not filter_id.search(getattr(props, fidattr)):
            emsg(
                'I',
                '作品を除外しました: {}={} (filtered)'.format(fidattr,
                                                     getattr(props, fidattr)))
            omitted += 1
            rest -= 1
            continue

        # Skip products whose title does not match the given pattern (--filter-title)
        if args.filter_title and not re_filter_ttl.search(props.title):
            emsg('I', '作品を除外しました: title={} (filtered)'.format(props.title))
            omitted += 1
            rest -= 1
            continue

        # Skip products already present on the existing list page (--existings-*)
        if props.url in existings:
            emsg('I', '作品を除外しました: pid={} (already existent)'.format(props.pid))
            omitted += 1
            rest -= 1
            continue

        # Skip URLs known to belong to a series (--not-in-series)
        if props.url in nis_series_urls:
            emsg('I',
                 '作品を除外しました: title="{}" (known series)'.format(props.title))
            omitted += 1
            rest -= 1
            continue

        if props.url in join_d:
            # Merge in the join data when available
            props.merge(join_d[props.url])
            if args.hunter:
                props.title = join_d[props.url].title

        # Generate the subtitle
        if args.retrieval == 'series':
            # Custom subtitle when building a series list
            props.subtitle = libssw.sub(re_subtitle, props.title).strip() \
                if args.subtitle_regex else ''

        if args.wikitext:
            # Build the wiki text
            libssw.inprogress('(残り {} 件/全 {} 件: 除外 {} 件)  '.format(
                rest, total, omitted))

            verbose('Call dmm2ssw')
            b, status, data = dmm2ssw.main(props, args, dmmparser)
            # Return values:
            # b -> Bool
            # status -> url if b else http.status or 'Omitted'
            # data -> if b:
            #             ReturnVal(release,
            #                       pid,
            #                       title,
            #                       title_dmm,
            #                       url,
            #                       time,
            #                       ('maker', 'maker_id'),
            #                       ('label', 'label_id'),
            #                       ('series', 'series_id'),
            #                       wikitext_a,
            #                       wikitext_t)
            #         else:
            #             (key, hue) or empty ReturnVal (404)
            verbose('Return from dmm2ssw: {}, {}, {}'.format(b, status, data))

            if b or status == 404:
                wikitexts.append(data)
            else:
                emsg(
                    'I', '作品を除外しました: '
                    'cid={0}, reason=("{1[0]}", {1[1]})'.format(
                        props.cid, data))
                if args.n_i_s and data[0] == 'series':
                    # Prefetch series products for --not-in-series
                    verbose('Retrieving series products...')
                    nis_series_names.add(data[1].name)
                    priurls = libssw.join_priurls('series',
                                                  data[1].sid,
                                                  service=args.service)
                    nis_series_urls.update(
                        u[0] for u in libssw.from_dmm(seriesparser, priurls))
                omitted += 1

        else:
            # Output as a plain list
            title_list.append(
                props.tsv('url', 'title', 'pid', 'actress', 'number',
                          'director', 'note'))

        rest -= 1

    if wikitexts:
        # Write out the wiki text
        verbose('Writing wikitext')

        # Determine the article name
        article_name, article_header = det_articlename(args, ids, wikitexts,
                                                       listparser)
        verbose('article name: ', article_name)
        verbose('article header: ', repr(article_header))

        if not libssw.le80bytes(article_name):
            emsg('W', 'ページ名が80バイトを超えています')

        # Sort
        sortkeys = set_sortkeys(args.sort_key)
        for k in sortkeys:
            wikitexts.sort(key=attrgetter(k))

        if args.add_column:
            add_header = '|'.join(c.split(':')[0]
                                  for c in args.add_column) + '|'
            args.add_column = tuple(truncate_th(args.add_column))
        else:
            add_header = ''
        verbose('add header: {}\nadd column: {}'.format(
            add_header, args.add_column))

        if make.table and args.header:
            table_header = '|~{{}}|PHOTO|{}|ACTRESS|{}{}RELEASE|NOTE|'.format(
                'SUBTITLE' if args.retrieval == 'series' else 'TITLE',
                'DIRECTOR|' if args.dir_col else '', add_header)
        else:
            table_header = ''

        build_page = BuildPage(wikitexts, args.split, args.retrieval,
                               article_name, article_header, table_header)

        print(file=sys.stderr)

        result = '\n'.join(
            finalize(build_page, args.row, make, args.n_i_s, nis_series_names,
                     outfile))

        build_page.open_browser(args.browser)

        if args.copy:
            libssw.copy2clipboard(result)

    else:
        # Write out the tab-separated list

        fd = open(args.out, outfile.writemode) if args.out else sys.stdout

        print(*title_list, sep='\n', file=fd)

        if args.out:
            fd.close()
Example #3
def build_header_listpage(retrieval, service, name, lstid):
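    """Build a list-page header from the name and the primary URL for lstid."""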
    url = libssw.join_priurls(retrieval, lstid, service=service)[0]
    return build_header(((name, url), ))
Example #4
def main():
    global ROOTID
    global PREFIXES

    existings = OrderedDict()
    newcomers = OrderedDict()

    mk_prefix = Counter()

    mk_ophans = []
    mk_ophans_prods = dict()
    mk_ophans_prefix = Counter()
    mk_ophans_latest = ''

    lb_newcomers = dict()

    lb_name = dict()
    lb_url = dict()
    lb_prods = dict()
    lb_prefix = Counter()
    lb_latest = dict()
    lb_series = dict()

    lb_ophans = dict()
    lb_ophans_prods = dict()
    lb_ophans_prefix = Counter()
    lb_ophans_latest = dict()

    sr_name = dict()
    sr_url = dict()
    sr_prods = dict()
    sr_prefix = Counter()
    sr_latest = dict()

    args = get_args()
    PREFIXES = args.prefixes

    if args.root.startswith('http://'):
        # Handle the case where the ID was passed as a URL
        ROOTID = libssw.get_id(args.root)[0]
        target = libssw.get_article(args.root)
        service = libssw.resolve_service(args.root)
    else:
        ROOTID = args.root
        target = args.target
        service = args.service
    verbose('root id: {}'.format(ROOTID))
    verbose('target: {}'.format(target))
    verbose('service: {}'.format(service))

    listparser = libssw.DMMTitleListParser(no_omits=('イメージビデオ', '総集編'),
                                           show_info=False)
    ret_members = RetrieveMembers(listparser, service)

    flprefix = '{}.{}'.format(target, ROOTID)
    pkfile = tuple(
        Path(args.pickle_path or '.').glob('{}.*.pickle'.format(flprefix)))
    pkfile = pkfile[0] if pkfile else None
    if pkfile:
        with pkfile.open('rb') as f:
            (existings, lb_name, lb_url, lb_prods, lb_latest, sr_name, sr_url,
             sr_prods, sr_latest, lb_series, lb_ophans, lb_ophans_prods,
             lb_ophans_latest, mk_ophans, mk_ophans_prods,
             mk_ophans_latest) = pickle.load(f)

    exist_set = set(existings)

    # Fetch the newly added products
    priurls = libssw.join_priurls(target, ROOTID, service=service)

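    # Resume point: the pid of the most recently stored product, if any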
    try:
        last_pid = next(reversed(existings.values()))['pid']
    except StopIteration:
        last_pid = None

    # Retrieve the maker's new product information
    for nurl, nprops in libssw.from_dmm(listparser,
                                        priurls,
                                        key_id=last_pid,
                                        key_type='last',
                                        idattr='pid',
                                        ignore=True):
        if nurl not in exist_set:
            newcomers[nurl] = dict(nprops)

    nc_num = len(newcomers)
    total = nc_num + len(existings)
    if not total:
        emsg('E', '検索結果は0件でした。')

    article_name = listparser.article[0][0]

    # Now that the maker name is known, check the output file
    outstem = '{}.{}'.format(flprefix, libssw.trans_filename(article_name))

    outfile = args.outfile or '{}.wiki'.format(outstem)
    if args.replace:
        writemode = 'w'
    else:
        libssw.files_exists('w', outfile)
        writemode = 'x'

    emsg('I', '{} [id={}, 新規{}/全{}作品]'.format(article_name, ROOTID, nc_num,
                                              total))

    # First, group by label
    lb_set = set(lb_name)
    lb_set_add = lb_set.add
    for lid, lname, lurl, lprods in ret_members('label', newcomers, exist_set,
                                                last_pid):
        if lid not in lb_set:
            # New label
            lb_name[lid] = lname
            lb_url[lid] = lurl
            lb_prods[lid] = lprods
            lb_series[lid] = []
            lb_set_add(lid)
        else:
            # New products for a known label
            for u in reversed(lprods):
                lb_prods[lid][u] = lprods[u]

        lb_newcomers[lid] = lprods
        lb_latest[lid] = get_latest(lprods)

        emsg('I', 'レーベル: {} [id={}, 新規{}作品]'.format(lname, lid, len(lprods)))

    #
    # Summary of the maker's "other" (uncategorized) products
    #
    ncmk_ophans = ret_members.ophans.copy()
    ncmk_ophans_prods = ret_members.ophans_prods.copy()
    emsg('I', '{}その他: {}作品'.format(article_name, len(ncmk_ophans_prods)))

    # Add the maker's other products
    mk_ophans.extend(ncmk_ophans)
    for u in reversed(ncmk_ophans_prods):
        mk_ophans_prods[u] = ncmk_ophans_prods[u]
    verbose('mk_ophans_prods: {}'.format(len(mk_ophans_prods)))

    # Product-ID prefixes of the maker's other products
    if args.regen_pid:
        for p in mk_ophans_prods:
            mk_ophans_prods[p]['pid'] = libssw.gen_pid(
                mk_ophans_prods[p]['url'])[0]
    mk_ophans_prefix = count_prefixes(mk_ophans_prods)

    # Latest release date among the maker's other products
    ncmk_ophans_latest = ret_members.ophans_latest
    if ncmk_ophans_latest > mk_ophans_latest:
        mk_ophans_latest = ncmk_ophans_latest

    #
    # Per-label summary
    #
    # Rebuild each label's prefix info after adding the new products
    for lid in lb_prods:
        if args.regen_pid:
            for p in lb_prods[lid]:
                lb_prods[lid][p]['pid'] = libssw.gen_pid(
                    lb_prods[lid][p]['url'])[0]
        lb_prefix[lid] = count_prefixes(lb_prods[lid])

    # Group the series within each label
    for lid in lb_prods:
        lprods = lb_prods[lid]

        if lb_name[lid].startswith(IGNORE_LABELS) and args.suppress:
            lb_series[lid] = ()
            lb_ophans_prefix[lid] = ()
            continue

        emsg('I', '')
        emsg('I', 'レーベル「{}」のシリーズ'.format(lb_name[lid]))

        verbose('existing ophans: {}'.format(len(lb_ophans.get(lid, ()))))
        sr_set = set(sr_name)
        sr_set_add = sr_set.add
        for sid, sname, surl, sprods in ret_members('series', lprods,
                                                    exist_set, last_pid):
            if sid not in sr_set:
                sr_name[sid] = sname
                sr_url[sid] = surl
                sr_prods[sid] = sprods
                sr_set_add(sid)
            else:
                for u in reversed(sprods):
                    sr_prods[sid][u] = sprods[u]
            emsg('I',
                 'シリーズ: {} [id={}, 新規{}作品]'.format(sname, sid, len(sprods)))

            sr_latest[sid] = get_latest(sprods)
            try:
                lb_series[lid].append(sid)
            except KeyError:
                lb_series[lid] = [sid]

        # Summary of the label's other products
        nclb_ophans = ret_members.ophans.copy()
        nclb_ophans_prods = ret_members.ophans_prods.copy()
        emsg('I', '{}その他: {}作品'.format(lb_name[lid], len(nclb_ophans_prods)))
        nclb_ophans_latest = ret_members.ophans_latest

        if lid not in lb_ophans:
            lb_ophans[lid] = []
            lb_ophans_prods[lid] = OrderedDict()
            lb_ophans_latest[lid] = '0000/00/00'

        lb_ophans[lid].extend(nclb_ophans)
        for u in reversed(nclb_ophans_prods):
            lb_ophans_prods[lid][u] = nclb_ophans_prods[u]
        verbose('lb_ophans_prods[{}]: {}'.format(lid, len(lb_ophans_prods[lid])))
        if nclb_ophans_latest > lb_ophans_latest[lid]:
            lb_ophans_latest[lid] = nclb_ophans_latest

    verbose('lb_ophans_prods: {}'.format(len(lb_ophans_prods)))
    for lid in lb_ophans_prods:
        verbose('lb_ophans_prods[{}]: {}'.format(lid,
                                                 len(lb_ophans_prods[lid])))
        if args.regen_pid:
            for p in lb_ophans_prods[lid]:
                lb_ophans_prods[lid][p]['pid'] = libssw.gen_pid(
                    lb_ophans_prods[lid][p]['url'])[0]
        lb_ophans_prefix[lid] = count_prefixes(lb_ophans_prods[lid])
        verbose('lb_ophans_prefix[{}]: {}'.format(lid,
                                                  len(lb_ophans_prefix[lid])))

    for sid in sr_prods:
        if args.regen_pid:
            for p in sr_prods[sid]:
                sr_prods[sid][p]['pid'] = libssw.gen_pid(
                    sr_prods[sid][p]['url'])[0]
        sr_prefix[sid] = count_prefixes(sr_prods[sid])

    for url in reversed(newcomers):
        existings[url] = newcomers[url]
    mk_prefix = count_prefixes(existings)

    print('\n')

    fd = open(outfile, writemode)

    if target == 'maker':
        print('*[[{}(メーカー)]]'.format(article_name), file=fd)
        # print('全{}作品'.format(total), file=fd)
        summ_prefixes(mk_prefix, fd)
    print(time.strftime('(%Y年%m月%d日現在)'), file=fd)

    if args.sort_key == 'release':
        keyiter = lb_latest.items()
        reverse = True
    elif args.sort_key == 'name':
        keyiter = lb_name.items()
        reverse = False
    elif args.sort_key == 'number':
        keyiter = tuple((lid, len(lb_prods[lid])) for lid in lb_prods)
        reverse = True

    if not args.only_series:
        for n, item in enumerate(sorted(keyiter,
                                        key=itemgetter(1),
                                        reverse=reverse),
                                 start=1):
            lid = item[0]
            print('**{}.[[{}]]'.format(n, lb_name[lid]), file=fd)
            summ_prefixes(lb_prefix[lid], fd)

            if args.latest:
                print('-最新リリース: {}'.format(lb_latest[lid]), file=fd)

            if args.dmm:
                print('-[[DMMの一覧>{}]]'.format(lb_url[lid]), file=fd)

            if not args.only_label:
                # Per-series output
                numofseries = len(lb_series.get(lid, ()))
                numofoph = len(lb_ophans_prefix.get(lid, ()))
                is_exist = numofseries or numofoph
                if is_exist:
                    print('[+]', file=fd)
                    print('シリーズ数:', numofseries, file=fd)

                try:
                    print_serises(lb_series[lid], sr_name, sr_prefix, sr_prods,
                                  sr_url, sr_latest, args.dmm, args.latest,
                                  args.sort_key, fd)
                except KeyError:
                    pass

                if numofoph:
                    print('***{}その他'.format(lb_name[lid]), file=fd)
                    summ_prefixes(lb_ophans_prefix[lid], fd)
                    if args.latest:
                        print('-最新リリース: {}'.format(lb_ophans_latest[lid]),
                              file=fd)

                if is_exist:
                    print('[END]', file=fd)

            print(file=fd)

        if mk_ophans:
            print('**{}その他'.format(article_name), file=fd)
            summ_prefixes(mk_ophans_prefix, fd)

            if args.latest:
                print('-最新リリース: {}'.format(mk_ophans_latest), file=fd)

    elif not args.only_label:
        """only-series"""
        print_serises(sr_name, sr_name, sr_prefix, sr_prods, sr_url, sr_latest,
                      args.dmm, args.latest, args.sort_key, fd)

    fd.close()

    print('出力ファイル:', outfile)

    if newcomers or args.regen_pid:
        pkpath = Path(args.pickle_path or '.') / '{}.pickle'.format(outstem)
        try:
            pkpath.rename(pkpath.with_suffix(pkpath.suffix + '.bak'))
        except FileNotFoundError:
            pass

        verbose('save file: {}'.format(pkpath))
        with pkpath.open('wb') as f:
            pickle.dump((existings, lb_name, lb_url, lb_prods, lb_latest,
                         sr_name, sr_url, sr_prods, sr_latest, lb_series,
                         lb_ophans, lb_ophans_prods, lb_ophans_latest,
                         mk_ophans, mk_ophans_prods, mk_ophans_latest), f)

    # Remove the cache directory
    if args.clear_cache:
        libssw.clear_cache()
Example #5
    def __call__(self, tier, newcomers, existings, last_pid):
        """レーベル/シリーズ情報を返す"""

        if tier == 'label':
            rname = 'メーカー'
            nwidx = 7
        else:
            rname = 'レーベル'
            nwidx = 5

        self.ophans.clear()
        self.ophans_prods.clear()
        self.ophans_prefix.clear()
        self.ophans_latest = '0000/00/00'

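        # Consume the newcomer URLs from the tail of the queue; each URL is
        # popped once it has been classified or skipped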
        queue = list(newcomers.keys())
        while queue:
            url = queue[-1]

            if url in existings or url in self.ophans:
                queue.pop()
                continue

            props = newcomers[url]
            verbose('popped: {}'.format(props.items()))
            libssw.inprogress('この{}残り: {}'.format(rname, len(queue)))

            # Open the product page and grab its elements
            el = get_elems(props)
            if not el:
                queue.pop()
                continue

            # Label/series ID and name
            mid, mname = ret_idname(el[nwidx])
            mreldate = libssw.getnext_text(el[1])
            if not mid:
                self.ophans.append(url)
                self.ophans_prods[url] = props
                mprefix = libssw.split_pid(props['pid'])[0]
                self.ophans_prefix[mprefix] += 1
                verbose('ophans: {}'.format(props['pid']))

                if mreldate > self.ophans_latest:
                    self.ophans_latest = mreldate
                    verbose('ophans latest: {}'.format(self.ophans_latest))
                queue.pop()
                continue

            # Weed out labels that span multiple makers and are a pain to handle
            if tier == 'label' and \
               ROOTID in IGNORE_PARES and \
               mid in IGNORE_PARES[ROOTID]:
                verbose('ignore label: {}'.format(mname))
                queue.pop()
                continue

            priurls = libssw.join_priurls(tier, mid, service=self.service)

            # Retrieve all product info for the label/series
            mprods = OrderedDict()
            for murl, mprops in libssw.from_dmm(self.listparser,
                                                priurls,
                                                key_id=last_pid,
                                                key_type='last',
                                                idattr='pid',
                                                ignore=True,
                                                show_info=False):
                if murl not in existings:
                    mprods[murl] = dict(mprops)

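            # NOTE: returning here ends the whole generator, not just this
            # label/series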
            if not mprods:
                return

            yield int(mid), mname, priurls[0], mprods.copy()

            for key in mprods:
                try:
                    queue.remove(key)
                except ValueError:
                    pass
Example #6
def main():

    args = get_args()

    libssw.files_exists('r', *args.wikifiles)
    if args.out:
        if args.replace:
            writemode = 'w'
        else:
            libssw.files_exists('w', args.out)
            writemode = 'x'
    else:
        writemode = None

    g_actid = []
    seq = []
    contents = dict()
    release = dict()

    # Handle actress IDs passed as URLs
    add_actid(g_actid, args.actress_id)

    # Exclusion targets
    no_omits = libssw.gen_no_omits(args.no_omit)
    verbose('non omit target: ', no_omits)

    # Read the wiki text
    # (actress IDs are collected as well)
    for key, rdate, item in get_existing(g_actid, args.wikifiles):
        seq.append(key)
        contents[key] = item
        release[key] = rdate
        verbose('key: ', key)
        verbose('rdate: ', rdate)
        verbose('item: ', item)

    # Join multiple IDs with commas
    aidstr = ','.join(g_actid)
    emsg('I', '女優ID: ', aidstr)

    listparser = libssw.DMMTitleListParser(no_omits)
    priurls = libssw.join_priurls('actress', aidstr)

    # Import products not yet on the wiki
    emsg('I', '作品一覧を取得中...')
    products = OrderedDict((u, p)
                           for u, p in libssw.from_dmm(listparser, priurls)
                           if u not in seq)
    emsg('I', '一覧取得完了')

    total = len(products)
    if not total:
        emsg('E', '検索結果は0件でした。')

    verbose('Start building product info')
    if not VERBOSE:
        print('作成中...', file=sys.stderr, flush=True)

    # Build wiki text for the missing products
    current = seq[:]
    newitems = []
    rest = total
    omitted = 0

    dmmparser = libssw.DMMParser(no_omits)

    for url in products:
        props = products[url]
        verbose('props: ', props.items())

        props.pid, props.cid = libssw.gen_pid(props.url)

        libssw.inprogress('(残り {} 件/全 {} 件: 除外 {} 件)  '.format(
            rest, total, omitted))

        b, status, data = dmm2ssw.main(props=props,
                                       p_args=args,
                                       dmmparser=dmmparser)
        verbose('Return from dmm2ssw: {}, {}, {}'.format(b, status, data))

        if b:
            newitems.append(data)
        elif status == 404:
            emsg('I', 'ページが見つかりませんでした: url="{}"'.format(props.url))
            newitems.append(data)
        else:
            emsg(
                'I', 'ページを除外しました: '
                'cid={0}, reason=("{1[0]}", "{1[1]}")'.format(props.cid, data))
            omitted += 1
        rest -= 1
        verbose('rest: {} / total: {} / omitted: {}'.format(
            rest, total, omitted))

    # Sort, in case rental-first releases are present
    newitems.sort(key=itemgetter(0), reverse=True)

    # Merge
    i = -1
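    # Walk seq in page order, inserting each new item at its chronological
    # position; i tracks where the previous item landed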
    for new in newitems:
        verbose('new: ', new)
        if new.url in seq:
            # Already present (e.g. an item that switched to the rental edition); skip it
            continue

        for i, key in enumerate(seq[i + 1:], start=i + 1):
            # Insert the entry at its chronological position
            verbose('i, key: {}, {}'.format(i, key))
            if not key.isdecimal():
                verbose('new: {} > curr: {}'.format(
                    new.release.replace('/', '.'), release[key]))
                if new.release.replace('/', '.') > release[key]:
                    # Insert the new entry
                    verbose('insert: {}, {}'.format(key, contents[key][1]))
                    seq.insert(i, new.url)
                    contents[new.url] = new.wktxt_a
                    release[new.url] = new.release
                    break
                elif '----' in contents[key]:
                    seq.append(new.url)
                    contents[new.url] = new.wktxt_a
                    release[new.url] = new.release
                    break
        else:
            # Append the remaining entries
            seq.append(new.url)
            contents[new.url] = new.wktxt_a
            release[new.url] = new.release

    if args.diff:
        # Show the diff (dicts are not callable, so map over contents.get)
        libssw.show_diff(tuple(map(contents.get, current)),
                         tuple(map(contents.get, seq)), '追加前', '追加後')

    # Output
    header = False
    i = 0
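    # Write the entries in order, emitting a '// N' split marker every
    # args.split product entries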
    fd = open(args.out, writemode) if args.out else sys.stdout
    while seq:
        key = seq.pop(0)
        if key.startswith('http://'):
            i += 1
            if args.split and not i % args.split:
                print('// {}'.format(i), file=fd)
        content = contents[key]
        if content.startswith('*'):
            header = True
        print(content, file=fd)
        if (not header or len(content) > 2) and seq:
            print(file=fd)
        header = False
    print()
    if args.out:
        fd.close()