# Optional RM3 pseudo-relevance feedback: record it for the run name and
# enable it on the searcher.
if args.rm3:
    search_rankers.append('rm3')
    searcher.set_rm3()

# Invalid topics name: an empty mapping means the requested topic set could
# not be loaded, so bail out before searching.
if not topics:  # idiomatic emptiness test (was `topics == {}`)
    print(f'Topic {args.topics} Not Found')
    raise SystemExit()  # was bare `exit()`, which is a site-module REPL helper

# Get re-ranker. Normalize the flag to a real bool so it can be tested
# directly (the original `a and b and c` chain could yield None/[] instead
# of False, which is why it was compared with `is True`).
use_prcl = bool(args.prcl) and args.alpha > 0
if use_prcl:
    ranker = PseudoRelevanceClassifierReranker(
        searcher.index_dir, args.vectorizer, args.prcl,
        r=args.r, n=args.n, alpha=args.alpha)

# Build output path: when no explicit --output was given, derive a
# descriptive run-file name from the re-ranking configuration.
output_path = args.output
if output_path is None:
    if use_prcl:
        # Encode the classifier combination ('lr', 'svm', ...) and the
        # feedback depth r into the generated file name.
        clf_rankers = []
        for t in args.prcl:
            if t == ClassifierType.LR:
                clf_rankers.append('lr')
            elif t == ClassifierType.SVM:
                clf_rankers.append('svm')
        r_str = f'prcl.r_{args.r}'
# Pretokenized topics: queries arrive already tokenized, so a whitespace
# analyzer is used and the configured HF tokenizer preprocesses the topics.
print('Using whitespace analyzer because of pretokenized topics')  # no placeholders, f-prefix dropped
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer)
print(f'Using {args.tokenizer} to preprocess topics')

# Optional custom stopword list: rebuild the analyzer with Porter stemming
# plus the user-supplied stopwords file and install it on the searcher.
if args.stopwords:
    analyzer = JDefaultEnglishAnalyzer.fromArguments(
        'porter', False, args.stopwords)
    searcher.set_analyzer(analyzer)
    print(f'Using custom stopwords={args.stopwords}')

# Get re-ranker. Normalize the flag to a real bool so it can be tested
# directly (the original `a and b and c` chain could yield None/[] instead
# of False, which is why it was compared with `is True`).
use_prcl = bool(args.prcl) and args.alpha > 0
if use_prcl:
    ranker = PseudoRelevanceClassifierReranker(
        searcher.index_dir, args.vectorizer, args.prcl,
        r=args.r, n=args.n, alpha=args.alpha)

# Build output path: when no explicit --output was given, derive a
# descriptive run-file name from the re-ranking configuration.
output_path = args.output
if output_path is None:
    if use_prcl:
        # Encode the classifier combination ('lr', 'svm', ...) and the
        # feedback depth r into the generated file name.
        clf_rankers = []
        for t in args.prcl:
            if t == ClassifierType.LR:
                clf_rankers.append('lr')
            elif t == ClassifierType.SVM:
                clf_rankers.append('svm')
        r_str = f'prcl.r_{args.r}'