class TokenizerAnserini(ModuleBase):
    """Tokenizer module named ``anserini``.

    Declares two config options: ``keepstops`` (default ``True``) and
    ``stemmer`` (default ``"none"``).
    """

    module_type = "tokenizer"
    module_name = "anserini"

    config_spec = [
        ConfigOption(
            key="keepstops",
            default_value=True,
            description="keep stopwords if True",
        ),
        ConfigOption(
            key="stemmer",
            default_value="none",
            description="stemmer: porter, krovetz, or none",
        ),
    ]
class CollectionSecret(ModuleBase):
    """Collection module named ``secretdocs``.

    Declares ``version`` and ``path`` options; ``path`` is additionally
    listed in ``config_keys_not_in_path``.
    """

    module_type = "collection"
    module_name = "secretdocs"

    config_spec = [
        ConfigOption("version", "aliens", "redacted"),
        ConfigOption("path", "nicetry", "redacted"),
    ]

    # NOTE(review): presumably consumed by ModuleBase when deriving a
    # module path -- the consumer is not visible here, confirm there.
    config_keys_not_in_path = ["path"]
class AFoo(ModuleTypeA):
    """Module ``AFoo``: two config options plus a dependency on ``AFooBar``."""

    module_name = "AFoo"

    dependencies = [
        Dependency(key="myfoobar", module="Atype", name="AFooBar"),
    ]

    config_spec = [
        ConfigOption("foo1", "val1", "test option"),
        ConfigOption("changethis", 0, "something to override"),
    ]
class RerankTask(Task):
    """Task named ``rerank``.

    Configured by the fold to run and the metric to maximize on the dev
    set; depends on a benchmark, a ``rank`` task and a reranker.
    """

    module_name = "rerank"

    config_spec = [
        ConfigOption(key="fold", default_value="s1", description="fold to run"),
        # affects train() because we check to save weights
        ConfigOption(
            key="optimize",
            default_value="map",
            description="metric to maximize on the dev set",
        ),
    ]

    dependencies = [
        Dependency(
            key="benchmark",
            module="benchmark",
            name="rob04yang",
            provide_this=True,
            provide_children=["collection"],
        ),
        Dependency(key="rank", module="task", name="rank"),
        Dependency(key="reranker", module="reranker", name="DRMM"),
    ]
class RerankerDRMM(ModuleBase):
    """Reranker module named ``DRMM``.

    Depends on an ``embedtext`` extractor and a ``pytorch`` trainer, and
    declares the histogram / matching-network options listed below.
    """

    module_type = "reranker"
    module_name = "DRMM"

    config_spec = [
        ConfigOption(
            key="nbins",
            default_value=29,
            description="number of bins in matching histogram",
        ),
        ConfigOption(
            key="nodes",
            default_value=5,
            description="hidden layer dimension for matching network",
        ),
        ConfigOption(
            key="histType",
            default_value="LCH",
            description="histogram type: CH, NH, LCH",
        ),
        ConfigOption(
            key="gateType",
            default_value="IDF",
            description="term gate type: TV or IDF",
        ),
    ]

    dependencies = [
        Dependency(key="extractor", module="extractor", name="embedtext"),
        Dependency(key="trainer", module="trainer", name="pytorch"),
    ]
class ModuleA(SimpleModuleType):
    """Module named ``A`` with a single config option, ``key1``."""

    module_name = "A"

    config_spec = [
        ConfigOption("key1", "val1", "test option"),
    ]
class AFooBar(ModuleTypeA):
    """Module named ``AFooBar`` with a single config option, ``foobar1``."""

    module_name = "AFooBar"

    config_spec = [
        ConfigOption("foobar1", "val1", "test option"),
    ]
class SearcherBM25(ModuleBase):
    """Searcher module named ``bm25``.

    Depends on an ``anserini`` index and declares the ``k1`` option.
    """

    module_type = "searcher"
    module_name = "bm25"

    # Searchers are unlikely to actually need a seed, but we require it for testing
    requires_random_seed = True

    config_spec = [
        ConfigOption("k1", 1.0, "k1 parameter"),
    ]

    dependencies = [
        Dependency(key="index", module="index", name="anserini"),
    ]
class TrainerPytorch(ModuleBase):
    """Trainer module named ``pytorch``.

    Only ``batch`` carries a description; the remaining options are
    declared with a key and default value alone. ``fastforward`` and
    ``boardname`` are listed in ``config_keys_not_in_path``.
    """

    module_type = "trainer"
    module_name = "pytorch"

    config_keys_not_in_path = ["fastforward", "boardname"]

    config_spec = [
        ConfigOption(key="batch", default_value=32, description="batch size"),
        ConfigOption(key="niters", default_value=20),
        ConfigOption(key="itersize", default_value=512),
        ConfigOption(key="gradacc", default_value=1),
        ConfigOption(key="lr", default_value=0.001),
        ConfigOption(key="softmaxloss", default_value=False),
        ConfigOption(key="fastforward", default_value=False),
        ConfigOption(key="validatefreq", default_value=1),
        ConfigOption(key="boardname", default_value="default"),
    ]
class IndexAnserini(ModuleBase):
    """Index module named ``anserini``.

    Depends on the ``robust04`` collection and declares a ``stemmer``
    option defaulting to ``"porter"``.
    """

    module_type = "index"
    module_name = "anserini"

    config_spec = [
        ConfigOption("stemmer", "porter", "stemming"),
    ]

    dependencies = [
        Dependency(key="collection", module="collection", name="robust04"),
    ]
class AParent(ModuleTypeA):
    """Module named ``AParent`` with dependencies on ``AFoo`` and ``ABar``.

    The ``myfoo`` dependency is declared with a default config override
    setting ``changethis`` to 42.
    """

    module_name = "AParent"

    dependencies = [
        Dependency(
            key="myfoo",
            module="Atype",
            name="AFoo",
            default_config_overrides={"changethis": 42},
        ),
        Dependency(key="bar", module="Atype", name="ABar"),
    ]

    config_spec = [
        ConfigOption("key1", "val1", "test option"),
    ]
class ExtractorEmbedtext(ModuleBase):
    """Extractor module named ``embedtext``.

    Depends on an ``anserini`` index (declared with a default override of
    ``stemmer`` to ``"none"``) and an ``anserini`` tokenizer. All config
    options are declared without descriptions.
    """

    module_type = "extractor"
    module_name = "embedtext"

    config_spec = [
        ConfigOption(key="embeddings", default_value="glove6b"),
        ConfigOption(key="zerounk", default_value=False),
        ConfigOption(key="calcidf", default_value=True),
        ConfigOption(key="maxqlen", default_value=4),
        ConfigOption(key="maxdoclen", default_value=800),
        ConfigOption(key="usecache", default_value=False),
    ]

    dependencies = [
        Dependency(
            key="index",
            module="index",
            name="anserini",
            default_config_overrides={"stemmer": "none"},
        ),
        Dependency(key="tokenizer", module="tokenizer", name="anserini"),
    ]
class ModuleFoo(ModuleBase):
    """Module ``foo`` of type ``Atype`` declaring options of many value types.

    Several defaults are written in a form that differs from the declared
    ``value_type`` (e.g. an int default with ``value_type=str``, or a
    comma-separated string with a list type).
    """

    module_type = "Atype"
    module_name = "foo"

    config_spec = [
        # str
        ConfigOption("str1", "foo"),
        ConfigOption("str2", 9, value_type=str),
        # int
        ConfigOption("int1", 2),
        ConfigOption("int2", "3", value_type=int),
        # float
        ConfigOption("float1", 2.2),
        ConfigOption("float2", "3.3", value_type=float),
        # bool
        ConfigOption("bool1", False),
        ConfigOption("bool2", "false", value_type=bool),
        ConfigOption("bool3", "true", value_type=bool),
        # strlist
        ConfigOption("strlist1", 3, value_type="strlist"),
        ConfigOption("strlist2", [4, 5], value_type="strlist"),
        ConfigOption("strlist3", "4,5", value_type="strlist"),
        # intlist
        ConfigOption("intlist1", 3, value_type="intlist"),
        ConfigOption("intlist2", "3", value_type="intlist"),
        ConfigOption("intlist3", (4, 5), value_type="intlist"),
        ConfigOption("intlist4", "4,5", value_type="intlist"),
        # floatlist
        ConfigOption("floatlist1", 3, value_type="floatlist"),
        # no explicit value_type, default of None
        ConfigOption("none-or-str", None),
    ]