index[tstconf] = f return f # (0) decode command-line (configfile, testfilename) = sys.argv[1 : ] # (1) load configuration config = ConfigLoader.load(configfile) linking = config.getDataSources().isEmpty() if linking: lowlimit = 0.0 else: lowlimit = 0.4 # (2) index up all the data processor = Processor(config) database = processor.getDatabase() if not linking: processor.index(config.getDataSources(), 40000) else: processor.index(config.getDataSources(1), 40000) # (3) actual genetic stuff pkg = "no.priv.garshol.duke.comparators." comparators = ["DiceCoefficientComparator", "DifferentComparator", "ExactComparator", "JaroWinkler", "JaroWinklerTokenized", "Levenshtein", "NumericComparator",
LinkDatabaseUtils.loadTestFile(sys.argv[2], golddb) else: golddb = None # (1) load configuration config = ConfigLoader.load(configfile) properties = config.getProperties()[:] idprops = config.getIdentityProperties() linking = not config.isDeduplicationMode() if linking: lowlimit = 0.0 else: lowlimit = 0.4 # (2) index up all the data processor = Processor(config) alldb = processor.getDatabase() if not linking: processor.index(config.getDataSources(), 40000) else: processor.index(config.getDataSources(1), 40000) processor.index(config.getDataSources(2), 40000) if linking: config.setPath( (config.getPath() or '/tmp/duke-active-ix-') + '2') # AHEM... processor = Processor(config) database = processor.getDatabase() if not linking: processor.index(config.getDataSources(), 40000) else: