def test_t3(self):
    """Check the first TESTFILE2 instance in original and converted form.

    Three things are verified:

    1. ``instances_original()`` yields the raw token list and the label
       ``"pos"`` for the first instance.
    2. ``ds.features(...)`` applied directly to the same tokens yields a
       6-element first feature whose first two entries are 3543 and 9.
    3. ``instances_converted(train=False, convert=True)`` yields the same
       converted feature for the first instance and the target value 1.
    """
    ds = Dataset(TESTFILE2)
    indep, dep = next(iter(ds.instances_original()))
    assert indep == [['invincible', 'is', 'a', 'wonderful', 'movie', '.']]
    assert dep == "pos"

    # check low level conversion methods first
    indep1 = [['invincible', 'is', 'a', 'wonderful', 'movie', '.']]
    indep1_conv = ds.features(indep1)
    logger.debug("Original indep1=%r", indep1)
    logger.debug("Converted indep1=%r", indep1_conv)
    ngram1 = indep1_conv[0]
    assert len(ngram1) == 6
    assert ngram1[0] == 3543
    assert ngram1[1] == 9

    # now the same instance through the converting iterator
    rec = next(iter(ds.instances_converted(train=False, convert=True)))
    logger.debug("TESTFILE2 rec1=%r", rec)
    # Pass the value as a logging argument instead of pre-formatting with
    # "%": a tuple result would otherwise be unpacked as format arguments.
    logger.debug("TESTFILE2 info=%r", ds.get_info())
    indep1_it, dep1_it = rec
    ngram1_it = indep1_it[0]
    logger.debug("TESTFILE2 dep_it=%r", dep1_it)
    assert len(ngram1_it) == 6
    assert ngram1_it[0] == 3543
    assert ngram1_it[1] == 9
    assert dep1_it == 1
def test_t4(self):
    """Smoke-test loading TESTFILE3 and log what was loaded.

    Logs the dataset's feature attributes, its ``features`` attribute, the
    first original instance and the result of ``get_info()``. The test
    makes no assertions; it fails only if one of these calls raises.
    """
    ds = Dataset(TESTFILE3)
    logger.debug(
        "TESTFILE3 attrs=%r",
        ds.meta.get("featureInfo").get("attributes"),
    )
    # Features constructor finishes the vocab, so we need to re-initialize
    # NOTE(review): no re-initialization is visible here; the attribute is
    # only read and logged -- confirm whether this comment is still accurate.
    features = ds.features
    logger.debug("TESTFILE3 features=%r", features)
    rec = next(iter(ds.instances_original()))
    logger.debug("TESTFILE3 rec1=%r", rec)
    # Pass the value as a logging argument instead of pre-formatting with
    # "%": a tuple result would otherwise be unpacked as format arguments.
    logger.debug("TESTFILE3 info=%r", ds.get_info())