class Test_GraphAlignServer(unittest.TestCase):
    """
    Tests that run against a directly instantiated AlignServer
    """

    def setUp(self):
        # the extractor goes first: the Timbl options are built from the
        # descriptor it provides
        self.init_feat_extractor()
        self.setup_timbl_server()
        self.gaserver = AlignServer(feat_extractor=self.feat_extractor)
        self.s1 = "Ik ben één zin .".decode("utf-8")
        self.s2 = "Ik ben ook een zin .".decode("utf-8")

    def init_feat_extractor(self):
        # base feature description: same-root plus same-pos features
        base_descriptor = Descriptor.fromfeats(
            feats.same_root + feats.same_pos)
        extractor = TimblExtractor(
            base_descriptor,
            node_selector=TimblExtractor.select_lexical_node)
        self.feat_extractor = extractor
        # use the extractor's own descriptor from here on: it is the feature
        # description including the administrative features for Timbl
        self.descriptor = extractor.descriptor

    def setup_timbl_server(self):
        metrics = self.descriptor.metrics
        inst_path = os.path.abspath("data/base.inst")
        timbl_options = "+vo +vdb +vdi %s -f %s" % (metrics, inst_path)
        # The Timbl server terminates automatically once its TimblServer
        # object dies, hence the reference stored on the test case
        self.server = TimblServer(options=timbl_options)
        self.server.start()

    def test_1(self):
        # smoke test: the alignment result is not inspected
        alignment = self.gaserver.align(self.s1, self.s2)
def setup_timbl_server(self):
    """
    Start a Timbl server on data/base.inst and store it as self.server
    """
    metrics = self.descriptor.metrics
    inst_path = os.path.abspath("data/base.inst")
    timbl_options = "+vo +vdb +vdi %s -f %s" % (metrics, inst_path)
    # The Timbl server terminates automatically once its TimblServer object
    # dies, hence the reference stored on self
    self.server = TimblServer(options=timbl_options)
    self.server.start()
def test_start_without_stop(self):
    """
    Start a server, leave it running, and check its process bookkeeping
    """
    timbl_server = TimblServer(timbl_opts=self.timbl_opts,
                               wait_for_dead=True)
    timbl_server.start()
    # a started server reports a pid and lists it in kill_pids
    self.assertTrue(timbl_server.pid)
    self.assertTrue(timbl_server.pid in timbl_server.kill_pids)
    # os.getpgid raises OSError when no process has this pid
    self.assertTrue(os.getpgid(timbl_server.pid))
def test_server_logfile(self):
    """
    Check that a server given server_log_fname leaves content in that file

    The named temporary file is held open for the whole start/stop cycle
    and is closed (which also removes it) when the with-block exits, even
    if the server raises.
    """
    with tempfile.NamedTemporaryFile(mode="rb", bufsize=0) as log_file:
        log_fname = log_file.name
        server = TimblServer(timbl_opts=self.timbl_opts,
                             server_log_fname=log_fname)
        server.start()
        server.stop()
        # read through a separate handle that is closed right away rather
        # than left for the garbage collector
        with open(log_fname) as log:
            log_lines = log.readlines()
    self.assertTrue(log_lines)
def test_logging_2(self):
    """
    Run a start/stop cycle with a caller-supplied file logger

    The log content is only printed for inspection; nothing is asserted
    about it.
    """
    # mkstemp creates the file and returns its name, so the path cannot be
    # taken by something else before the logger opens it
    fd, log_fname = tempfile.mkstemp()
    os.close(fd)
    try:
        logger = file_logger("my_log", log_fname)
        server = TimblServer(timbl_opts=self.timbl_opts, logger=logger)
        server.start()
        server.stop()
        with open(log_fname) as log:
            log_text = log.read()
        # a single parenthesised argument prints the same with the print
        # statement and with the print function
        print(log_text)
    finally:
        # remove the log file even when the server fails to start or stop
        os.remove(log_fname)
def _init_server(self, descriptor, inst_fname, inst_base_fname, options,
                 server_log_fname):
    """
    Build the Timbl option string and start a server with it

    @param descriptor: passed on to timbl_options_string
    @param inst_fname: passed on as inst_fname
    @param inst_base_fname: passed on as inst_base_fname
    @param options: passed on as the "other" options
    @param server_log_fname: passed on to TimblServer
    """
    timbl_opts = timbl_options_string(descriptor,
                                      inst_fname=inst_fname,
                                      inst_base_fname=inst_base_fname,
                                      other=options)
    # The Timbl server terminates automatically once its TimblServer object
    # dies, hence the reference stored on self
    self._server = TimblServer(timbl_opts=timbl_opts,
                               server_log_fname=server_log_fname)
    self._server.start()
def test_logging_1(self):
    """
    Run a start/stop cycle with the logging system set to DEBUG

    The root logger is set back to CRITICAL afterwards, also when the
    server raises, so the verbose level does not leak into other tests.
    """
    # quick & global config of the logging system; without an explicit
    # stream, basicConfig sends the output of loggers to stderr
    logging.basicConfig(level=logging.DEBUG,
                        format="%(levelname)s <%(name)s> :: %(message)s")
    try:
        server = TimblServer(timbl_opts=self.timbl_opts,
                             wait_for_dead=True,
                             log_tag="server1")
        server.start()
        server.stop()
    finally:
        # global reset of logging level
        logging.getLogger().setLevel(logging.CRITICAL)
def test_multiple_servers(self):
    """
    Start ten servers and check that every registered pid is a live process
    """
    # make sure there are no left overs
    TimblServer.kill_pids = []
    # keep every server referenced so none of them is reclaimed while the
    # test is still checking its process
    servers = []
    for _ in range(10):
        server = TimblServer(timbl_opts=self.timbl_opts,
                             wait_for_dead=True)
        server.start()
        servers.append(server)
    self.assertEqual(len(TimblServer.kill_pids), 10)
    for pid in TimblServer.kill_pids:
        # check each registered pid in turn (not just the last server's);
        # os.getpgid raises OSError when no process has this pid
        self.assertTrue(os.getpgid(pid))
def test_start_and_stop(self):
    """
    Check the pid bookkeeping of a server before and after stop()
    """
    server = TimblServer(timbl_opts=self.timbl_opts, wait_for_dead=True)
    server.start()
    self.assertTrue(server.pid)
    self.assertTrue(server.pid in server.kill_pids)
    self.assertTrue(os.getpgid(server.pid))
    # remember the pid: the test expects server.pid to be falsy after stop()
    server_pid = server.pid
    server.stop()
    self.assertFalse(server.pid)
    # look up the remembered pid; testing the now falsy server.pid would
    # pass no matter what kill_pids contains
    self.assertFalse(server_pid in server.kill_pids)
    # this requires wait_for_dead=True
    self.assertRaises(OSError, os.getpgid, server_pid)
def test_restart(self):
    """
    Restart a running server and check its process bookkeeping afterwards
    """
    timbl_server = TimblServer(timbl_opts=self.timbl_opts,
                               wait_for_dead=True)
    timbl_server.start()
    timbl_server.restart()
    # after the restart the server reports a pid and lists it in kill_pids
    self.assertTrue(timbl_server.pid)
    self.assertTrue(timbl_server.pid in timbl_server.kill_pids)
    # os.getpgid raises OSError when no process has this pid
    self.assertTrue(os.getpgid(timbl_server.pid))
def test_server_logfile(self):
    """
    Check that a server given server_log_fname leaves content in that file

    The named temporary file is held open for the whole start/stop cycle
    and is closed (which also removes it) when the with-block exits, even
    if the server raises.
    """
    with tempfile.NamedTemporaryFile(mode="rb", bufsize=0) as log_file:
        log_fname = log_file.name
        server = TimblServer(timbl_opts=self.timbl_opts,
                             server_log_fname=log_fname)
        server.start()
        server.stop()
        # read through a separate handle that is closed right away rather
        # than left for the garbage collector
        with open(log_fname) as log:
            log_lines = log.readlines()
    self.assertTrue(log_lines)
def test_init(self):
    """
    Check that a TimblServer can be constructed from the Timbl options alone
    """
    # construction only: the server is never started
    timbl_server = TimblServer(timbl_opts=self.timbl_opts)
class TimblClassifier(Classifier):

    def __init__(self, descriptor, inst_fname=None, inst_base_fname=None,
                 options="", weight_func=None, server_log_fname=None):
        """
        Set up a classifier backed by its own Timbl server and client

        @param descriptor: Descriptor instance
        @keyword inst_fname: name of file containing Timbl instances
        @keyword inst_base_fname: name of file containing Timbl instance base
        @keyword options: additional Timbl options, excluding -f, -m,
        +vo, +vdb, +vdi (defaults to an empty string)
        @keyword weight_func: weight function; defaults to entropy_weight
        @keyword server_log_fname: filename for Timbl server log
        """
        self.no_rel = descriptor.no_rel
        # the server goes first: the client connects to the server's port
        self._init_server(descriptor, inst_fname, inst_base_fname, options,
                          server_log_fname)
        self._init_client()
        if weight_func:
            self.weight_func = weight_func
        else:
            self.weight_func = entropy_weight

    def _init_server(self, descriptor, inst_fname, inst_base_fname, options,
                     server_log_fname):
        timbl_opts = timbl_options_string(descriptor,
                                          inst_fname=inst_fname,
                                          inst_base_fname=inst_base_fname,
                                          other=options)
        # The Timbl server terminates automatically once its TimblServer
        # object dies, hence the reference stored on self
        self._server = TimblServer(timbl_opts=timbl_opts,
                                   server_log_fname=server_log_fname)
        self._server.start()

    def _init_client(self):
        # connect to the port of the server started by _init_server
        self._client = TimblClient(self._server.port)
        self._client.connect()

    def classify(self, instances):
        """
        Store the predicted class and its weight in every instance

        Each instance is updated in place: "pred_relation" receives the
        predicted category and "pred_weight" the value of the weight
        function.

        @param instances: numpy.ndarray instance
        """
        for instance in instances:
            # Assumes that last field in instance is the true class
            fields = [self._to_str(field) for field in instance]
            reply = self._client.classify("\t".join(fields))
            predicted = reply["CATEGORY"]
            instance["pred_relation"] = predicted
            # The Timbl client is lazy and leaves the distribution string
            # unparsed; parse_distrib turns it into an iterator over
            # (class, count) pairs
            instance["pred_weight"] = self.weight_func(
                category=predicted,
                distribution=parse_distrib(reply["DISTRIBUTION"]))

    def _to_str(self, value):
        # value can be a bool, number, ascii string or unicode string
        try:
            text = str(value)
        except UnicodeEncodeError:
            # str() could not encode the value, so encode it as utf-8
            text = value.encode("utf-8")
        return text
def start_timbl_server():
    """
    Start a Timbl server on DATA_DIR/dimin.train and bind it to SERVER
    """
    global SERVER
    timbl_opts = "-f {0}/dimin.train".format(DATA_DIR)
    # bind the module-level name before starting the server
    SERVER = TimblServer(timbl_opts=timbl_opts)
    SERVER.start()