예제 #1
0
class Test_GraphAlignServer(unittest.TestCase):
    """
    Tests against an AlignServer instance that is created directly
    """

    def setUp(self):
        # order matters: setup_timbl_server reads self.descriptor, which is
        # set by init_feat_extractor
        self.init_feat_extractor()
        self.setup_timbl_server()
        self.gaserver = AlignServer(feat_extractor=self.feat_extractor)
        self.s1 = "Ik ben één zin .".decode("utf-8")
        self.s2 = "Ik ben ook een zin .".decode("utf-8")

    def init_feat_extractor(self):
        """
        Build the feature extractor and remember its descriptor
        """
        # base feature description
        base_feats = feats.same_root + feats.same_pos
        extractor = TimblExtractor(
            Descriptor.fromfeats(base_feats),
            node_selector=TimblExtractor.select_lexical_node)
        self.feat_extractor = extractor
        # the extractor's own descriptor is the feature description
        # including the administrative features for Timbl
        self.descriptor = extractor.descriptor

    def setup_timbl_server(self):
        """
        Start a Timbl server configured from self.descriptor
        """
        metrics = self.descriptor.metrics
        inst_path = os.path.abspath("data/base.inst")
        timbl_options = "+vo +vdb +vdi %s -f %s" % (metrics, inst_path)
        # Timbl server will automatically terminate when the TimblServer
        # object dies, so the instance is kept as an attribute
        self.server = TimblServer(options=timbl_options)
        self.server.start()

    def test_1(self):
        # only checks that align() completes; the result is not inspected
        self.gaserver.align(self.s1, self.s2)
예제 #2
0
class Test_GraphAlignServer(unittest.TestCase):
    """
    Tests that run against a directly constructed AlignServer instance
    """

    def setUp(self):
        # the extractor must exist first: the Timbl server options are
        # derived from self.descriptor
        self.init_feat_extractor()
        self.setup_timbl_server()
        self.gaserver = AlignServer(feat_extractor=self.feat_extractor)
        self.s1 = "Ik ben één zin .".decode("utf-8")
        self.s2 = "Ik ben ook een zin .".decode("utf-8")

    def init_feat_extractor(self):
        """
        Create the Timbl feature extractor used by the align server
        """
        # base feature description
        base_descriptor = Descriptor.fromfeats(
            feats.same_root + feats.same_pos)
        selector = TimblExtractor.select_lexical_node
        self.feat_extractor = TimblExtractor(base_descriptor,
                                             node_selector=selector)
        # feature description including administrative features for Timbl
        self.descriptor = self.feat_extractor.descriptor

    def setup_timbl_server(self):
        """
        Launch the Timbl server and keep it as self.server
        """
        fields = (self.descriptor.metrics,
                  os.path.abspath("data/base.inst"))
        timbl_options = "+vo +vdb +vdi %s -f %s" % fields
        # the Timbl server terminates automatically when the TimblServer
        # object dies, hence the reference on self
        self.server = TimblServer(options=timbl_options)
        self.server.start()

    def test_1(self):
        # aligns the two fixture sentences; the outcome is not checked
        self.gaserver.align(self.s1, self.s2)
예제 #3
0
 def setup_timbl_server(self):
     """
     Start a Timbl server configured from self.descriptor
     """
     metrics = self.descriptor.metrics
     inst_path = os.path.abspath("data/base.inst")
     timbl_options = "+vo +vdb +vdi %s -f %s" % (metrics, inst_path)
     # the Timbl server terminates automatically when the TimblServer
     # object dies, so the instance is stored on self
     self.server = TimblServer(options=timbl_options)
     self.server.start()
예제 #4
0
 def test_start_without_stop(self):
     """
     A started server has a pid that is registered in kill_pids
     """
     timbl_server = TimblServer(
         timbl_opts=self.timbl_opts, wait_for_dead=True)
     timbl_server.start()
     self.assertTrue(timbl_server.pid)
     self.assertTrue(timbl_server.pid in timbl_server.kill_pids)
     # os.getpgid raises OSError when no process has this pid
     self.assertTrue(os.getpgid(timbl_server.pid))
예제 #5
0
 def test_server_logfile(self):
     """
     The server writes something to the file given as server_log_fname
     """
     log_file = tempfile.NamedTemporaryFile(mode="rb", bufsize=0)
     try:
         server = TimblServer(timbl_opts=self.timbl_opts,
                              server_log_fname=log_file.name)
         server.start()
         server.stop()
         # read the log back through a context manager so that the
         # second handle is closed again
         with open(log_file.name) as log:
             self.assertTrue(log.readlines())
     finally:
         # closing the temporary file also removes it from disk
         log_file.close()
예제 #6
0
 def test_logging_2(self):
     """
     Run a server with a file logger and print what was logged
     """
     # mkstemp creates the file and leaves it in place; the name taken from
     # a discarded NamedTemporaryFile belongs to a file that is deleted as
     # soon as that object is garbage collected
     fd, log_fname = tempfile.mkstemp()
     os.close(fd)
     try:
         logger = file_logger("my_log", log_fname)
         server = TimblServer(timbl_opts=self.timbl_opts,
                              logger=logger)
         server.start()
         server.stop()
         # NOTE(review): the log content is only printed, not asserted on
         # (the assertion was already disabled) - confirm this is intended
         with open(log_fname) as log:
             print(log.read())
     finally:
         # remove the log file even when the server or logger fails
         os.remove(log_fname)
예제 #7
0
 def _init_server(self, descriptor, inst_fname, inst_base_fname, options,
                  server_log_fname):
     """
     Build the Timbl option string and start a server with it
     """
     timbl_opts = timbl_options_string(descriptor,
                                       inst_fname=inst_fname,
                                       inst_base_fname=inst_base_fname,
                                       other=options)
     server = TimblServer(timbl_opts=timbl_opts,
                          server_log_fname=server_log_fname)
     # the Timbl server terminates automatically when the TimblServer
     # object dies, so it is stored on self before it is started
     self._server = server
     server.start()
예제 #8
0
 def test_logging_1(self):
     """
     Start and stop a server while logging is configured at DEBUG level
     """
     # quick & global config of the logging system so that the DEBUG
     # output of all loggers becomes visible
     logging.basicConfig(level=logging.DEBUG,
                         format="%(levelname)s <%(name)s> :: %(message)s")
     try:
         server = TimblServer(timbl_opts=self.timbl_opts,
                              wait_for_dead=True,
                              log_tag="server1")
         server.start()
         server.stop()
     finally:
         # global reset of logging level; done in a finally clause so a
         # failing server does not leave DEBUG logging on for later tests
         logging.getLogger().setLevel(logging.CRITICAL)
예제 #9
0
    def test_multiple_servers(self):
        """
        Ten started servers register ten pids, each of a running process
        """
        # make sure there are no left overs
        TimblServer.kill_pids = []

        for _ in range(10):
            server = TimblServer(timbl_opts=self.timbl_opts,
                                 wait_for_dead=True)
            server.start()

        self.assertEqual(len(TimblServer.kill_pids), 10)

        # check every registered pid rather than the pid of the last
        # server only; os.getpgid raises OSError for a non-existent process
        for pid in TimblServer.kill_pids:
            self.assertTrue(os.getpgid(pid))
예제 #10
0
    def test_multiple_servers(self):
        """
        Starting ten servers yields ten registered pids of live processes
        """
        # make sure there are no left overs
        TimblServer.kill_pids = []

        for _ in range(10):
            server = TimblServer(timbl_opts=self.timbl_opts,
                                 wait_for_dead=True)
            server.start()

        self.assertEqual(len(TimblServer.kill_pids), 10)

        # verify each pid in the list; the loop variable was previously
        # ignored, so only the most recently started server was checked
        for pid in TimblServer.kill_pids:
            self.assertTrue(os.getpgid(pid))
예제 #11
0
    def test_start_and_stop(self):
        """
        A server has a registered pid after start() and none after stop()
        """
        timbl_server = TimblServer(
            timbl_opts=self.timbl_opts, wait_for_dead=True)
        timbl_server.start()
        self.assertTrue(timbl_server.pid)
        self.assertTrue(timbl_server.pid in timbl_server.kill_pids)
        self.assertTrue(os.getpgid(timbl_server.pid))

        # remember the pid for the final check on the old process
        old_pid = timbl_server.pid
        timbl_server.stop()
        self.assertFalse(timbl_server.pid)
        # NOTE(review): this tests the post-stop value of pid, not old_pid -
        # confirm whether old_pid was meant here
        self.assertFalse(timbl_server.pid in timbl_server.kill_pids)
        # this requires wait_for_dead=True
        self.assertRaises(OSError, os.getpgid, old_pid)
예제 #12
0
 def setup_timbl_server(self):
     """
     Launch the Timbl server and keep it as self.server
     """
     fields = (self.descriptor.metrics,
               os.path.abspath("data/base.inst"))
     timbl_options = "+vo +vdb +vdi %s -f %s" % fields
     # the Timbl server terminates automatically when the TimblServer
     # object dies, hence the reference on self
     self.server = TimblServer(options=timbl_options)
     self.server.start()
예제 #13
0
 def test_restart(self):
     """
     After restart() the server has a pid that is registered in kill_pids
     """
     timbl_server = TimblServer(
         timbl_opts=self.timbl_opts,
         wait_for_dead=True)
     timbl_server.start()
     timbl_server.restart()
     self.assertTrue(timbl_server.pid)
     self.assertTrue(timbl_server.pid in timbl_server.kill_pids)
     # os.getpgid raises OSError when no process has this pid
     self.assertTrue(os.getpgid(timbl_server.pid))
예제 #14
0
 def test_server_logfile(self):
     """
     Starting and stopping a server leaves content in its log file
     """
     log_file = tempfile.NamedTemporaryFile(mode="rb", bufsize=0)
     try:
         server = TimblServer(timbl_opts=self.timbl_opts,
                              server_log_fname=log_file.name)
         server.start()
         server.stop()
         # use a context manager so the handle used for reading the log
         # is closed instead of being left to the garbage collector
         with open(log_file.name) as log:
             self.assertTrue(log.readlines())
     finally:
         # closing the temporary file also removes it from disk
         log_file.close()
예제 #15
0
 def test_logging_2(self):
     """
     Run a server that logs to a file and print the resulting log
     """
     # mkstemp creates the file and leaves it in place; the name of a
     # discarded NamedTemporaryFile refers to a file that is deleted once
     # that object is garbage collected
     fd, log_fname = tempfile.mkstemp()
     os.close(fd)
     try:
         logger = file_logger("my_log", log_fname)
         server = TimblServer(timbl_opts=self.timbl_opts, logger=logger)
         server.start()
         server.stop()
         # NOTE(review): the log content is only printed, not asserted on
         # (the assertion was already disabled) - confirm this is intended
         with open(log_fname) as log:
             print(log.read())
     finally:
         # clean up the log file even if one of the steps above raises
         os.remove(log_fname)
예제 #16
0
 def test_logging_1(self):
     """
     Start and stop a tagged server with DEBUG logging switched on
     """
     # quick & global config of the logging system so that the DEBUG
     # output of all loggers becomes visible
     logging.basicConfig(level=logging.DEBUG,
                         format="%(levelname)s <%(name)s> :: %(message)s")
     try:
         server = TimblServer(timbl_opts=self.timbl_opts,
                              wait_for_dead=True,
                              log_tag="server1")
         server.start()
         server.stop()
     finally:
         # global reset of logging level; runs even when the server
         # raises, so later tests are not flooded with DEBUG output
         logging.getLogger().setLevel(logging.CRITICAL)
예제 #17
0
    def test_start_and_stop(self):
        """
        start() registers a pid; stop() clears it and the process is gone
        """
        timbl_server = TimblServer(timbl_opts=self.timbl_opts,
                                   wait_for_dead=True)
        timbl_server.start()
        self.assertTrue(timbl_server.pid)
        self.assertTrue(timbl_server.pid in timbl_server.kill_pids)
        self.assertTrue(os.getpgid(timbl_server.pid))

        # keep the pid around for the check on the old process below
        stopped_pid = timbl_server.pid
        timbl_server.stop()
        self.assertFalse(timbl_server.pid)
        # NOTE(review): membership is tested for the post-stop pid value,
        # not for stopped_pid - confirm which one was intended
        self.assertFalse(timbl_server.pid in timbl_server.kill_pids)
        # this requires wait_for_dead=True
        self.assertRaises(OSError, os.getpgid, stopped_pid)
예제 #18
0
 def test_init(self):
     """
     Constructing a TimblServer from self.timbl_opts must not raise
     """
     timbl_server = TimblServer(timbl_opts=self.timbl_opts)
예제 #19
0
class TimblClassifier(Classifier):

    def __init__(self, descriptor, inst_fname=None, inst_base_fname=None,
                 options="", weight_func=None, server_log_fname=None):
        """
        Set up a classifier with its own Timbl server and client

        @param descriptor: Descriptor instance

        @keyword inst_fname: name of file containing Timbl instances

        @keyword inst_base_fname: name of file containing Timbl instance base

        @keyword options: additional Timbl options, excluding -f, -m,
        +vo, +vdb, +vdi; defaults to an empty string

        @keyword weight_func: weight function; defaults to entropy_weight

        @keyword server_log_fname: filename for Timbl server log
        """
        self.no_rel = descriptor.no_rel
        # the server has to be running before the client can use its port
        self._init_server(descriptor, inst_fname, inst_base_fname, options,
                          server_log_fname)
        self._init_client()
        if weight_func:
            self.weight_func = weight_func
        else:
            self.weight_func = entropy_weight

    def _init_server(self, descriptor, inst_fname, inst_base_fname, options,
                     server_log_fname):
        """
        Build the Timbl option string and start a server with it
        """
        timbl_opts = timbl_options_string(descriptor,
                                          inst_fname=inst_fname,
                                          inst_base_fname=inst_base_fname,
                                          other=options)
        server = TimblServer(timbl_opts=timbl_opts,
                             server_log_fname=server_log_fname)
        # the Timbl server terminates automatically when the TimblServer
        # object dies, so it is stored on self before it is started
        self._server = server
        server.start()

    def _init_client(self):
        """
        Connect a client to the port of the server started before
        """
        client = TimblClient(self._server.port)
        self._client = client
        client.connect()

    def classify(self, instances):
        """
        Store the predicted class and its weight in every instance

        The results are written to the "pred_relation" and "pred_weight"
        fields of each instance.

        @param instances: numpy.ndarray instance
        """
        for inst in instances:
            # Assumes that last field in instance is the true class
            fields = [self._to_str(value) for value in inst]
            result = self._client.classify("\t".join(fields))
            category = result["CATEGORY"]
            inst["pred_relation"] = category
            # The Timbl client is lazy and does not automatically parse the
            # distribution string, so parse_distrib is used to obtain an
            # iterator over (class, count) pairs
            distribution = parse_distrib(result["DISTRIBUTION"])
            inst["pred_weight"] = self.weight_func(category=category,
                                                   distribution=distribution)

    def _to_str(self, value):
        """
        Return value as a byte string, UTF-8 encoded if str() cannot do it
        """
        # value can be a bool, number, ascii string or unicode string
        try:
            text = str(value)
        except UnicodeEncodeError:
            text = value.encode("utf-8")
        return text
예제 #20
0
def start_timbl_server():
    """
    Start a Timbl server on dimin.train and keep it in the global SERVER
    """
    global SERVER

    SERVER = TimblServer(timbl_opts="-f {0}/dimin.train".format(DATA_DIR))
    SERVER.start()
예제 #21
0
def start_timbl_server():
    """
    Create and start the module-level Timbl server stored in SERVER
    """
    global SERVER

    # the instance file dimin.train is looked up below DATA_DIR
    train_opts = "-f {0}/dimin.train".format(DATA_DIR)
    SERVER = TimblServer(timbl_opts=train_opts)
    SERVER.start()