Exemplo n.º 1
0
class State:
    def __init__(self):
        """Create the application state with its default settings.

        The defaults are taken from the global ``options`` object; the
        driver code is expected to override individual attributes
        afterwards (for example from command-line switches).
        """
        # Nothing is open or running yet.
        self.logFile = self.bayes = self.platform_mutex = None
        self.prepared, self.can_stop = False, True

        # init() asserts on self.prepared and may replace self.logFile, so
        # it has to run after the attributes above exist.
        self.init()

        # Copy the UI and cache settings out of the configuration.
        for attribute, section, name in (
                ("uiPort", "html_ui", "port"),
                ("launchUI", "html_ui", "launch_browser"),
                ("gzipCache", "Storage", "cache_use_gzip"),
                ("cacheExpiryDays", "Storage", "cache_expiry_days")):
            setattr(self, attribute, options[section, name])

        # The test modes are off unless the driver code enables them.
        self.runTestServer = self.isTest = False

    def init(self):
        """Reset the per-run state: languages, log file, servers, counters.

        Reads the "globals" and "pop3proxy" sections of the global
        ``options`` object.  ``self.servers`` and ``self.proxyPorts`` are
        only derived from the configuration when the instance does not
        already have them (they may have been set via the command line).
        If the two lists end up with different lengths, a message is
        printed and the process exits through ``sys.exit()``.
        """
        assert not self.prepared, "init after prepare, but before close"

        # Translation support: start from the system default language and
        # then add the configured languages.  The default stays registered
        # as the last option, to fall back on if necessary.
        manager = i18n.LanguageManager()
        self.lang_manager = manager
        manager.set_language(manager.locale_default_lang())
        for code in reversed(options["globals", "language"]):
            manager.add_language(code)

        if options["globals", "verbose"]:
            print("Asked to add languages: " +
                  ", ".join(options["globals", "language"]))
            print("Set language to " +
                  str(manager.current_langs_codes))
            # Verbose runs also get an unbuffered log file.
            self.logFile = open('_pop3proxy.log', 'wb', 0)

        if not hasattr(self, "servers"):
            # Not supplied on the command line: parse the "host[:port]"
            # entries, using port 110 when none is given.
            self.servers = []
            configured = options["pop3proxy", "remote_servers"]
            if configured:
                for entry in configured:
                    entry = entry.strip()
                    if ':' in entry:
                        host, port = entry.split(':', 1)
                    else:
                        host, port = entry, '110'
                    self.servers.append((host, int(port)))

        if not hasattr(self, "proxyPorts"):
            # Not supplied on the command line either.
            self.proxyPorts = []
            listen_ports = options["pop3proxy", "listen_ports"]
            if listen_ports:
                self.proxyPorts = [_addressAndPort(p) for p in listen_ports]

        # Each remote server needs exactly one local listening port.
        if len(self.servers) != len(self.proxyPorts):
            print("pop3proxy_servers & pop3proxy_ports are different lengths!")
            sys.exit()

        # Errors that have already been reported.
        self.reported_errors = {}

        # Session and classification statistics all start from zero.
        self.totalSessions = self.activeSessions = 0
        self.numSpams = self.numHams = self.numUnsure = 0

        # Bookkeeping for getNewMessageName().
        self.lastBaseMessageName = ''
        self.uniquifier = 2

    def close(self):
        """Shut down everything that prepare() set up.

        Stores and closes the classifier database and the message info
        database, drops the corpora and trainers, marks the state as
        unprepared and releases the platform mutex.  Must only be called
        on a prepared state (enforced with an assertion).
        """
        assert self.prepared, "closed without being prepared!"
        self.servers = None
        if self.bayes is not None:
            # Only store the database when both ham and spam have been
            # trained.
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                # Store *this* object's database.  Going through a
                # module-level ``state`` name would save whichever State
                # is bound there, or fail if there is none.
                self.bayes.store()
            self.bayes.close()
            self.bayes = None
        if self.mdb is not None:
            self.mdb.store()
            self.mdb.close()
            self.mdb = None
            spambayes.message.Message().message_info_db = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spamTrainer = self.hamTrainer = None

        self.prepared = False
        close_platform_mutex(self.platform_mutex)
        self.platform_mutex = None

    def prepare(self, can_stop=True):
        """Get the state ready for running.

        can_stop records whether the user may shut the proxy down; pass
        False when that must not be allowed, for example when running as
        a Windows service.
        """
        assert self.platform_mutex is None, "Should not already have the mutex"
        # Take the platform mutex first so that, where we can, a second
        # server is prevented from running at the same time.
        mutex = open_platform_mutex()
        self.platform_mutex = mutex

        self.can_stop = can_stop

        # Build the classifier, corpora and trainers, and only then flag
        # the state as prepared.
        self.createWorkers()
        self.prepared = True

    def buildServerStrings(self):
        """Build display strings for the servers and the proxy ports.

        Sets self.serversString (comma-separated "host:port" pairs) and
        self.proxyPortsString (comma-separated results of
        _addressPortStr), for display in the Status panel.
        """
        remote = []
        for host, port in self.servers:
            remote.append("%s:%s" % (host, port))
        self.serversString = ', '.join(remote)

        listening = [_addressPortStr(entry) for entry in self.proxyPorts]
        self.proxyPortsString = ', '.join(listening)

    def buildStatusStrings(self):
        """Compute self.warning, the status text for the web UI home page.

        The text first comments on how much ham and spam has been trained
        (and how balanced the two are), then gains an extra sentence for
        each cutoff setting that looks wrong.
        """
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam <= 10 or nham <= 10:
            # Too little training to judge the balance.
            if nspam > 0 or nham > 0:
                self.warning = _("Database only has %d good and %d spam - "
                                 "you should consider performing additional "
                                 "training.") % (nham, nspam)
            else:
                self.warning = _("Database has no training information.  "
                                 "SpamBayes will classify all messages as "
                                 "'unsure', ready for you to train.")
        else:
            # Enough of both: warn when one outnumbers the other by more
            # than five to one.
            ratio = nham / float(nspam)
            if ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""

        # Collect the additional warnings about truly odd thresholds and
        # append them in one go.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        notes = []
        if spam_cut < 0.5:
            notes.append(_("<br/>Warning: we do not recommend "
                           "setting the spam threshold less than 0.5."))
        if ham_cut > 0.5:
            notes.append(_("<br/>Warning: we do not recommend "
                           "setting the ham threshold greater than 0.5."))
        if ham_cut > spam_cut:
            notes.append(_("<br/>Warning: your ham threshold is "
                           "<b>higher</b> than your spam threshold. "
                           "Results are unpredictable."))
        self.warning += "".join(notes)

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        print "Loading database...",
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'  # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        self.mdb = spambayes.message.Message().message_info_db

        # Load stats manager.
        self.stats = Stats.Stats(options, self.mdb)

        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the self-test,
        # so as not to clutter the filesystem.
        if not self.isTest:
            # Create/open the Corpuses.  Use small cache sizes to avoid hogging
            # lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            map(storage.ensureDir, [sc, hc, uc])
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            self.spamCorpus = ExpiryFileCorpus(age,
                                               factory,
                                               sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age,
                                              factory,
                                              hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age,
                                                  factory,
                                                  uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)

    def getNewMessageName(self):
        """Return a unique name for a newly arrived message.

        The name is the arrival time in whole seconds, zero-padded to ten
        digits.  When it equals the previous base name, "-N" is appended,
        with N starting at 2 and increasing for each further clash.
        """
        stamp = "%10.10d" % long(time.time())
        if stamp != self.lastBaseMessageName:
            # First message for this timestamp: restart the numbering.
            self.lastBaseMessageName = stamp
            self.uniquifier = 2
            return stamp
        suffix = self.uniquifier
        self.uniquifier = suffix + 1
        return "%s-%d" % (stamp, suffix)

    def RecordClassification(self, cls, score):
        """Count one classified message in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values; anything that is neither the ham nor the spam string is
        counted as unsure.

        score is the score the message received; it is passed on to the
        stats manager.
        """
        if cls == options["Headers", "header_ham_string"]:
            counter = "numHams"
        elif cls == options["Headers", "header_spam_string"]:
            counter = "numSpams"
        else:
            counter = "numUnsure"
        setattr(self, counter, getattr(self, counter) + 1)
        self.stats.RecordClassification(score)
Exemplo n.º 2
0
class CoreState:
    """This keeps the global state of the module - the command-line options,
    statistics like how many mails have been classified, the handle of the
    log file, the Classifier and FileCorpus objects, and so on."""
    def __init__(self):
        """Give every attribute its start-up value, then call init().

        The configurable defaults are taken from the global ``options``
        object; the driver code is expected to override individual
        attributes afterwards (for example from command-line switches).
        """
        # Handles and objects that only exist once the state is prepared.
        self.log_file = self.bayes = self.mutex = self.plugin = None
        self.prepared, self.can_stop = False, True

        # Bookkeeping for getNewMessageName().
        self.last_base_message_name = ''
        self.uniquifier = 2

        # Classification counters.
        self.numSpams = self.numHams = self.numUnsure = 0

        self.servers = ""

        # Settings copied from the configuration.
        self.ui_port = options["html_ui", "port"]
        self.launch_ui = options["html_ui", "launch_browser"]
        self.gzip_cache = options["Storage", "cache_use_gzip"]

        # The test modes are off unless the driver code enables them.
        self.run_test_server = self.is_test = False

        # Corpora and trainers are built later by create_workers().
        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spam_trainer = self.ham_trainer = None

        self.init()
    def init(self):
        """Reset the per-run pieces of state.

        Only legal while the state is not prepared (enforced with an
        assertion).  Clears the language manager and the record of
        reported errors and, when the verbose option is set, opens the
        unbuffered log file '_core_server.log'.
        """
        assert not self.prepared, "init after prepare, but before close"
        self.lang_manager = None
        verbose = options["globals", "verbose"]
        if verbose:
            self.log_file = open('_core_server.log', 'wb', 0)
        self.reported_errors = dict()
    def close(self):
        """Shut down everything that prepare() set up.

        Stores (when both ham and spam have been trained) and closes the
        classifier database, clears the message info database reference,
        drops the corpora and trainers, marks the state as unprepared and
        releases the platform mutex.
        """
        assert self.prepared, "closed without being prepared!"
        classifier = self.bayes
        if classifier is not None:
            # An empty or one-sided database is not written out.
            if not (classifier.nham == 0 or classifier.nspam == 0):
                classifier.store()
            classifier.close()
            self.bayes = None
        spambayes.message.Message().message_info_db = None
        self.spamCorpus = None
        self.hamCorpus = None
        self.unknownCorpus = None
        self.spam_trainer = None
        self.ham_trainer = None
        self.prepared = False
        self.close_platform_mutex()
    def prepare(self, can_stop=True):
        """Get the state ready for running.

        can_stop records whether the user may shut the proxy down; pass
        False when that must not be allowed, for example when running as
        a Windows service.
        """
        # Start from freshly reset per-run state.
        self.init()

        # Take the platform mutex before building anything.
        assert self.mutex is None, "Should not already have the mutex"
        self.open_platform_mutex()

        self.can_stop = can_stop

        # Build the classifier, corpora and trainers, and only then flag
        # the state as prepared.
        self.create_workers()
        self.prepared = True
    def build_status_strings(self):
        """Compute self.warning, the status text for the web UI home page.

        The text first comments on how much ham and spam has been trained
        (and how balanced the two are), then gains an extra sentence for
        each cutoff setting that looks wrong.
        """
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam <= 10 or nham <= 10:
            # Too little training to judge the balance.
            if nspam > 0 or nham > 0:
                self.warning = _("Database only has %d good and %d spam - "
                                 "you should consider performing additional "
                                 "training.") % (nham, nspam)
            else:
                self.warning = _("Database has no training information.  "
                                 "SpamBayes will classify all messages as "
                                 "'unsure', ready for you to train.")
        else:
            # Enough of both: warn when one outnumbers the other by more
            # than five to one.
            ratio = nham / float(nspam)
            if ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""

        # Collect the additional warnings about odd thresholds and append
        # them in one go.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        notes = []
        if spam_cut < 0.5:
            notes.append(_("<br/>Warning: we do not recommend "
                           "setting the spam threshold less than 0.5."))
        if ham_cut > 0.5:
            notes.append(_("<br/>Warning: we do not recommend "
                           "setting the ham threshold greater than 0.5."))
        if ham_cut > spam_cut:
            notes.append(_("<br/>Warning: your ham threshold is "
                           "<b>higher</b> than your spam threshold. "
                           "Results are unpredictable."))
        self.warning += "".join(notes)
    def create_workers(self):
        """Create the classifier, stats manager, corpora and trainers.

        Works from the attributes set in __init__ (possibly overridden by
        the driver code).  In test mode only the classifier, stats manager
        and status text are set up; the on-disk caches and the trainers
        are skipped.
        """
        if self.is_test:
            # Test runs use a pickle database that is never saved.
            self.use_db = "pickle"
            self.db_name = '_core_server.pickle'
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        message_db = spambayes.message.Message().message_info_db
        self.stats = Stats.Stats(options, message_db)
        self.build_status_strings()

        if self.is_test:
            return

        # Make sure the three cache directories exist.
        cache_dirs = [get_pathname_option("Storage", key)
                      for key in ("core_spam_cache", "core_ham_cache",
                                  "core_unknown_cache")]
        for directory in cache_dirs:
            storage.ensureDir(directory)
        spam_dir, ham_dir, unknown_dir = cache_dirs

        factory_class = FileMessageFactory
        if self.gzip_cache:
            factory_class = GzipFileMessageFactory
        factory = factory_class()
        age = options["Storage", "cache_expiry_days"]*24*60*60

        def open_corpus(directory):
            # All three corpora share the same expiry age, factory, file
            # name pattern and cache size.
            return ExpiryFileCorpus(age, factory, directory,
                                    '[0123456789\-]*', cacheSize=20)

        self.spamCorpus = open_corpus(spam_dir)
        self.hamCorpus = open_corpus(ham_dir)
        self.unknownCorpus = open_corpus(unknown_dir)

        # Expire old messages from every corpus, the unknown one included.
        for corpus in (self.spamCorpus, self.hamCorpus, self.unknownCorpus):
            corpus.removeExpiredMessages()

        # Create the trainers and let them observe their corpora.
        self.spam_trainer = storage.SpamTrainer(self.bayes)
        self.ham_trainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spam_trainer)
        self.hamCorpus.addObserver(self.ham_trainer)
    def getNewMessageName(self):
        """Return a unique name for a newly arrived message.

        The name is the arrival time in whole seconds, zero-padded to ten
        digits.  When it equals the previous base name, "-N" is appended,
        with N starting at 2 and increasing for each further clash.
        """
        stamp = "%10.10d" % long(time.time())
        if stamp != self.last_base_message_name:
            # First message for this timestamp: restart the numbering.
            self.last_base_message_name = stamp
            self.uniquifier = 2
            return stamp
        suffix = self.uniquifier
        self.uniquifier = suffix + 1
        return "%s-%d" % (stamp, suffix)
    def record_classification(self, cls, score):
        """Count one classified message in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values; anything that is neither the ham nor the spam string is
        counted as unsure.
        score is the score the message received; it is passed on to the
        stats manager.
        """
        if cls == options["Headers", "header_ham_string"]:
            counter = "numHams"
        elif cls == options["Headers", "header_spam_string"]:
            counter = "numSpams"
        else:
            counter = "numUnsure"
        setattr(self, counter, getattr(self, counter) + 1)
        self.stats.RecordClassification(score)
    def buildStatusStrings(self):
        """Return an empty string; this variant builds no status text."""
        return ''
    def recreate_state(self):
        """Close this state if it is prepared and return a fresh one.

        The replacement is a new, already prepared CoreState; the caller
        is responsible for using it in place of this object.
        """
        if self.prepared:
            self.close()
        replacement = CoreState()
        replacement.prepare()
        return replacement
    def open_platform_mutex(self, mutex_name="SpamBayesServer"):
        """Acquire a named mutex so that only one server runs at a time.

        Implementations of a mutex or other resource which can prevent
        multiple servers starting at once.  Platform specific as no
        reasonable cross-platform solution exists (however, an old trick is
        to use a directory for a mutex, as a create/test atomic API
        generally exists).

        mutex_name is the name passed to win32event.CreateMutex.

        On success the handle is stored in self.mutex.  Raises
        AlreadyRunningException when the mutex already exists or access to
        it is denied.  In the code visible here nothing is done (and
        self.mutex is left untouched) on non-Windows platforms or when the
        win32 extension modules cannot be imported.
        """
        if sys.platform.startswith("win"):
            try:
                # The win32 extension modules are optional; a failed import
                # is handled by the ImportError clause at the bottom.
                import win32event, win32api, winerror
                try:
                    hmutex = win32event.CreateMutex(None, True, mutex_name)
                except win32event.error, details:
                    # Access denied is reported as "already running"
                    # (presumably the mutex belongs to an instance we may
                    # not touch - TODO confirm); any other error is
                    # re-raised unchanged.
                    if details[0] != winerror.ERROR_ACCESS_DENIED:
                        raise
                    raise AlreadyRunningException
                # CreateMutex succeeded but the mutex already existed:
                # release our handle and report the running instance.
                if win32api.GetLastError()==winerror.ERROR_ALREADY_EXISTS:
                    win32api.CloseHandle(hmutex)
                    raise AlreadyRunningException
                self.mutex = hmutex
                return
            except ImportError:
                # No win32 extensions available - carry on without a mutex.
                pass
Exemplo n.º 3
0
class State:
    def __init__(self):
        """Create the application state with its default settings.

        The defaults are taken from the global ``options`` object; the
        driver code is expected to override individual attributes
        afterwards (for example from command-line switches).
        """
        # Nothing is open or running yet.
        self.logFile = self.bayes = self.platform_mutex = None
        self.prepared, self.can_stop = False, True

        # init() asserts on self.prepared and may replace self.logFile, so
        # it has to run after the attributes above exist.
        self.init()

        # Copy the UI and cache settings out of the configuration.
        for attribute, section, name in (
                ("uiPort", "html_ui", "port"),
                ("launchUI", "html_ui", "launch_browser"),
                ("gzipCache", "Storage", "cache_use_gzip"),
                ("cacheExpiryDays", "Storage", "cache_expiry_days")):
            setattr(self, attribute, options[section, name])

        # The test modes are off unless the driver code enables them.
        self.runTestServer = self.isTest = False

    def init(self):
        """Reset the per-run state: languages, log file, servers, counters.

        Reads the "globals" and "pop3proxy" sections of the global
        ``options`` object.  ``self.servers`` and ``self.proxyPorts`` are
        only derived from the configuration when the instance does not
        already have them (they may have been set via the command line).
        If the two lists end up with different lengths, a message is
        printed and the process exits through ``sys.exit()``.
        """
        assert not self.prepared, "init after prepare, but before close"

        # Translation support: start from the system default language and
        # then add the configured languages.  The default stays registered
        # as the last option, to fall back on if necessary.
        manager = i18n.LanguageManager()
        self.lang_manager = manager
        manager.set_language(manager.locale_default_lang())
        for code in reversed(options["globals", "language"]):
            manager.add_language(code)

        if options["globals", "verbose"]:
            print("Asked to add languages: " +
                  ", ".join(options["globals", "language"]))
            print("Set language to " +
                  str(manager.current_langs_codes))
            # Verbose runs also get an unbuffered log file.
            self.logFile = open('_pop3proxy.log', 'wb', 0)

        if not hasattr(self, "servers"):
            # Not supplied on the command line: parse the "host[:port]"
            # entries, using port 110 when none is given.
            self.servers = []
            configured = options["pop3proxy", "remote_servers"]
            if configured:
                for entry in configured:
                    entry = entry.strip()
                    if ':' in entry:
                        host, port = entry.split(':', 1)
                    else:
                        host, port = entry, '110'
                    self.servers.append((host, int(port)))

        if not hasattr(self, "proxyPorts"):
            # Not supplied on the command line either.
            self.proxyPorts = []
            listen_ports = options["pop3proxy", "listen_ports"]
            if listen_ports:
                self.proxyPorts = [_addressAndPort(p) for p in listen_ports]

        # Each remote server needs exactly one local listening port.
        if len(self.servers) != len(self.proxyPorts):
            print("pop3proxy_servers & pop3proxy_ports are different lengths!")
            sys.exit()

        # Errors that have already been reported.
        self.reported_errors = {}

        # Session and classification statistics all start from zero.
        self.totalSessions = self.activeSessions = 0
        self.numSpams = self.numHams = self.numUnsure = 0

        # Bookkeeping for getNewMessageName().
        self.lastBaseMessageName = ''
        self.uniquifier = 2

    def close(self):
        """Shut down everything that prepare() set up.

        Stores and closes the classifier database and the message info
        database, drops the corpora and trainers, marks the state as
        unprepared and releases the platform mutex.  Must only be called
        on a prepared state (enforced with an assertion).
        """
        assert self.prepared, "closed without being prepared!"
        self.servers = None
        if self.bayes is not None:
            # Only store the database when both ham and spam have been
            # trained.
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                # Store *this* object's database.  Going through a
                # module-level ``state`` name would save whichever State
                # is bound there, or fail if there is none.
                self.bayes.store()
            self.bayes.close()
            self.bayes = None
        if self.mdb is not None:
            self.mdb.store()
            self.mdb.close()
            self.mdb = None
            spambayes.message.Message().message_info_db = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spamTrainer = self.hamTrainer = None

        self.prepared = False
        close_platform_mutex(self.platform_mutex)
        self.platform_mutex = None

    def prepare(self, can_stop=True):
        """Get the state ready for running.

        can_stop records whether the user may shut the proxy down; pass
        False when that must not be allowed, for example when running as
        a Windows service.
        """
        assert self.platform_mutex is None, "Should not already have the mutex"
        # Take the platform mutex first so that, where we can, a second
        # server is prevented from running at the same time.
        mutex = open_platform_mutex()
        self.platform_mutex = mutex

        self.can_stop = can_stop

        # Build the classifier, corpora and trainers, and only then flag
        # the state as prepared.
        self.createWorkers()
        self.prepared = True

    def buildServerStrings(self):
        """Build display strings for the servers and the proxy ports.

        Sets self.serversString (comma-separated "host:port" pairs) and
        self.proxyPortsString (comma-separated results of
        _addressPortStr), for display in the Status panel.
        """
        remote = []
        for host, port in self.servers:
            remote.append("%s:%s" % (host, port))
        self.serversString = ', '.join(remote)

        listening = [_addressPortStr(entry) for entry in self.proxyPorts]
        self.proxyPortsString = ', '.join(listening)

    def buildStatusStrings(self):
        """Compute self.warning, the status text for the web UI home page.

        The text first comments on how much ham and spam has been trained
        (and how balanced the two are), then gains an extra sentence for
        each cutoff setting that looks wrong.
        """
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam <= 10 or nham <= 10:
            # Too little training to judge the balance.
            if nspam > 0 or nham > 0:
                self.warning = _("Database only has %d good and %d spam - "
                                 "you should consider performing additional "
                                 "training.") % (nham, nspam)
            else:
                self.warning = _("Database has no training information.  "
                                 "SpamBayes will classify all messages as "
                                 "'unsure', ready for you to train.")
        else:
            # Enough of both: warn when one outnumbers the other by more
            # than five to one.
            ratio = nham / float(nspam)
            if ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""

        # Collect the additional warnings about truly odd thresholds and
        # append them in one go.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        notes = []
        if spam_cut < 0.5:
            notes.append(_("<br/>Warning: we do not recommend "
                           "setting the spam threshold less than 0.5."))
        if ham_cut > 0.5:
            notes.append(_("<br/>Warning: we do not recommend "
                           "setting the ham threshold greater than 0.5."))
        if ham_cut > spam_cut:
            notes.append(_("<br/>Warning: your ham threshold is "
                           "<b>higher</b> than your spam threshold. "
                           "Results are unpredictable."))
        self.warning += "".join(notes)

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        print "Loading database...",
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        self.mdb = spambayes.message.Message().message_info_db

        # Load stats manager.
        self.stats = Stats.Stats(options, self.mdb)

        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the self-test,
        # so as not to clutter the filesystem.
        if not self.isTest:
            # Create/open the Corpuses.  Use small cache sizes to avoid hogging
            # lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            map(storage.ensureDir, [sc, hc, uc])
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"]*24*60*60
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)

    def getNewMessageName(self):
        """Return a unique name for a newly arrived message.

        The name is the arrival time in whole seconds, zero-padded to ten
        digits.  When it equals the previous base name, "-N" is appended,
        with N starting at 2 and increasing for each further clash.
        """
        stamp = "%10.10d" % long(time.time())
        if stamp != self.lastBaseMessageName:
            # First message for this timestamp: restart the numbering.
            self.lastBaseMessageName = stamp
            self.uniquifier = 2
            return stamp
        suffix = self.uniquifier
        self.uniquifier = suffix + 1
        return "%s-%d" % (stamp, suffix)

    def RecordClassification(self, cls, score):
        """Count one classified message in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values; anything that is neither the ham nor the spam string is
        counted as unsure.

        score is the score the message received; it is passed on to the
        stats manager.
        """
        if cls == options["Headers", "header_ham_string"]:
            counter = "numHams"
        elif cls == options["Headers", "header_spam_string"]:
            counter = "numSpams"
        else:
            counter = "numUnsure"
        setattr(self, counter, getattr(self, counter) + 1)
        self.stats.RecordClassification(score)
Exemplo n.º 4
0
class CoreState:
    """This keeps the global state of the module - the command-line options,
    statistics like how many mails have been classified, the handle of the
    log file, the Classifier and FileCorpus objects, and so on."""

    def __init__(self):
        """Give every attribute its start-up value, then call init().

        Configurable values are read from the global options object; the
        original notes say the command-line processing code may override
        them afterwards.
        """
        # Nothing has been opened or loaded yet.
        self.log_file = self.bayes = self.mutex = self.plugin = None
        self.prepared = False
        self.can_stop = True
        self.servers = ""

        # Bookkeeping for getNewMessageName(), which hands out unique
        # names for cached messages.
        self.last_base_message_name = ''
        self.uniquifier = 2

        # Session statistics.
        self.numSpams = self.numHams = self.numUnsure = 0

        # Settings taken from Options.py / bayescustomize.ini.
        self.ui_port = options["html_ui", "port"]
        self.launch_ui = options["html_ui", "launch_browser"]
        self.gzip_cache = options["Storage", "cache_use_gzip"]
        self.run_test_server = False
        self.is_test = False

        # Corpora and trainers are assigned by create_workers().
        self.spamCorpus = None
        self.hamCorpus = None
        self.unknownCorpus = None
        self.spam_trainer = None
        self.ham_trainer = None

        self.init()

    def init(self):
        """Reset the pieces of state that are refreshed on every prepare().

        Must not be called while the state is prepared.  Clears the
        language manager, opens the log file when the "verbose" option is
        set and empties the record of reported errors.
        """
        assert not self.prepared, "init after prepare, but before close"

        # No i18n yet: the i18n.LanguageManager set-up (system default
        # language plus the languages listed in
        # options["globals", "language"]) is disabled for now.
        self.lang_manager = None

        # The log file is only opened in verbose mode (unbuffered, binary).
        verbose = options["globals", "verbose"]
        if verbose:
            self.log_file = open('_core_server.log', 'wb', 0)

        # Remember reported errors.
        self.reported_errors = dict()

    def close(self):
        """Tear down a prepared state.

        Stores and closes the classifier database, drops the corpora and
        trainers, marks the state as unprepared and releases the mutex.
        """
        assert self.prepared, "closed without being prepared!"
        classifier = self.bayes
        if classifier is not None:
            # Only write the database out when it holds both ham and
            # spam training.
            has_training = classifier.nham != 0 and classifier.nspam != 0
            if has_training:
                classifier.store()
            classifier.close()
            self.bayes = None
        # NOTE(review): this assigns on a freshly constructed Message;
        # whether that reaches a shared database handle depends on
        # spambayes.message, which is not visible here - confirm.
        spambayes.message.Message().message_info_db = None

        self.spam_trainer = None
        self.ham_trainer = None
        self.spamCorpus = None
        self.hamCorpus = None
        self.unknownCorpus = None

        self.prepared = False
        self.close_platform_mutex()

    def prepare(self, can_stop=True):
        """Get the state ready for running.

        can_stop is stored on the instance; pass False when the user must
        not be allowed to shut the proxy down, for example when running
        as a Windows service.
        """
        self.init()

        # Claim the mutex, where the platform has one, so that a second
        # server cannot start alongside this one.
        assert self.mutex is None, "Should not already have the mutex"
        self.open_platform_mutex()
        self.can_stop = can_stop

        # Build the classifier, corpora and trainers.
        self.create_workers()
        self.prepared = True

    def build_status_strings(self):
        """Compose self.warning, the status text for the home page of the
        web interface.

        The text first describes how much training data the classifier
        holds and whether ham and spam are badly out of balance; one
        "<br/>"-prefixed line is then appended for each odd-looking
        cutoff setting.
        """
        spam_count = self.bayes.nspam
        ham_count = self.bayes.nham
        if spam_count > 10 and ham_count > 10:
            # Enough data to judge the balance: warn only when one class
            # outnumbers the other by more than five to one.
            ratio = ham_count / float(spam_count)
            if ratio > 5.0:
                message = _("Warning: you have much more ham than spam - "
                            "SpamBayes works best with approximately even "
                            "numbers of ham and spam.")
            elif ratio < (1 / 5.0):
                message = _("Warning: you have much more spam than ham - "
                            "SpamBayes works best with approximately even "
                            "numbers of ham and spam.")
            else:
                message = ""
        elif spam_count > 0 or ham_count > 0:
            template = _("Database only has %d good and %d spam - you "
                         "should consider performing additional training.")
            message = template % (ham_count, spam_count)
        else:
            message = _("Database has no training information.  SpamBayes "
                        "will classify all messages as 'unsure', ready for "
                        "you to train.")
        self.warning = message

        # Further warnings for thresholds that are truly odd.
        spam_cutoff = options["Categorization", "spam_cutoff"]
        ham_cutoff = options["Categorization", "ham_cutoff"]
        if spam_cutoff < 0.5:
            self.warning += _("<br/>Warning: we do not recommend setting "
                              "the spam threshold less than 0.5.")
        if ham_cutoff > 0.5:
            self.warning += _("<br/>Warning: we do not recommend setting "
                              "the ham threshold greater than 0.5.")
        if ham_cutoff > spam_cutoff:
            self.warning += _("<br/>Warning: your ham threshold is "
                              "<b>higher</b> than your spam threshold. "
                              "Results are unpredictable.")

    def create_workers(self):
        """Build the classifier, the statistics manager and - outside the
        self-test - the message corpora and their trainers, using the
        settings established in __init__ and possibly overridden since
        by the driver code."""
        if self.is_test:
            self.db_name = '_core_server.pickle'   # This is never saved.
            self.use_db = "pickle"
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        message_info_db = spambayes.message.Message().message_info_db
        self.stats = Stats.Stats(options, message_info_db)

        self.build_status_strings()

        # The self-test gets no caches or training objects, so that it
        # does not clutter the filesystem.
        if self.is_test:
            return

        spam_dir = get_pathname_option("Storage", "core_spam_cache")
        ham_dir = get_pathname_option("Storage", "core_ham_cache")
        unknown_dir = get_pathname_option("Storage", "core_unknown_cache")
        for cache_dir in (spam_dir, ham_dir, unknown_dir):
            storage.ensureDir(cache_dir)

        if self.gzip_cache:
            factory = GzipFileMessageFactory()
        else:
            factory = FileMessageFactory()
        age = options["Storage", "cache_expiry_days"]*24*60*60

        def open_corpus(directory):
            # Small cache size, to avoid hogging lots of memory.
            return ExpiryFileCorpus(age, factory, directory,
                                    r'[0123456789\-]*', cacheSize=20)

        self.spamCorpus = open_corpus(spam_dir)
        self.hamCorpus = open_corpus(ham_dir)
        self.unknownCorpus = open_corpus(unknown_dir)

        # Old messages are expired from the unknown corpus as well as
        # from the trained ones, since users are expected to reach a
        # point where they no longer train regularly.
        for corpus in (self.spamCorpus, self.hamCorpus, self.unknownCorpus):
            corpus.removeExpiredMessages()

        # Create the Trainers and attach each to its corpus.
        self.spam_trainer = storage.SpamTrainer(self.bayes)
        self.ham_trainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spam_trainer)
        self.hamCorpus.addObserver(self.ham_trainer)

    def getNewMessageName(self):
        """Return a cache name for a newly arrived message.

        The name is the arrival time in whole seconds, zero-padded to at
        least ten digits.  When the previous name was built from the same
        second, "-N" is appended (N starts at 2 and grows by one for each
        further collision); otherwise the new base name is remembered in
        self.last_base_message_name and the uniquifier is reset to 2.
        """
        # int() instead of long(): the formatted text is the same on
        # Python 2 (ints promote to long automatically), and int() still
        # exists on Python 3, where long() was removed.
        message_name = "%10.10d" % int(time.time())
        if message_name == self.last_base_message_name:
            message_name = "%s-%d" % (message_name, self.uniquifier)
            self.uniquifier += 1
        else:
            self.last_base_message_name = message_name
            self.uniquifier = 2
        return message_name

    def record_classification(self, cls, score):
        """Tally one classified message in the session counters.

        cls is compared with options["Headers", "header_ham_string"] and
        then with options["Headers", "header_spam_string"]; anything else
        is counted as unsure.

        score is passed on unchanged to self.stats.RecordClassification().
        """
        if cls == options["Headers", "header_ham_string"]:
            counter = "numHams"
        elif cls == options["Headers", "header_spam_string"]:
            counter = "numSpams"
        else:
            counter = "numUnsure"
        setattr(self, counter, getattr(self, counter) + 1)
        self.stats.RecordClassification(score)

    def buildStatusStrings(self):
        """Return an empty string.

        NOTE(review): the status text itself is composed by
        build_status_strings(); this camelCase method looks like a
        compatibility stub - confirm whether any caller still needs it.
        """
        return ''

    def recreate_state(self):
        """Close this state if it is prepared, then return a brand new
        CoreState that has already been prepared."""
        if self.prepared:
            # close() stores the database when that is warranted.
            self.close()
        # Start over with a fresh object rather than reusing this one.
        replacement = CoreState()
        replacement.prepare()
        return replacement

    def open_platform_mutex(self, mutex_name="SpamBayesServer"):
        """Claim a resource that prevents multiple servers starting at once.

        Platform specific, as no reasonable cross-platform solution exists
        (an old trick is to use a directory as the mutex, since an atomic
        create/test API generally exists).

        On Windows with the win32 extensions available, a named mutex is
        created and stored in self.mutex.  On any other platform, or when
        the win32 modules cannot be imported, self.mutex is set to None
        and the server simply starts.

        Raises AlreadyRunningException when the mutex already exists or
        when creating it is refused with "access denied".
        """
        if sys.platform.startswith("win"):
            # Only the import is guarded: an ImportError raised anywhere
            # else should not be mistaken for "win32all is missing".
            try:
                import win32event
                import win32api
                import winerror
            except ImportError:
                # no win32all - no worries, just start
                pass
            else:
                # ideally, the mutex name could include either the username,
                # or the munged path to the INI file - this would mean we
                # would allow multiple starts so long as they weren't for
                # the same user.  However, as of now, the service version
                # is likely to start as a different user, so a single mutex
                # is best for now.
                # XXX - even if we do get clever with another mutex name, we
                # should consider still creating a non-exclusive
                # "SpamBayesServer" mutex, if for no better reason than so
                # an installer can check if we are running
                try:
                    hmutex = win32event.CreateMutex(None, True, mutex_name)
                except win32event.error as details:
                    # If another user has the mutex open, we get an "access
                    # denied" error - this is still telling us what we need
                    # to know.  The code is read from .args because
                    # exception objects cannot be indexed on Python 3.
                    if details.args[0] != winerror.ERROR_ACCESS_DENIED:
                        raise
                    raise AlreadyRunningException
                # mutex opened - now check if we actually created it.
                if win32api.GetLastError() == winerror.ERROR_ALREADY_EXISTS:
                    win32api.CloseHandle(hmutex)
                    raise AlreadyRunningException
                self.mutex = hmutex
                return
        self.mutex = None

    def close_platform_mutex(self):
        """Release the server mutex, if one is held, and forget it."""
        held = self.mutex
        # The handle is only closed on Windows.
        if held is not None and sys.platform.startswith("win"):
            held.Close()
        self.mutex = None
Exemplo n.º 5
0
class State:
    def __init__(self):
        """Set up the application state with its default values.

        init() is run first; the configurable settings are then read
        from the global options object.  The original notes say the
        command-line processing code may override them afterwards.
        """
        # Nothing is open or loaded yet.
        self.logFile = self.bayes = self.platform_mutex = None
        self.prepared = False
        self.init()

        # Settings taken from Options.py / bayescustomize.ini.
        self.uiPort = options["html_ui", "port"]
        self.launchUI = options["html_ui", "launch_browser"]
        self.gzipCache = options["Storage", "cache_use_gzip"]
        self.cacheExpiryDays = options["Storage", "cache_expiry_days"]

        # Test switches default to off.
        self.runTestServer = False
        self.isTest = False

    def init(self):
        """Reset the per-run state.

        Opens the log file when the "verbose" option is set, rebuilds the
        remote server list and the listen-port list from the "pop3proxy"
        options, and clears the error record, the statistics and the
        message-name bookkeeping.  A remote server given without ":port"
        uses port 110.

        Prints a message and exits the process with status 1 when the
        number of remote servers differs from the number of listen ports.
        """
        assert not self.prepared, "init after prepare, but before close"
        # Open the log file.
        if options["globals", "verbose"]:
            self.logFile = open('_pop3proxy.log', 'wb', 0)

        self.servers = []
        for server in options["pop3proxy", "remote_servers"] or ():
            server = server.strip()
            if ':' in server:
                server, port = server.split(':', 1)
            else:
                port = '110'
            self.servers.append((server, int(port)))

        self.proxyPorts = []
        listen_ports = options["pop3proxy", "listen_ports"]
        if listen_ports:
            # Build a real list, so that len() below does not depend on
            # what map() returns.
            self.proxyPorts = [_addressAndPort(p) for p in listen_ports]

        if len(self.servers) != len(self.proxyPorts):
            print("pop3proxy_servers & pop3proxy_ports are different lengths!")
            # This is a fatal configuration error; a bare sys.exit()
            # would report success to whatever launched us.
            sys.exit(1)

        # Remember reported errors.
        self.reported_errors = {}

        # Set up the statistics.
        self.totalSessions = 0
        self.activeSessions = 0
        self.numSpams = 0
        self.numHams = 0
        self.numUnsure = 0

        # Unique names for cached messages - see `getNewMessageName()` below.
        self.lastBaseMessageName = ''
        self.uniquifier = 2

    def close(self):
        """Tear down a prepared state.

        Forgets the server list, stores and closes the classifier
        database, drops the corpora and trainers, marks the state as
        unprepared and releases the platform mutex.
        """
        assert self.prepared, "closed without being prepared!"
        self.servers = None
        if self.bayes is not None:
            # Only store a db that holds both ham and spam training.
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                # Store this instance's classifier.  The code used to go
                # through the module-level name "state", which is only
                # right when that global happens to be this very object.
                self.bayes.store()
            self.bayes.close()
            self.bayes = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spamTrainer = self.hamTrainer = None

        self.prepared = False
        close_platform_mutex(self.platform_mutex)
        self.platform_mutex = None

    def prepare(self):
        """Claim the platform mutex, create the worker objects and mark
        the state as prepared."""
        # Where the platform allows it, the mutex stops a second server
        # from running at the same time.
        assert self.platform_mutex is None, "Should not already have the mutex"
        mutex = open_platform_mutex()
        self.platform_mutex = mutex

        # Build the classifier, corpora and trainers.
        self.createWorkers()
        self.prepared = True

    def buildServerStrings(self):
        """Create the string forms of the server details, for display in
        the Status panel.  Call once the server details are set up.

        Sets self.serversString ("host:port" entries) and
        self.proxyPortsString, both comma separated.
        """
        remote = []
        for host, port in self.servers:
            remote.append("%s:%s" % (host, port))
        self.serversString = ', '.join(remote)

        listening = [_addressPortStr(entry) for entry in self.proxyPorts]
        self.proxyPortsString = ', '.join(listening)

    def buildStatusStrings(self):
        """Compose self.warning, the status message for the home page of
        the web interface, from the classifier's ham and spam counts."""
        spam_count = self.bayes.nspam
        ham_count = self.bayes.nham
        if spam_count > 10 and ham_count > 10:
            # Enough data to judge the balance: warn only when one class
            # outnumbers the other by more than five to one.
            ratio = ham_count / float(spam_count)
            if ratio > 5.0:
                lopsided = ("ham", "spam")
            elif ratio < (1 / 5.0):
                lopsided = ("spam", "ham")
            else:
                lopsided = None
            if lopsided is None:
                self.warning = ""
            else:
                self.warning = ("Warning: you have much more %s than %s - "
                                "SpamBayes works best with approximately "
                                "even numbers of ham and spam." % lopsided)
        elif spam_count > 0 or ham_count > 0:
            # Some training, but not much of it.
            counts = (ham_count, spam_count)
            self.warning = ("Database only has %d good and %d spam - you "
                            "should consider performing additional "
                            "training." % counts)
        else:
            self.warning = ("Database has no training information.  "
                            "SpamBayes will classify all messages as "
                            "'unsure', ready for you to train.")

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        print "Loading database...",
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        
        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the self-test,
        # so as not to clutter the filesystem.
        if not self.isTest:
            def ensureDir(dirname):
                try:
                    os.mkdir(dirname)
                except OSError, e:
                    if e.errno != errno.EEXIST:
                        raise

            # Create/open the Corpuses.  Use small cache sizes to avoid hogging
            # lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            map(ensureDir, [sc, hc, uc])
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"]*24*60*60
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)
Exemplo n.º 6
0
class CoreState:
    """This keeps the global state of the module - the command-line options,
    statistics like how many mails have been classified, the handle of the
    log file, the Classifier and FileCorpus objects, and so on."""
    def __init__(self):
        """Give every attribute its start-up value, then call init().

        Configurable values are read from the global options object; the
        original notes say the command-line processing code may override
        them afterwards.
        """
        # Nothing has been opened or loaded yet.
        self.log_file = self.bayes = self.mutex = self.plugin = None
        self.prepared = False
        self.can_stop = True
        self.servers = ""

        # Bookkeeping for getNewMessageName(), which hands out unique
        # names for cached messages.
        self.last_base_message_name = ''
        self.uniquifier = 2

        # Session statistics.
        self.numSpams = self.numHams = self.numUnsure = 0

        # Settings taken from Options.py / bayescustomize.ini.
        self.ui_port = options["html_ui", "port"]
        self.launch_ui = options["html_ui", "launch_browser"]
        self.gzip_cache = options["Storage", "cache_use_gzip"]
        self.run_test_server = False
        self.is_test = False

        # Corpora and trainers are assigned by create_workers().
        self.spamCorpus = None
        self.hamCorpus = None
        self.unknownCorpus = None
        self.spam_trainer = None
        self.ham_trainer = None

        self.init()

    def init(self):
        """Reset the pieces of state that are refreshed on every prepare().

        Must not be called while the state is prepared.  Clears the
        language manager, opens the log file when the "verbose" option is
        set and empties the record of reported errors.
        """
        assert not self.prepared, "init after prepare, but before close"

        # No i18n yet: the i18n.LanguageManager set-up (system default
        # language plus the languages listed in
        # options["globals", "language"]) is disabled for now.
        self.lang_manager = None

        # The log file is only opened in verbose mode (unbuffered, binary).
        verbose = options["globals", "verbose"]
        if verbose:
            self.log_file = open('_core_server.log', 'wb', 0)

        # Remember reported errors.
        self.reported_errors = dict()

    def close(self):
        """Tear down a prepared state.

        Stores and closes the classifier database, drops the corpora and
        trainers, marks the state as unprepared and releases the mutex.
        """
        assert self.prepared, "closed without being prepared!"
        classifier = self.bayes
        if classifier is not None:
            # Only write the database out when it holds both ham and
            # spam training.
            has_training = classifier.nham != 0 and classifier.nspam != 0
            if has_training:
                classifier.store()
            classifier.close()
            self.bayes = None
        # NOTE(review): this assigns on a freshly constructed Message;
        # whether that reaches a shared database handle depends on
        # spambayes.message, which is not visible here - confirm.
        spambayes.message.Message().message_info_db = None

        self.spam_trainer = None
        self.ham_trainer = None
        self.spamCorpus = None
        self.hamCorpus = None
        self.unknownCorpus = None

        self.prepared = False
        self.close_platform_mutex()

    def prepare(self, can_stop=True):
        """Get the state ready for running.

        can_stop is stored on the instance; pass False when the user must
        not be allowed to shut the proxy down, for example when running
        as a Windows service.
        """
        self.init()

        # Claim the mutex, where the platform has one, so that a second
        # server cannot start alongside this one.
        assert self.mutex is None, "Should not already have the mutex"
        self.open_platform_mutex()
        self.can_stop = can_stop

        # Build the classifier, corpora and trainers.
        self.create_workers()
        self.prepared = True

    def build_status_strings(self):
        """Compose self.warning, the status text for the home page of the
        web interface.

        The text first describes how much training data the classifier
        holds and whether ham and spam are badly out of balance; one
        "<br/>"-prefixed line is then appended for each odd-looking
        cutoff setting.
        """
        spam_count = self.bayes.nspam
        ham_count = self.bayes.nham
        if spam_count > 10 and ham_count > 10:
            # Enough data to judge the balance: warn only when one class
            # outnumbers the other by more than five to one.
            ratio = ham_count / float(spam_count)
            if ratio > 5.0:
                message = _("Warning: you have much more ham than spam - "
                            "SpamBayes works best with approximately even "
                            "numbers of ham and spam.")
            elif ratio < (1 / 5.0):
                message = _("Warning: you have much more spam than ham - "
                            "SpamBayes works best with approximately even "
                            "numbers of ham and spam.")
            else:
                message = ""
        elif spam_count > 0 or ham_count > 0:
            template = _("Database only has %d good and %d spam - you "
                         "should consider performing additional training.")
            message = template % (ham_count, spam_count)
        else:
            message = _("Database has no training information.  SpamBayes "
                        "will classify all messages as 'unsure', ready for "
                        "you to train.")
        self.warning = message

        # Further warnings for thresholds that are truly odd.
        spam_cutoff = options["Categorization", "spam_cutoff"]
        ham_cutoff = options["Categorization", "ham_cutoff"]
        if spam_cutoff < 0.5:
            self.warning += _("<br/>Warning: we do not recommend setting "
                              "the spam threshold less than 0.5.")
        if ham_cutoff > 0.5:
            self.warning += _("<br/>Warning: we do not recommend setting "
                              "the ham threshold greater than 0.5.")
        if ham_cutoff > spam_cutoff:
            self.warning += _("<br/>Warning: your ham threshold is "
                              "<b>higher</b> than your spam threshold. "
                              "Results are unpredictable.")

    def create_workers(self):
        """Build the classifier, the statistics manager and - outside the
        self-test - the message corpora and their trainers, using the
        settings established in __init__ and possibly overridden since
        by the driver code."""
        if self.is_test:
            self.db_name = '_core_server.pickle'  # This is never saved.
            self.use_db = "pickle"
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        message_info_db = spambayes.message.Message().message_info_db
        self.stats = Stats.Stats(options, message_info_db)

        self.build_status_strings()

        # The self-test gets no caches or training objects, so that it
        # does not clutter the filesystem.
        if self.is_test:
            return

        spam_dir = get_pathname_option("Storage", "core_spam_cache")
        ham_dir = get_pathname_option("Storage", "core_ham_cache")
        unknown_dir = get_pathname_option("Storage", "core_unknown_cache")
        for cache_dir in (spam_dir, ham_dir, unknown_dir):
            storage.ensureDir(cache_dir)

        if self.gzip_cache:
            factory = GzipFileMessageFactory()
        else:
            factory = FileMessageFactory()
        age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60

        def open_corpus(directory):
            # Small cache size, to avoid hogging lots of memory.
            return ExpiryFileCorpus(age, factory, directory,
                                    r'[0123456789\-]*', cacheSize=20)

        self.spamCorpus = open_corpus(spam_dir)
        self.hamCorpus = open_corpus(ham_dir)
        self.unknownCorpus = open_corpus(unknown_dir)

        # Old messages are expired from the unknown corpus as well as
        # from the trained ones, since users are expected to reach a
        # point where they no longer train regularly.
        for corpus in (self.spamCorpus, self.hamCorpus, self.unknownCorpus):
            corpus.removeExpiredMessages()

        # Create the Trainers and attach each to its corpus.
        self.spam_trainer = storage.SpamTrainer(self.bayes)
        self.ham_trainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spam_trainer)
        self.hamCorpus.addObserver(self.ham_trainer)

    def getNewMessageName(self):
        """Return a cache name for a newly arrived message.

        The name is the arrival time in whole seconds, zero-padded to at
        least ten digits.  When the previous name was built from the same
        second, "-N" is appended (N starts at 2 and grows by one for each
        further collision); otherwise the new base name is remembered in
        self.last_base_message_name and the uniquifier is reset to 2.
        """
        # int() instead of long(): the formatted text is the same on
        # Python 2 (ints promote to long automatically), and int() still
        # exists on Python 3, where long() was removed.
        message_name = "%10.10d" % int(time.time())
        if message_name == self.last_base_message_name:
            message_name = "%s-%d" % (message_name, self.uniquifier)
            self.uniquifier += 1
        else:
            self.last_base_message_name = message_name
            self.uniquifier = 2
        return message_name

    def record_classification(self, cls, score):
        """Tally one classified message in the session counters.

        cls is compared with options["Headers", "header_ham_string"] and
        then with options["Headers", "header_spam_string"]; anything else
        is counted as unsure.

        score is passed on unchanged to self.stats.RecordClassification().
        """
        if cls == options["Headers", "header_ham_string"]:
            counter = "numHams"
        elif cls == options["Headers", "header_spam_string"]:
            counter = "numSpams"
        else:
            counter = "numUnsure"
        setattr(self, counter, getattr(self, counter) + 1)
        self.stats.RecordClassification(score)

    def buildStatusStrings(self):
        """Return an empty string.

        NOTE(review): the status text itself is composed by
        build_status_strings(); this camelCase method looks like a
        compatibility stub - confirm whether any caller still needs it.
        """
        return ''

    def recreate_state(self):
        """Close this state if it is prepared, then return a brand new
        CoreState that has already been prepared."""
        if self.prepared:
            # close() stores the database when that is warranted.
            self.close()
        # Start over with a fresh object rather than reusing this one.
        replacement = CoreState()
        replacement.prepare()
        return replacement

    def open_platform_mutex(self, mutex_name="SpamBayesServer"):
        """Implementations of a mutex or other resource which can prevent
        multiple servers starting at once.  Platform specific as no
        reasonable cross-platform solution exists (however, an old trick is
        to use a directory for a mutex, as a create/test atomic API
        generally exists).  Will set self.mutex or may throw
        AlreadyRunningException
        """

        if sys.platform.startswith("win"):
            try:
                import win32event, win32api, winerror
                # ideally, the mutex name could include either the username,
                # or the munged path to the INI file - this would mean we
                # would allow multiple starts so long as they weren't for
                # the same user.  However, as of now, the service version
                # is likely to start as a different user, so a single mutex
                # is best for now.
                # XXX - even if we do get clever with another mutex name, we
                # should consider still creating a non-exclusive
                # "SpamBayesServer" mutex, if for no better reason than so
                # an installer can check if we are running
                try:
                    hmutex = win32event.CreateMutex(None, True, mutex_name)
                except win32event.error, details:
                    # If another user has the mutex open, we get an "access
                    # denied" error - this is still telling us what we need
                    # to know.
                    if details[0] != winerror.ERROR_ACCESS_DENIED:
                        raise
                    raise AlreadyRunningException
                # mutex opened - now check if we actually created it.
                if win32api.GetLastError() == winerror.ERROR_ALREADY_EXISTS:
                    win32api.CloseHandle(hmutex)
                    raise AlreadyRunningException
                self.mutex = hmutex
                return
            except ImportError:
                # no win32all - no worries, just start
                pass