Example #1
#!/usr/bin/python
import rospy, sys
from stopper import Stopper

if __name__ == "__main__":
    rospy.init_node("stopper_node", argv=sys.argv)
    forward_speed = 0.5
    rotate_speed = 0.5
    # Assumed defaults so the Stopper(...) call below cannot hit a NameError
    # when the end-point parameters are not set
    end_point_x = 0.0
    end_point_y = 0.0
    if rospy.has_param('~forward_speed'):
        forward_speed = rospy.get_param('~forward_speed')
    if rospy.has_param('~angular_speed'):
        rotate_speed = rospy.get_param('~angular_speed')
    if rospy.has_param('~end_point_x'):
        end_point_x = rospy.get_param('~end_point_x')
    if rospy.has_param('~end_point_y'):
        end_point_y = rospy.get_param('~end_point_y')
    my_stopper = Stopper(forward_speed, rotate_speed, end_point_x, end_point_y)
    my_stopper.start_moving()
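
The node above hands four positional arguments to Stopper and then calls start_moving(). A minimal sketch of the interface it assumes, for a differential-drive robot publishing velocity commands (the cmd_vel topic name and the 10 Hz rate are assumptions):

import rospy
from geometry_msgs.msg import Twist

class Stopper:
    def __init__(self, forward_speed, rotate_speed, end_point_x, end_point_y):
        self.forward_speed = forward_speed
        self.rotate_speed = rotate_speed
        self.end_point_x = end_point_x
        self.end_point_y = end_point_y
        # Topic name is an assumption; remap to the robot's actual topic
        self.cmd_pub = rospy.Publisher("cmd_vel", Twist, queue_size=10)

    def start_moving(self):
        # Drive forward until shutdown; the real class presumably also steers
        # toward (end_point_x, end_point_y) and stops on arrival
        rate = rospy.Rate(10)
        twist = Twist()
        twist.linear.x = self.forward_speed
        while not rospy.is_shutdown():
            self.cmd_pub.publish(twist)
            rate.sleep()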
Example #2
import logging
from stopper import Stopper

logger = logging.getLogger()
logger.setLevel(logging.INFO)
# logger.setLevel(logging.DEBUG)

stopper = Stopper()

# This is the method that will be registered
# with Lambda and run on a schedule
def handler(event={}, context={}):
  if 'ping' in event:
    logger.info('pong')
    return {'message': 'pong'}

  stopper.run()

# If being called locally, just call handler
if __name__ == '__main__':
  import os
  import json
  import sys

  logging.basicConfig()
  event = {}

  # If a file path is given on the command line, read it and parse the event
  if len(sys.argv) > 1:
    input_file = sys.argv[1]
    with open(input_file, 'r') as f:
      event = json.load(f)

  handler(event)
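
The 'ping' branch in the handler is a common keep-warm pattern: a scheduled trigger (e.g. a CloudWatch Events rule, which is an assumption here, not shown in the snippet) can invoke the function with {"ping": true} so it answers 'pong' without running stopper.run().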
Example #3
File: bee.py Project: patmha/CiTOCrawler

__author__ = 'essepuntato'

from conf_bee import *
from stopper import Stopper
import traceback
from datetime import datetime
from epmc_processor import EuropeanPubMedCentralProcessor
import os

start_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
exception_string = None
try:
    epmc = EuropeanPubMedCentralProcessor(
        stored_file, reference_dir, error_dir, pagination_file, Stopper(reference_dir), p_size=page_size)
    epmc.process(True)
except Exception as e:
    exception_string = str(e) + " " + traceback.format_exc().rstrip("\n+")

end_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

if exception_string is not None:
    print(exception_string)
    if not os.path.exists(error_dir):
        os.makedirs(error_dir)
    with open(os.path.join(error_dir, end_time.replace(":", "-") + ".err"), "w") as f:
        f.write(exception_string)

print("\nStarted at:\t%s\nEnded at:\t%s" % (start_time, end_time))
Example #4
# Assumed imports for names this snippet uses; Metadata,
# NormalizationTokenization, Stemmer, HtmlFilter and the output-folder
# constants presumably come from the project's own modules (not shown).
import os
from timeit import default_timer as timer
from stopper import Stopper


class Preprocessor:
    def __init__(self, rootPath="", inputFolder=""):
        self.metadata = Metadata()

        self.stopper = Stopper()
        stopwords_folder = os.path.join(rootPath, "stopwords")
        print("Preprocessor root path: ", rootPath)
        self.stopper.load_stopwords(stopwords_folder)

        self.normalizer_tokenizer = NormalizationTokenization()
        self.stemmer = Stemmer()

        self.p1_path = ""
        self.p2_path = ""
        self.p3_path = ""

        self.rootPath = rootPath
        self.inputFolder = inputFolder

    def prepare_output_folders(self):
        self.p1_path = os.path.join(self.rootPath, outputFolder,
                                    practice_1_output_folder)
        self.p2_path = os.path.join(self.rootPath, outputFolder,
                                    practice_2_output_folder)
        self.p3_path = os.path.join(self.rootPath, outputFolder,
                                    practice_3_output_folder)
        if not os.path.exists(self.p1_path):
            os.makedirs(self.p1_path)
        if not os.path.exists(self.p2_path):
            os.makedirs(self.p2_path)
        if not os.path.exists(self.p3_path):
            os.makedirs(self.p3_path)

    def preprocess_text(self, text):
        '''
        This method does the preprocessing of the given text and returns
        the list of the processed tokens.
        '''
        token_list = self.normalizer_tokenizer.process_text(text)
        tokens_without_stopwords = self.stopper.remove_stopwords(token_list)
        tokens_stems_only = self.stemmer.get_stems(tokens_without_stopwords)
        return tokens_stems_only

    def preprocess(self, generate_metadata=False, generate_output_files=False):
        '''
        This method does all the preprocessing of all the files in the system
        '''
        self.prepare_output_folders()

        start_time = timer()
        inputPath = os.path.join(self.rootPath, self.inputFolder)
        for file in os.listdir(inputPath):
            fileName = os.fsdecode(file)

            ### <Practice 1>
            htmlFilter = HtmlFilter(inputPath, fileName)
            text = htmlFilter.filter_html()

            token_list = self.normalizer_tokenizer.process_text(text)

            txtFileName = fileName.replace('.html', '.txt')
            if generate_output_files:
                full_path = os.path.join(self.p1_path, txtFileName)
                self.write_string_list_to_file(full_path, token_list)
            ### </Practice 1>

            ### <Practice 2>
            tokens_without_stopwords = self.stopper.remove_stopwords(
                token_list)
            if generate_output_files:
                full_path = os.path.join(self.p2_path, txtFileName)
                self.write_string_list_to_file(full_path,
                                               tokens_without_stopwords)
            ### </Practice 2>

            ### <Practice 3>
            tokens_stems_only = self.stemmer.get_stems(
                tokens_without_stopwords)
            # Guarded like Practices 1 and 2, so stem files are only written
            # when output files are requested
            if generate_output_files:
                full_path = os.path.join(self.p3_path, txtFileName)
                self.write_string_list_to_file(full_path, tokens_stems_only)
            ### </Practice 3>

            if generate_metadata:
                self.metadata.practice1_metadata(token_list)
                self.metadata.num_of_files += 1
                self.metadata.practice2_metadata(tokens_without_stopwords)
                self.metadata.practice3_metadata(tokens_stems_only)

        if generate_metadata:
            self.metadata.final_metadata()

            self.metadata.avg_tokens_per_file = self.metadata.num_of_tokens / self.metadata.num_of_files
            self.metadata.avg_num_of_words_per_file = self.metadata.num_of_words_after_removing_stopwords / self.metadata.num_of_files
            self.metadata.avg_words_stemming = self.metadata.num_of_words_after_stemming / self.metadata.num_of_files
            end_time = timer()
            self.metadata.time_needed = str(end_time - start_time)

            self.metadata.print_metadata()

    def write_string_list_to_file(self, path, string_list):
        with open(path, 'w+') as file:
            file.write('\n'.join(string_list))
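
The Preprocessor relies on a stopword-filtering Stopper interface: load_stopwords(folder) and remove_stopwords(token_list). A minimal sketch consistent with that usage, assuming one stopword per line in each file of the folder:

import os

class Stopper:
    def __init__(self):
        self.stopwords = set()

    def load_stopwords(self, folder):
        # Collect stopwords from every file in the folder, one word per line
        for name in os.listdir(folder):
            with open(os.path.join(folder, name)) as f:
                self.stopwords.update(line.strip() for line in f if line.strip())

    def remove_stopwords(self, token_list):
        return [t for t in token_list if t not in self.stopwords]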
Example #5
File: power.py Project: Sch-Tomi/PowerPy
# Assumed imports for names this snippet uses; the daemon base class is
# presumed to be a local module of the project.
import logging
import configparser
import subprocess
from logging.handlers import RotatingFileHandler
from time import sleep
from daemon import daemon
from stopper import Stopper


class PowerPy(daemon):

    def __init__(self, pidfile):

        super().__init__(pidfile)

        self.observedPorts = {"TCP": [], "UDP": []}
        self.observationInterval = None
        self.inactiveTime = None
        self.haltCommand = None
        self.debug = None

        self.debugLogger = None
        self.uptimeLogger = None

        self.__setUpLoggers()
        self.__readConf()

        self.stopper = Stopper()

    def run(self):
        while True:
            if self.debug:
                self.__printDebug()
            
            isConnectionActive = self.__examinePorts(self.__getActivePorts())

            if isConnectionActive:
                self.stopper.reset()
            else:
                self.stopper.progress()
                if self.stopper.timeSpent > self.inactiveTime:
                    self.uptimeLogger.info("POWER DOWN")
                    self.__runCommand(self.haltCommand)
                    self.stopper.reset()
                    self.uptimeLogger.info("POWER UP")
        
            sleep(self.observationInterval)
        

    def __setUpLoggers(self):
        # Create the loggers
        self.debugLogger = logging.getLogger(__name__ + "-DEBUG")
        self.debugLogger.setLevel(logging.DEBUG)

        self.uptimeLogger = logging.getLogger(__name__ + "-UPTIME")
        self.uptimeLogger.setLevel(logging.DEBUG)
    
        # Create the Handler for logging data to a file
        debug_logger_handler = RotatingFileHandler('./logs/debug.log', mode='a', maxBytes=5*1024*1024, backupCount=2, encoding=None, delay=0)
        debug_logger_handler.setLevel(logging.DEBUG)

        uptime_logger_handler = RotatingFileHandler('./logs/uptime.log', mode='a', maxBytes=5*1024*1024, backupCount=2, encoding=None, delay=0)
        uptime_logger_handler.setLevel(logging.DEBUG)
    
        # Create a Formatter for formatting the log messages
        logger_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    
        # Add the Formatter to the Handler
        debug_logger_handler.setFormatter(logger_formatter)
        uptime_logger_handler.setFormatter(logger_formatter)
    
        # Add the Handler to the Logger
        self.debugLogger.addHandler(debug_logger_handler)
        self.uptimeLogger.addHandler(uptime_logger_handler)

    def __readConf(self):
        config = configparser.ConfigParser()
        config.read("config.ini")

        self.inactiveTime = config["BASIC"].getint("inactiveTime")
        self.observationInterval = config["BASIC"].getint("checkInterval")
        self.debug = config["BASIC"].getint("debug")
        self.haltCommand = config["BASIC"]["haltCommand"]

        for protocol in ["TCP", "UDP"]:
            if protocol in config:
                self.observedPorts[protocol] = [val for key, val in config[protocol].items()]

        if "TCP/UDP" in config:
            for key, val in config['TCP/UDP'].items():
                if val not in self.observedPorts["TCP"]:
                    self.observedPorts["TCP"].append(val)
                if val not in self.observedPorts["UDP"]:
                    self.observedPorts["UDP"].append(val)


    def __getActivePorts(self):
        output = self.__runCommand("netstat -antu | grep ESTABLISHED").decode('UTF-8')
        return self.__processOutputPorts(output)

    def __runCommand(self, command):
        # Run the command in a shell and capture combined stdout/stderr
        ps = subprocess.Popen(command, shell=True,
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return ps.communicate()[0]
    
    def __processOutputPorts(self, output):
        """
        Connection Indexes:
        0 - tcp/udp
        1 - Recv-Q
        2 - Send-Q
        3 - Local Address (IP:PORT)
        4 - Foreign Address
        5 - State (LISTEN / ESTABLISHED / TIME_WAIT / CLOSE_WAIT)
        """
        ports = {"TCP": [], "UDP": []}
        for line in str(output).splitlines():
            connection = line.split()
            # Fold tcp6/udp6 into the TCP/UDP buckets so dual-stack hosts
            # do not raise a KeyError
            protocol = connection[0].upper().rstrip("6")
            local_address = connection[3]
            # rsplit so IPv6 addresses with embedded colons still yield the port
            ports[protocol].append(local_address.rsplit(":", 1)[1])
        return ports

    def __examinePorts(self, activePorts):
        for protocol in activePorts.keys():
            for port in activePorts[protocol]:
                if port in self.observedPorts[protocol]:
                    if self.debug:
                        self.debugLogger.debug("Active port: {}".format(port))
                    return True
        return False

    def __printDebug(self):
        self.debugLogger.debug("Observed ports: {}".format(self.observedPorts))
        self.debugLogger.debug("Active ports: {}".format(self.__getActivePorts()))
        self.debugLogger.debug("Matched ports: {}".format(self.__examinePorts(self.__getActivePorts())))
        self.debugLogger.debug("Stopper time: {}".format(self.stopper.timeSpent))
        self.debugLogger.debug("\n\n ------------------------ \n\n")
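
run() above expects a Stopper with reset(), progress(), and a timeSpent counter compared against inactiveTime. A minimal sketch consistent with that usage, assuming timeSpent measures wall-clock seconds since the first progress() call after a reset():

from time import time

class Stopper:
    def __init__(self):
        self.timeSpent = 0
        self._started = None

    def reset(self):
        self.timeSpent = 0
        self._started = None

    def progress(self):
        if self._started is None:
            self._started = time()
        self.timeSpent = time() - self._started

For reference, a config.ini shape that __readConf accepts. The [BASIC] section and its key names are taken from the code above; the values and the port entries are placeholders:

[BASIC]
inactiveTime = 600
checkInterval = 30
debug = 0
haltCommand = shutdown -h now

[TCP]
ssh = 22
http = 80

[TCP/UDP]
dns = 53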
Example #6
# Assumed imports for names this snippet uses but does not import; the
# CrossrefProcessor module path is a guess, and reference_dir, base_iri,
# context_path, info_dir and triplestore_url presumably come from a
# project config module.
import json
from stopper import Stopper
from crossref_processor import CrossrefProcessor
from support import move_file
from resource_finder import ResourceFinder
from orcid_finder import ORCIDFinder
from graphlib import ProvSet
from storer import Storer
import os
import traceback
from dataset_handler import DatasetHandler
from datetime import datetime
import re
import shutil

start_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
error = False
last_file = None
s = Stopper(reference_dir)
try:
    for cur_dir, cur_subdir, cur_files in os.walk(reference_dir):
        if s.can_proceed():
            for cur_file in sorted(cur_files):
                if s.can_proceed():
                    if cur_file.endswith(".json"):
                        cur_file_path = cur_dir + os.sep + cur_file
                        cur_local_dir_path = re.sub("^([0-9]+-[0-9]+-[0-9]+-[0-9]+).+$", "\\1", cur_file)
                        with open(cur_file_path) as fp:
                            last_file = cur_file_path
                            last_local_dir = cur_local_dir_path
                            print("\n\nProcess file '%s'\n" % cur_file_path)
                            json_object = json.load(fp)
                            crp = CrossrefProcessor(base_iri, context_path, info_dir, json_object,
                                                    ResourceFinder(ts_url=triplestore_url),
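
Examples #3 and #6 both construct Stopper with a directory, and Example #6 polls can_proceed() between files. A minimal sketch consistent with that usage, assuming a file-based kill switch (the .stop file name is an assumption):

import os

class Stopper:
    def __init__(self, base_dir):
        # Processing continues until this file appears in base_dir
        self.stop_file = os.path.join(base_dir, ".stop")

    def can_proceed(self):
        return not os.path.exists(self.stop_file)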
Example #7
# Assumed imports for names this snippet uses; the module path of the
# Metric enum is a guess.
import numpy as np
from metrics import Metric
from stopper import Stopper


class Classifier:
    def __init__(self, name='Classifier', C=1, **kwargs):
        self.C = C
        self.stopper = Stopper(**kwargs)
        self.w = None
        self.n = None
        self.p = None
        self.X = None
        self.y = None
        self.name = name

    def fit(self, X, y):
        X = self._add_ones(X)
        y = y.reshape(-1, 1)
        self.n = y.shape[0]
        self.p = X.shape[1]
        self.w = np.zeros([self.p, 1])
        self.X = X
        self.y = y
        while not self.stopper.stop(self):
            # print(Metric.Acc.evaluate(self._predict(X), y))
            self._train_outer_iteration()
            # y_pred_proba = self._predict(X)
            # self.log_likelihood.append(self._log_likelihood(y, y_pred_proba))
        return self

    def predict(self, X):
        X = self._add_ones(X)
        return self._predict(X)

    def score(self, X, y_true, metric: Metric = Metric.Acc):
        y_pred = self.predict(X)
        return metric.evaluate(y_true, y_pred)

    def _train_outer_iteration(self):
        # Hook for concrete subclasses: one pass of the underlying optimizer
        pass

    def _predict(self, X):
        """
        :param X: matrix with observations: n_observations x n_predictors
        :return: predictions as np.array n_observations x 1
        """
        res = np.sign(X @ self.w)
        res = res.reshape(-1)
        res[res == 0] = 1  # should be rare unless w==0
        return res

    def _add_ones(self, X):
        ones = np.ones([X.shape[0], 1])
        return np.concatenate([ones, X], axis=1)

    def L2_SVM_loss(self, w=None):
        """Squared-hinge SVM objective: C * sum_i max(1 - y_i * x_i w, 0)^2 + w'w / 2."""
        if w is None:
            w = self.w
        loss = 0
        for i in range(self.n):
            loss += max(1 - self.y[i, 0] * self.X[i, :] @ w, 0) ** 2
        loss *= self.C
        return loss + w.T @ w / 2

    @staticmethod
    def _log_likelihood(y_true, y_pred_proba):
        return (np.log(y_pred_proba + 1e-6).T @ y_true +
                np.log(1 - y_pred_proba + 1e-6).T @ (1 - y_true))[0, 0]
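
The Classifier polls self.stopper.stop(self) once per outer training iteration. A minimal sketch of that protocol, assuming a plain iteration cap (the max_iter name is an assumption; the real class may also check a convergence tolerance on the weights):

class Stopper:
    def __init__(self, max_iter=100, **kwargs):
        self.max_iter = max_iter
        self.iteration = 0

    def stop(self, clf):
        # Called once per outer iteration of clf.fit(); True ends training
        self.iteration += 1
        return self.iteration > self.max_iter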