#!/usr/bin/python
import sys

import rospy
from stopper import Stopper

if __name__ == "__main__":
    rospy.init_node("stopper_node", argv=sys.argv)
    # Defaults, overridden by private node parameters when they are set.
    forward_speed = 0.5
    rotate_speed = 0.5
    end_point_x = 0.0  # defaults added so the Stopper call below cannot hit a NameError
    end_point_y = 0.0
    if rospy.has_param('~forward_speed'):
        forward_speed = rospy.get_param('~forward_speed')
    if rospy.has_param('~angular_speed'):
        rotate_speed = rospy.get_param('~angular_speed')
    if rospy.has_param('~end_point_x'):
        end_point_x = rospy.get_param('~end_point_x')
    if rospy.has_param('~end_point_y'):
        end_point_y = rospy.get_param('~end_point_y')
    my_stopper = Stopper(forward_speed, rotate_speed, end_point_x, end_point_y)
    my_stopper.start_moving()

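# Hypothetical sketch of the Stopper interface the node above assumes; only
# the constructor signature is taken from the call site. The topic name,
# message type, and start_moving body are assumptions, not the original class.
import rospy
from geometry_msgs.msg import Twist


class Stopper:
    def __init__(self, forward_speed, rotate_speed, end_point_x, end_point_y):
        self.forward_speed = forward_speed
        self.rotate_speed = rotate_speed
        self.end_point = (end_point_x, end_point_y)
        self.command_pub = rospy.Publisher("cmd_vel", Twist, queue_size=10)

    def start_moving(self):
        rate = rospy.Rate(10)  # publish velocity commands at 10 Hz
        while not rospy.is_shutdown():
            msg = Twist()
            msg.linear.x = self.forward_speed
            self.command_pub.publish(msg)
            rate.sleep()
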
import logging

from stopper import Stopper

logger = logging.getLogger()
logger.setLevel(logging.INFO)
# logger.setLevel(logging.DEBUG)

stopper = Stopper()


# This is the method that will be registered
# with Lambda and run on a schedule
def handler(event={}, context={}):
    if 'ping' in event:
        logger.info('pong')
        return {'message': 'pong'}
    stopper.run()


# If being called locally, just call handler
if __name__ == '__main__':
    import json
    import sys

    logging.basicConfig()
    event = {}
    # If argv[1] is given, read its contents and parse them into the JSON event
    if len(sys.argv) > 1:
        input_file = sys.argv[1]
        with open(input_file, 'r') as f:
            event = json.load(f)  # completes the truncated snippet per its own TODO
    handler(event)

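# Usage sketch: exercising the ping branch locally. The module name
# 'lambda_function' is an assumption about how the file above is deployed;
# the handler returns before stopper.run() when 'ping' is in the event.
from lambda_function import handler

assert handler({'ping': True}) == {'message': 'pong'}
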
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.

__author__ = 'essepuntato'

import os
import traceback
from datetime import datetime

from conf_bee import *
from epmc_processor import EuropeanPubMedCentralProcessor
from stopper import Stopper

start_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

exception_string = None
try:
    epmc = EuropeanPubMedCentralProcessor(
        stored_file, reference_dir, error_dir, pagination_file,
        Stopper(reference_dir), p_size=page_size)
    epmc.process(True)
except Exception as e:
    exception_string = str(e) + " " + traceback.format_exc().rstrip("\n+")

end_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

if exception_string is not None:
    print(exception_string)
    if not os.path.exists(error_dir):
        os.makedirs(error_dir)
    # os.path.join avoids breakage when error_dir lacks a trailing separator
    with open(os.path.join(error_dir, end_time.replace(":", "-") + ".err"), "w") as f:
        f.write(exception_string)

print("\nStarted at:\t%s\nEnded at:\t%s" % (start_time, end_time))

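# Plausible sketch of the directory-based Stopper the EPMC script above passes
# as Stopper(reference_dir): can_proceed() is assumed to flip to False once a
# stop file appears in the watched directory, so a long-running harvest can be
# halted between files. The stop-file name is an assumption, not taken from
# the original code.
import os


class Stopper(object):
    def __init__(self, watched_dir, stop_file_name=".stop"):
        self.stop_path = os.path.join(watched_dir, stop_file_name)

    def can_proceed(self):
        # Keep going only while the stop file is absent
        return not os.path.exists(self.stop_path)
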
import os
from timeit import default_timer as timer  # assumed: timer() is timeit's default_timer

# Metadata, Stopper, NormalizationTokenization, Stemmer, HtmlFilter and the
# output-folder names (outputFolder, practice_*_output_folder) are assumed to
# come from sibling modules / a configuration import not shown in this snippet.


class Preprocessor:

    def __init__(self, rootPath="", inputFolder=""):
        self.metadata = Metadata()
        self.stopper = Stopper()
        stopwords_folder = os.path.join(rootPath, "stopwords")
        print("Preprocessor root path: ", rootPath)
        self.stopper.load_stopwords(stopwords_folder)
        self.normalizer_tokenizer = NormalizationTokenization()
        self.stemmer = Stemmer()
        self.p1_path = ""
        self.p2_path = ""
        self.p3_path = ""
        self.rootPath = rootPath
        self.inputFolder = inputFolder

    def prepare_output_folders(self):
        self.p1_path = os.path.join(self.rootPath, outputFolder, practice_1_output_folder)
        self.p2_path = os.path.join(self.rootPath, outputFolder, practice_2_output_folder)
        self.p3_path = os.path.join(self.rootPath, outputFolder, practice_3_output_folder)
        if not os.path.exists(self.p1_path):
            os.makedirs(self.p1_path)
        if not os.path.exists(self.p2_path):
            os.makedirs(self.p2_path)
        if not os.path.exists(self.p3_path):
            os.makedirs(self.p3_path)

    def preprocess_text(self, text):
        '''Preprocess the given text and return the list of processed tokens.'''
        token_list = self.normalizer_tokenizer.process_text(text)
        tokens_without_stopwords = self.stopper.remove_stopwords(token_list)
        tokens_stems_only = self.stemmer.get_stems(tokens_without_stopwords)
        return tokens_stems_only

    def preprocess(self, generate_metadata=False, generate_output_files=False):
        '''Preprocess every file in the input folder.'''
        self.prepare_output_folders()
        start_time = timer()
        inputPath = os.path.join(self.rootPath, self.inputFolder)
        for file in os.listdir(inputPath):
            fileName = os.fsdecode(file)

            # <Practice 1>: strip HTML, normalize and tokenize
            htmlFilter = HtmlFilter(inputPath, fileName)
            text = htmlFilter.filter_html()
            token_list = self.normalizer_tokenizer.process_text(text)
            txtFileName = fileName.replace('.html', '.txt')
            if generate_output_files:
                full_path = os.path.join(self.p1_path, txtFileName)
                self.write_string_list_to_file(full_path, token_list)

            # <Practice 2>: remove stopwords
            tokens_without_stopwords = self.stopper.remove_stopwords(token_list)
            if generate_output_files:
                full_path = os.path.join(self.p2_path, txtFileName)
                self.write_string_list_to_file(full_path, tokens_without_stopwords)

            # <Practice 3>: stem the remaining tokens
            tokens_stems_only = self.stemmer.get_stems(tokens_without_stopwords)
            if generate_output_files:  # guard added for consistency with practices 1 and 2
                full_path = os.path.join(self.p3_path, txtFileName)
                self.write_string_list_to_file(full_path, tokens_stems_only)

            if generate_metadata:
                self.metadata.practice1_metadata(token_list)
                self.metadata.num_of_files += 1
                self.metadata.practice2_metadata(tokens_without_stopwords)
                self.metadata.practice3_metadata(tokens_stems_only)

        if generate_metadata:
            self.metadata.final_metadata()
            self.metadata.avg_tokens_per_file = self.metadata.num_of_tokens / self.metadata.num_of_files
            self.metadata.avg_num_of_words_per_file = self.metadata.num_of_words_after_removing_stopwords / self.metadata.num_of_files
            self.metadata.avg_words_stemming = self.metadata.num_of_words_after_stemming / self.metadata.num_of_files
            end_time = timer()
            self.metadata.time_needed = str(end_time - start_time)
            self.metadata.print_metadata()

    def write_string_list_to_file(self, path, string_list):
        with open(path, 'w+') as file:
            file.write('\n'.join(string_list))

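# Usage sketch for the Preprocessor above; the paths are made up. preprocess()
# walks <rootPath>/<inputFolder> and writes one output file per practice.
preprocessor = Preprocessor(rootPath="/data/corpus", inputFolder="html")
tokens = preprocessor.preprocess_text("Cats are chasing other cats")  # normalize, stop, stem
preprocessor.preprocess(generate_metadata=True, generate_output_files=True)
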
import configparser
import logging
import subprocess
from logging.handlers import RotatingFileHandler
from time import sleep

# 'daemon' (the base class) and 'Stopper' are assumed to live in local modules.
from daemon import daemon
from stopper import Stopper


class PowerPy(daemon):

    def __init__(self, pidfile):
        super().__init__(pidfile)
        self.observedPorts = {"TCP": [], "UDP": []}
        self.observationInterval = None
        self.inactiveTime = None
        self.haltCommand = None
        self.debug = None
        self.debugLogger = None
        self.uptimeLogger = None
        self.__setUpLoggers()
        self.__readConf()
        self.stopper = Stopper()

    def run(self):
        while True:
            if self.debug:
                self.__printDebug()
            isConnectionActive = self.__examinePorts(self.__getActivePorts())
            if isConnectionActive:
                self.stopper.reset()
            else:
                self.stopper.progress()
            if self.stopper.timeSpent > self.inactiveTime:
                self.uptimeLogger.info("POWER DOWN")
                self.__runCommand(self.haltCommand)
                self.stopper.reset()
                self.uptimeLogger.info("POWER UP")
            sleep(self.observationInterval)

    def __setUpLoggers(self):
        # Create the Loggers (string concatenation fixed: __name__."..." is not valid Python)
        self.debugLogger = logging.getLogger(__name__ + "-DEBUG")
        self.debugLogger.setLevel(logging.DEBUG)
        self.uptimeLogger = logging.getLogger(__name__ + "-UPTIME")
        self.uptimeLogger.setLevel(logging.DEBUG)  # was mistakenly set on debugLogger twice

        # Create the Handlers for logging data to files
        debug_logger_handler = RotatingFileHandler('./logs/debug.log', mode='a', maxBytes=5*1024*1024,
                                                   backupCount=2, encoding=None, delay=0)
        debug_logger_handler.setLevel(logging.DEBUG)
        uptime_logger_handler = RotatingFileHandler('./logs/uptime.log', mode='a', maxBytes=5*1024*1024,
                                                    backupCount=2, encoding=None, delay=0)
        uptime_logger_handler.setLevel(logging.DEBUG)

        # Create a Formatter for formatting the log messages
        logger_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        # Add the Formatter to the Handlers
        debug_logger_handler.setFormatter(logger_formatter)
        uptime_logger_handler.setFormatter(logger_formatter)

        # Add the Handlers to the Loggers
        self.debugLogger.addHandler(debug_logger_handler)
        self.uptimeLogger.addHandler(uptime_logger_handler)

    def __readConf(self):
        config = configparser.ConfigParser()
        config.read("config.ini")
        self.inactiveTime = config["BASIC"].getint("inactiveTime")
        self.observationInterval = config["BASIC"].getint("checkInterval")
        self.debug = config["BASIC"].getint("debug")
        self.haltCommand = config["BASIC"]["haltCommand"]
        for protocol in ["TCP", "UDP"]:
            if protocol in config:
                self.observedPorts[protocol] = [val for key, val in config[protocol].items()]
        if "TCP/UDP" in config:
            for key, val in config['TCP/UDP'].items():
                if val not in self.observedPorts["TCP"]:
                    self.observedPorts["TCP"].append(val)
                if val not in self.observedPorts["UDP"]:
                    self.observedPorts["UDP"].append(val)

    def __getActivePorts(self):
        output = self.__runCommand("netstat -antu | grep ESTABLISHED").decode('UTF-8')
        return self.__processOutputPorts(output)

    def __runCommand(self, command):
        # shell=True is required because the command contains a pipe
        ps = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = ps.communicate()[0]
        return out

    def __processOutputPorts(self, output):
        """
        Connection indexes in a netstat line:
        0 - tcp/udp
        1 - Recv-Q
        2 - Send-Q
        3 - Local Address (IP:PORT)
        4 - Foreign Address
        5 - State (LISTEN / ESTABLISHED / TIME_WAIT / CLOSE_WAIT)
        """
        ports = {"TCP": [], "UDP": []}
        for line in str(output).splitlines():
            connection = line.split()
            # rstrip("6") maps tcp6/udp6 onto the TCP/UDP buckets instead of raising KeyError
            protocol = str(connection[0]).upper().rstrip("6")
            ip = connection[3]
            # rsplit keeps the port even when the local address is IPv6 and contains colons
            ports[protocol].append(ip.rsplit(":", 1)[1])
        return ports

    def __examinePorts(self, activePorts):
        for protocol in activePorts.keys():
            for port in activePorts[protocol]:
                if port in self.observedPorts[protocol]:
                    if self.debug:
                        self.debugLogger.debug("Active port: {}".format(port))
                    return True
        return False

    def __printDebug(self):
        self.debugLogger.debug("Observed ports: {}".format(self.observedPorts))
        self.debugLogger.debug("Active ports: {}".format(self.__getActivePorts()))
        self.debugLogger.debug("Matched ports: {}".format(self.__examinePorts(self.__getActivePorts())))
        self.debugLogger.debug("Stopper time: {}".format(self.stopper.timeSpent))
        self.debugLogger.debug("\n\n ------------------------ \n\n")

import json
import os
import re
import shutil
import traceback
from datetime import datetime

from dataset_handler import DatasetHandler
from graphlib import ProvSet
from orcid_finder import ORCIDFinder
from resource_finder import ResourceFinder
from storer import Storer
from support import move_file

# Assumed local modules: the original snippet uses Stopper and
# CrossrefProcessor without importing them. The configuration values
# (reference_dir, base_iri, context_path, info_dir, triplestore_url) are
# assumed to come from a conf import not shown here.
from stopper import Stopper
from crossref_processor import CrossrefProcessor

start_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

error = False
last_file = None
s = Stopper(reference_dir)
try:
    for cur_dir, cur_subdir, cur_files in os.walk(reference_dir):
        if s.can_proceed():
            for cur_file in sorted(cur_files):
                if s.can_proceed():
                    if cur_file.endswith(".json"):
                        cur_file_path = cur_dir + os.sep + cur_file
                        cur_local_dir_path = re.sub(
                            "^([0-9]+-[0-9]+-[0-9]+-[0-9]+).+$", "\\1", cur_file)
                        with open(cur_file_path) as fp:
                            last_file = cur_file_path
                            last_local_dir = cur_local_dir_path
                            print("\n\nProcess file '%s'\n" % cur_file_path)
                            json_object = json.load(fp)
                            crp = CrossrefProcessor(
                                base_iri, context_path, info_dir, json_object,
                                ResourceFinder(ts_url=triplestore_url),

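# Quick illustration of the local-dir regex used above (the file name here is
# made up): it keeps the leading date-like prefix of a reference file name.
import re

print(re.sub("^([0-9]+-[0-9]+-[0-9]+-[0-9]+).+$", "\\1",
             "2016-10-01-1_page.json"))  # -> 2016-10-01-1
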
import numpy as np

# Metric (an enum of evaluation metrics) and Stopper (the stopping-criterion
# helper) are assumed to come from local modules not shown in this snippet.
from metrics import Metric
from stopper import Stopper


class Classifier:

    def __init__(self, name='Classifier', C=1, **kwargs):
        self.C = C
        self.stopper = Stopper(**kwargs)
        self.w = None
        self.n = None
        self.p = None
        self.X = None
        self.y = None
        self.name = name

    def fit(self, X, y):
        X = self._add_ones(X)
        y = y.reshape(-1, 1)
        self.n = y.shape[0]
        self.p = X.shape[1]
        self.w = np.zeros([self.p, 1])
        self.X = X
        self.y = y
        while not self.stopper.stop(self):
            # print(Metric.Acc.evaluate(self._predict(X), y))
            self._train_outer_iteration()
            # y_pred_proba = self._predict(X)
            # self.log_likelihood.append(self._log_likelihood(y, y_pred_proba))
        return self

    def predict(self, X):
        X = self._add_ones(X)
        return self._predict(X)

    def score(self, X, y_true, metric: Metric = Metric.Acc):
        y_pred = self.predict(X)
        return metric.evaluate(y_true, y_pred)

    def _train_outer_iteration(self):
        # No-op in the base class; subclasses implement the actual optimizer.
        pass

    def _predict(self, X):
        """
        :param X: matrix with observations: n_observations x n_predictors
        :return: predictions as np.array n_observations x 1
        """
        res = np.sign(X @ self.w)
        res = res.reshape(-1)
        res[res == 0] = 1  # should be rare unless w == 0
        return res

    def _add_ones(self, X):
        # Prepend a column of ones so the first weight acts as the intercept.
        ones = np.ones([X.shape[0], 1])
        return np.concatenate([ones, X], axis=1)

    def L2_SVM_loss(self, w=None):
        # Squared-hinge SVM objective: C * sum(max(1 - y_i * x_i w, 0)^2) + ||w||^2 / 2
        if w is None:
            w = self.w
        loss = 0
        for i in range(self.n):
            loss += max(1 - self.y[i, 0] * self.X[i, :] @ w, 0) ** 2
        loss *= self.C
        return loss + w.T @ w / 2

    @staticmethod
    def _log_likelihood(y_true, y_pred_proba):
        # Clipped binary log-likelihood; the 1e-6 guards against log(0).
        return (np.log(y_pred_proba + 1e-6).T @ y_true
                + np.log(1 - y_pred_proba + 1e-6).T @ (1 - y_true))[0, 0]

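# Usage sketch for the Classifier base class: labels are expected in {-1, +1}
# (see the sign-based _predict and the squared-hinge loss). 'max_iter' is an
# assumed Stopper kwarg; the base _train_outer_iteration is a no-op, so a
# subclass must supply the actual optimizer for the weights to move.
import numpy as np

X = np.random.randn(100, 3)
y = np.sign(X[:, 0] + 0.1 * np.random.randn(100))
clf = Classifier(name='L2-SVM', C=1, max_iter=100).fit(X, y)
print(clf.score(X, y))  # accuracy via Metric.Acc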