def __init__(self, args):
    """Set up console logging and instantiate the processing pipeline tools.

    :param args: forwarded unchanged to the parent class initializer.
    """
    # create logger
    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(logging.DEBUG)
    # Guard: logging.getLogger(__name__) returns a shared, module-level
    # logger, so unconditionally adding a StreamHandler here would attach a
    # duplicate handler on every construction and repeat every log line.
    if not self.logger.handlers:
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        # create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        # add formatter to ch
        ch.setFormatter(formatter)
        # add ch to logger
        self.logger.addHandler(ch)
    self.logger.debug("Starting Collector process in %s" % os.getcwd())
    self.logger.debug("Gevent Version %s" % gevent.__version__)
    #TODO: move output file name to config
    #fname = "./NetFlow.%s.bin"%str(time.time()*100000)
    #WARN: might want to remove this after testing
    #self.out = open(fname,"wb")
    #create tool instances
    self.interface = Interface()
    self.parse = Parse()
    self.describe = Describe()
    self.standardize = Standardize()
    self.transform = Transform()
    self.partition = Partition()
    self.q = Queue()
    # window flag starts False; handle() queues records until the window fills
    self.inWindow = False
    self.score = Score()
    #TODO: move csv name to config
    self.csv = CSV("output.csv")
    return super(Collector, self).__init__(args)
def __init__(self, args):
    """Set up logging, the raw-capture output file, and the pipeline tools.

    :param args: forwarded unchanged to the parent class initializer.
    """
    # create logger
    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(logging.DEBUG)
    # Guard: logging.getLogger(__name__) returns a shared, module-level
    # logger, so unconditionally adding a StreamHandler here would attach a
    # duplicate handler on every construction and repeat every log line.
    if not self.logger.handlers:
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        # create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        # add formatter to ch
        ch.setFormatter(formatter)
        # add ch to logger
        self.logger.addHandler(ch)
    self.logger.debug("Starting Collector process in %s" % os.getcwd())
    #self.logger.debug( "Gevent Version %s"%gevent.__version__)
    #TODO: move output file name to config
    fname = "./NetFlow.%d.bin" % int(time.time())
    #WARN: might want to remove this after testing
    self.out = open(fname, "wb")
    #create tool instances
    self.interface = Interface()
    self.parse = Parse()
    self.context = Context()
    self.describe = Describe()
    self.standardize = Standardize()
    self.transform = Transform()
    self.partition = Partition()
    self.q = Queue()
    # NOTE(review): config readers typically return strings; a non-empty
    # string such as "False" is truthy, so any later `if not self.inWindow`
    # test may misbehave — confirm the type stored under collector/inWindow.
    self.inWindow = settings.SETTINGS.get("collector", "inWindow")
    self.score = Score()
    #TODO: move csv name to config
    #self.csv = CSV("output.csv")
    self.output = Output()
    return super(Collector, self).__init__(args)
# Project-local imports are kept first so the module's first import failure
# (if any) stays on the local modules, matching the original file.
from loader import FileLoader
from describe import Describe

# These three were used below but never imported anywhere in the file,
# which made the script fail with NameError at runtime.
import sys

import matplotlib.pyplot as plt
import seaborn as sns


class Pair_Plot():
    """Render a seaborn pair plot of the dataset, colored by Hogwarts House."""

    def pair_plot(self, data, desc):
        """Drop the "Index" column (in place) and display a pair plot.

        :param data: DataFrame-like object with an "Index" column and a
            "Hogwarts House" column (assumed pandas — TODO confirm).
        :param desc: unused here; kept for interface compatibility.
        """
        try:
            data.drop("Index", axis=1, inplace=True)
            sns.pairplot(data, hue="Hogwarts House", markers=".")
            plt.show()
        except Exception as e:
            # Fixed message: this method draws a pair plot, not a histogram.
            print("Pair plot failed : {}".format(e))
            exit()


if (__name__ == '__main__'):
    file = "datasets/dataset_train.csv"
    result = 0
    if (len(sys.argv) < 3):
        # optional single argument overrides the default dataset path
        if (len(sys.argv) == 2):
            file = sys.argv[1]
        loader = FileLoader()
        path = sys.path[0] + '/' + file
        data = loader.load(path)
        describer = Describe()
        result = describer.describe(data)
        pair_plotter = Pair_Plot()
        pair_plotter.pair_plot(data, result)
    else:
        # fixed grammar of the usage message
        print("There are too many arguments.")
class Collector(DatagramServer):
    """gevent DatagramServer that feeds each received datagram through the
    analysis pipeline: interface -> parse -> context -> describe ->
    standardize -> transform -> partition -> output.

    Records are buffered on a queue until the configured describe window is
    reached; after that they are processed straight through.
    """

    # class-level count of datagrams handled across all instances
    x = 0

    def __init__(self, args):
        """Set up logging, the raw-capture output file, and the tools.

        :param args: forwarded unchanged to DatagramServer.__init__.
        """
        # create logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        # Guard: logging.getLogger(__name__) returns a shared logger, so
        # adding a StreamHandler on every construction would duplicate
        # every log line.
        if not self.logger.handlers:
            # create console handler and set level to debug
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            # create formatter
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            # add formatter to ch
            ch.setFormatter(formatter)
            # add ch to logger
            self.logger.addHandler(ch)
        self.logger.debug("Starting Collector process in %s" % os.getcwd())
        #self.logger.debug( "Gevent Version %s"%gevent.__version__)
        #TODO: move output file name to config
        fname = "./NetFlow.%d.bin" % int(time.time())
        #WARN: might want to remove this after testing
        self.out = open(fname, "wb")
        #create tool instances
        self.interface = Interface()
        self.parse = Parse()
        self.context = Context()
        self.describe = Describe()
        self.standardize = Standardize()
        self.transform = Transform()
        self.partition = Partition()
        self.q = Queue()
        # NOTE(review): config readers typically return strings; a non-empty
        # string such as "False" is truthy, so the `if not self.inWindow`
        # check in handle() may misbehave — confirm the stored type.
        self.inWindow = settings.SETTINGS.get("collector", "inWindow")
        self.score = Score()
        #TODO: move csv name to config
        #self.csv = CSV("output.csv")
        self.output = Output()
        return super(Collector, self).__init__(args)

    def done(self):
        """Flush and close the raw-capture file on shutdown."""
        self.out.close()
        #really important to call del on the csv obj to ensure it closes correctly
        #del self.csv

    def handle(self, rawData, address):
        """Process one datagram: archive the raw bytes, then run each
        interfaced record through the pipeline.

        :param rawData: raw datagram payload bytes.
        :param address: (host, port) of the sender (unused).
        """
        Collector.x += 1
        #print '%s %s: got %r' % (Collector.x, address[0], rawData)
        # archive the raw datagram before any processing
        self.out.write(rawData)
        interfacedData = self.interface.run(rawData)
        #once the rawData is "interfaced" we are passing it around by reference
        # interfaced data must be iterable
        try:
            for record in interfacedData:
                self.parse.run(record)
                self.context.run(record)
                self.describe.run(record)
                #push the record onto the queue until window
                if not (self.inWindow):
                    self.q.put(record)
                    if (self.q.qsize() == int(settings.SETTINGS.get(
                            "collector", "describeWindow"))):
                        # window reached: drain the queue through the rest
                        # of the pipeline, then switch to straight-through
                        self.inWindow = True
                        while not self.q.empty():
                            item = self.q.get()
                            self.standardize.run(item)
                            self.transform.run(item)
                            self.partition.run(item)
                            #self.csv.writeRow(self.csv.format(item))
                            self.output.run(item)
                            self.q.task_done()
                else:
                    self.standardize.run(record)
                    self.transform.run(record)
                    self.partition.run(record)
                    #self.csv.writeRow(self.csv.format(record))
                    self.output.run(record)
                    #self.score.run(record)
        except Exception as e:
            # NOTE(review): this broad catch reports *any* pipeline failure
            # as "not iterable", which can mask unrelated errors — consider
            # narrowing to TypeError around the iteration itself.
            self.logger.error("Interfaced data is not iterable %s" % (str(e)))
class Collector(DatagramServer):
    """gevent DatagramServer that feeds each received datagram through the
    analysis pipeline: interface -> parse -> describe -> standardize ->
    transform -> partition -> CSV output (+ score).

    Records are buffered on a queue until the configured describe window is
    reached; after that they are processed straight through.
    """

    # class-level count of datagrams handled across all instances
    x = 0

    def __init__(self, args):
        """Set up logging, the CSV output, and the pipeline tools.

        :param args: forwarded unchanged to DatagramServer.__init__.
        """
        # create logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        # Guard: logging.getLogger(__name__) returns a shared logger, so
        # adding a StreamHandler on every construction would duplicate
        # every log line.
        if not self.logger.handlers:
            # create console handler and set level to debug
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            # create formatter
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            # add formatter to ch
            ch.setFormatter(formatter)
            # add ch to logger
            self.logger.addHandler(ch)
        self.logger.debug("Starting Collector process in %s" % os.getcwd())
        self.logger.debug("Gevent Version %s" % gevent.__version__)
        #TODO: move output file name to config
        #fname = "./NetFlow.%s.bin"%str(time.time()*100000)
        #WARN: might want to remove this after testing
        #self.out = open(fname,"wb")
        #create tool instances
        self.interface = Interface()
        self.parse = Parse()
        self.describe = Describe()
        self.standardize = Standardize()
        self.transform = Transform()
        self.partition = Partition()
        self.q = Queue()
        # window flag starts False; handle() queues records until the
        # describe window fills
        self.inWindow = False
        self.score = Score()
        #TODO: move csv name to config
        self.csv = CSV("output.csv")
        return super(Collector, self).__init__(args)

    def done(self):
        """Release the CSV writer on shutdown."""
        #self.out.close()
        #really important to call del on the csv obj to ensure it closes correctly
        del self.csv

    def handle(self, rawData, address):
        """Process one datagram: run each interfaced record through the
        pipeline and write results to the CSV output.

        :param rawData: raw datagram payload bytes.
        :param address: (host, port) of the sender (unused).
        """
        Collector.x += 1
        #print '%s %s: got %r' % (Collector.x, address[0], data)
        #self.out.write(rawData)
        interfacedData = self.interface.run(rawData)
        #once the rawData is "interfaced" we are passing it around by reference
        # interfaced data must be iterable
        try:
            for record in interfacedData:
                self.parse.run(record)
                self.describe.run(record)
                #push the record onto the queue until window
                if not (self.inWindow):
                    self.q.put(record)
                    #self.logger.debug("adding record to queue %s"%(repr(record)))
                    if (self.q.qsize() == int(
                            settings.SETTINGS.get("collector", "describeWindow"))):
                        self.logger.debug(
                            "Describe Window of %s records met, Begin Processing queue"
                            % settings.SETTINGS.get("collector", "describeWindow"))
                        # window reached: drain the queue through the rest
                        # of the pipeline, then switch to straight-through
                        self.inWindow = True
                        while not self.q.empty():
                            item = self.q.get()
                            #self.logger.debug("processing record from queue %s"%(repr(item)))
                            self.standardize.run(item)
                            self.transform.run(item)
                            self.partition.run(item)
                            self.csv.writeRow(self.csv.format(item))
                            self.q.task_done()
                else:
                    self.standardize.run(record)
                    self.transform.run(record)
                    self.partition.run(record)
                    self.csv.writeRow(self.csv.format(record))
                    self.score.run(record)
        except Exception as e:
            # NOTE(review): this broad catch reports *any* pipeline failure
            # as "not iterable", which can mask unrelated errors — consider
            # narrowing to TypeError around the iteration itself.
            self.logger.error("Interfaced data is not iterable %s" % (str(e)))