示例#1
0
    def __init__(self, args):

        # create logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)

        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)

        # create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        # add formatter to ch
        ch.setFormatter(formatter)

        # add ch to logger
        self.logger.addHandler(ch)
        self.logger.debug("Starting Collector process in %s" % os.getcwd())
        self.logger.debug("Gevent Version %s" % gevent.__version__)

        #TODO: move output file name to config
        #fname = "./NetFlow.%s.bin"%str(time.time()*100000)

        #WARN: might want to remove this after testing
        #self.out = open(fname,"wb")

        #create tool instances
        self.interface = Interface()
        self.parse = Parse()
        self.describe = Describe()
        self.standardize = Standardize()
        self.transform = Transform()
        self.partition = Partition()

        self.q = Queue()
        self.inWindow = False

        self.score = Score()
        #TODO: move csv name to config
        self.csv = CSV("output.csv")

        return super(Collector, self).__init__(args)
示例#2
0
    def __init__(self,args):

        # create logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        
        # create formatter
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        
        # add formatter to ch
        ch.setFormatter(formatter)
        
        # add ch to logger
        self.logger.addHandler(ch)        
        self.logger.debug( "Starting Collector process in %s"%os.getcwd())
        #self.logger.debug( "Gevent Version %s"%gevent.__version__)
        
        #TODO: move output file name to config
        fname = "./NetFlow.%d.bin"%int(time.time())
        
        #WARN: might want to remove this after testing
        self.out = open(fname,"wb")
        
        #create tool instances
        self.interface = Interface()
        self.parse = Parse()
        self.context = Context()
        self.describe = Describe()
        self.standardize = Standardize()
        self.transform = Transform()
        self.partition = Partition()
        
        self.q = Queue()
        
        self.inWindow = settings.SETTINGS.get("collector","inWindow")
        
        self.score = Score()
        #TODO: move csv name to config
        #self.csv = CSV("output.csv")
        
        self.output = Output()
        
        return super(Collector,self).__init__(args)
示例#3
0
from loader import FileLoader
from describe import Describe


class Pair_Plot():
    def pair_plot(self, data, desc):
        try:
            data.drop("Index", axis=1, inplace=True)
            sns.pairplot(data, hue="Hogwarts House", markers=".")
            plt.show()
        except Exception as e:
            print("Histogram failed : {}".format(e))
            exit()


if (__name__ == '__main__'):
    file = "datasets/dataset_train.csv"
    result = 0
    if (len(sys.argv) < 3):
        if (len(sys.argv) == 2):
            file = sys.argv[1]
        loader = FileLoader()
        path = sys.path[0] + '/' + file
        data = loader.load(path)
        describer = Describe()
        result = describer.describe(data)
        pair_plotter = Pair_Plot()
        pair_plotter.pair_plot(data, result)
    else:
        print("There is too much arguments.")
示例#4
0
class Collector(DatagramServer):
    x = 0
    def __init__(self,args):

        # create logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        
        # create formatter
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        
        # add formatter to ch
        ch.setFormatter(formatter)
        
        # add ch to logger
        self.logger.addHandler(ch)        
        self.logger.debug( "Starting Collector process in %s"%os.getcwd())
        #self.logger.debug( "Gevent Version %s"%gevent.__version__)
        
        #TODO: move output file name to config
        fname = "./NetFlow.%d.bin"%int(time.time())
        
        #WARN: might want to remove this after testing
        self.out = open(fname,"wb")
        
        #create tool instances
        self.interface = Interface()
        self.parse = Parse()
        self.context = Context()
        self.describe = Describe()
        self.standardize = Standardize()
        self.transform = Transform()
        self.partition = Partition()
        
        self.q = Queue()
        
        self.inWindow = settings.SETTINGS.get("collector","inWindow")
        
        self.score = Score()
        #TODO: move csv name to config
        #self.csv = CSV("output.csv")
        
        self.output = Output()
        
        return super(Collector,self).__init__(args)
    
    def done(self):
        #pass
        self.out.close()
        #really important to call del on the csv obj to ensure it closes correctly
        #del self.csv
    
    def handle(self, rawData, address):
        Collector.x += 1
        #print '%s %s: got %r' % (Collector.x, address[0], rawData)  
        self.out.write(rawData)
        
        interfacedData = self.interface.run(rawData)
        #self.logger.debug("Interface: %s"%(repr(interfacedData)))
        #once the rawData is "interfaced" we are passing it around by reference
        # interfaced data must be iterable
        try:
            for record in interfacedData:
                self.parse.run(record)
                #self.logger.debug("Parse: %s"%(repr(record)))
                self.context.run(record)
                #self.logger.debug("Context: %s"%(repr(record)))
                self.describe.run(record)
                #self.logger.debug("Describe: %s"%(repr(record)))
                #push the record onto the queue until window 
                if not (self.inWindow):
                    self.q.put(record)
                    #self.logger.debug("adding record to queue %s"%(repr(record)))
                    if (self.q.qsize() == int(settings.SETTINGS.get("collector","describeWindow"))):
                        #self.logger.debug("Describe Window of %s records met, Begin Processing queue"%settings.SETTINGS.get("collector","describeWindow"))
                        self.inWindow = True
                        
                        while not self.q.empty():
                            item = self.q.get()
                            #self.logger.debug("processing record from queue %s"%(repr(item)))
                            self.standardize.run(item)
                            self.transform.run(item)
                            self.partition.run(item)
                            #self.csv.writeRow(self.csv.format(item))
                            self.output.run(item)
                            self.q.task_done()
                else:
                    self.standardize.run(record)
                    #self.logger.debug("Standardize: %s"%(repr(record)))
                    self.transform.run(record)
                    #self.logger.debug("Transform: %s"%(repr(record)))
                    self.partition.run(record)
                    #self.logger.debug("Partition: %s"%(repr(record)))
                    #self.csv.writeRow(self.csv.format(record))
                    self.output.run(record)
                    
                    #self.score.run(record)
                    
        except Exception as e:
            self.logger.error("Interfaced data is not iterable %s"%(str(e)))
示例#5
0
class Collector(DatagramServer):
    x = 0

    def __init__(self, args):

        # create logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)

        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)

        # create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        # add formatter to ch
        ch.setFormatter(formatter)

        # add ch to logger
        self.logger.addHandler(ch)
        self.logger.debug("Starting Collector process in %s" % os.getcwd())
        self.logger.debug("Gevent Version %s" % gevent.__version__)

        #TODO: move output file name to config
        #fname = "./NetFlow.%s.bin"%str(time.time()*100000)

        #WARN: might want to remove this after testing
        #self.out = open(fname,"wb")

        #create tool instances
        self.interface = Interface()
        self.parse = Parse()
        self.describe = Describe()
        self.standardize = Standardize()
        self.transform = Transform()
        self.partition = Partition()

        self.q = Queue()
        self.inWindow = False

        self.score = Score()
        #TODO: move csv name to config
        self.csv = CSV("output.csv")

        return super(Collector, self).__init__(args)

    def done(self):
        #self.out.close()
        #really important to call del on the csv obj to ensure it closes correctly
        del self.csv

    def handle(self, rawData, address):
        Collector.x += 1
        #print '%s %s: got %r' % (Collector.x, address[0], data)
        #self.out.write(rawData)

        interfacedData = self.interface.run(rawData)
        #once the rawData is "interfaced" we are passing it around by reference
        # interfaced data must be iterable
        try:
            for record in interfacedData:
                self.parse.run(record)
                self.describe.run(record)
                #push the record onto the queue until window
                if not (self.inWindow):
                    self.q.put(record)
                    #self.logger.debug("adding record to queue %s"%(repr(record)))
                    if (self.q.qsize() == int(
                            settings.SETTINGS.get("collector",
                                                  "describeWindow"))):
                        self.logger.debug(
                            "Describe Window of %s records met, Begin Processing queue"
                            % settings.SETTINGS.get("collector",
                                                    "describeWindow"))
                        self.inWindow = True

                        while not self.q.empty():
                            item = self.q.get()
                            #self.logger.debug("processing record from queue %s"%(repr(item)))
                            self.standardize.run(item)
                            self.transform.run(item)
                            self.partition.run(item)
                            self.csv.writeRow(self.csv.format(item))
                            self.q.task_done()
                else:
                    self.standardize.run(record)
                    self.transform.run(record)
                    self.partition.run(record)
                    self.csv.writeRow(self.csv.format(record))

                    self.score.run(record)

        except Exception as e:
            self.logger.error("Interfaced data is not iterable %s" % (str(e)))