Example No. 1
 def __init__(self):
     # TODO: add common attributes among data sources
     self._files_generated = {}
     self._type = self.TYPE_L  # Local source by default
     # Configuration
     self.config = Configure()
     # Get root path for creating data files
     self.rootDataPath = self.config.get_config(
     )['GeneralParams']['rootPath']
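This fragment is the constructor of the Source base class shown in full in Example No. 18. A minimal sketch (not part of the project) of how a concrete source might rely on these shared attributes; the class name and file name below are illustrative, and it assumes the project's Source class and dateutils helper are importable:

# Hypothetical subclass, for illustration only: it registers the parsed output
# file it produced for the current timestamp in the shared _files_generated dict.
class DummySource(Source):

    def start(self):
        ts = dateutils.get_timestamp()
        self._files_generated[ts] = self.rootDataPath + "dummy/output-dummy_" + ts + ".dat"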
Example No. 2
    def run(self):

        logging.info("Running Source Master Manager ...")

        method_name = "run()"

        # Get configuration
        config = Configure()
        timer = config.get_config()['GeneralParams']['dataSourcesScheduling']
        timeout = config.get_config(
        )['GeneralParams']['dataSourcesNotReadyWaitingTime']

        try:
            # Monitoring interval counter
            c_interval = 1

            # Doing until stop request
            while not self._stopped_event.isSet():

                # init of the monitoring interval
                t_init_interval = datetime.now()

                # end of the monitoring interval
                t_end_interval = t_init_interval + timedelta(seconds=timer)

                # max time for waiting a source
                t_max_interval = t_end_interval + timedelta(seconds=timeout)

                # ts associated to the current monitoring interval
                ts = dateutils.get_timestamp()

                # Start a thread to manage the sources ready for the current monitoring interval
                intervalSourceMonitoringThread = IntervalMonitoringSourceManagerThread(
                    self._sourceManager_instance, t_init_interval,
                    t_end_interval, t_max_interval, ts)
                intervalSourceMonitoringThread.setName("IntervalThread_" +
                                                       str(c_interval))
                intervalSourceMonitoringThread.start()

                # Wait for the end of the interval
                logging.debug("Waiting for the next interval ...")
                sleep(timer)

                # Monitoring interval counter
                c_interval = c_interval + 1

        except Exception as detail:
            logging.error(
                "Error in processing the data sources. Type: %s, msg: %s",
                sys.exc_info()[0], detail)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type,
                                      exc_value,
                                      exc_traceback,
                                      limit=5,
                                      file=sys.stdout)
            raise DataSourceError(self, detail, method_name)
Example No. 3
    def send_msg_to_server(self, pack):
        """
        Sending packet to the server
        
        Parameters
        ----------
        pack: Packet
            The packet to send
        
        Return
        ------
        client: socket
            client socket
            
        Raise
        -----
        CommError
        
        """

        method_name = "send_msg_to_server()"

        # load config
        config = Configure()
        conn_timeout = config.get_config(
        )['GeneralParams']['serverConnectionTimeout']

        logging.info("Sending packet %s to server %s", pack._header['id'],
                     self._server_address)

        # Serialize the packet
        p_serialized = pickle.dumps(pack, 2)

        # IP and port of the server
        ip, port = self._server_address

        try:
            # Connect to the server
            client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            client.settimeout(conn_timeout)
            client.connect((ip, port))
            logging.debug("Connected to server %s", self._server_address)
        except socket.error as se:
            logging.error("Error creating a new connection to %s",
                          self._server_address)
            client.close()
            raise CommError(se, sys.exc_info()[1], method_name)

        # Send msg
        client.send(p_serialized)

        logging.info("Packet %s has been sent to server %s",
                     pack._header['id'], self._server_address)

        return client
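Because the method returns the open client socket, the caller presumably reads the server's reply from it before closing. A minimal sketch of that follow-up, assuming the server answers with a single pickled response packet; comm and pack are placeholder names, and the 'resp' body field matches the response built in Example No. 11:

import pickle

client = comm.send_msg_to_server(pack)   # comm: a communicator object exposing the method above
try:
    raw = client.recv(4096)              # read the (assumed) pickled response
    response = pickle.loads(raw)         # rebuild the response packet object
    logging.info("Server answered: %s", response._body.get('resp'))
finally:
    client.close()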
Example No. 4
def get_timestamp():
    """
    Gets the current time, formatted as specified in ``config/sensor.yaml``
        
    Return
    ------
    ts:
        Current time stamp with specific format as shown in the configuration file (sensor.yaml)
    """
    config = Configure()
    tsDateFormat = config.get_config()['GeneralParams']['tsDateFormat']

    return datetime.strftime(datetime.now(), tsDateFormat)
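The returned string ends up embedded in the names of the generated files (model backups, observations, outputs). A small self-contained illustration of the formatting step, with a hypothetical format string standing in for the GeneralParams.tsDateFormat value read from sensor.yaml:

from datetime import datetime

ts_date_format = "%Y%m%d%H%M"   # illustrative only; the real value comes from the configuration
ts = datetime.strftime(datetime.now(), ts_date_format)
model_backup_file = "model_" + ts + ".json"   # e.g. model_201701302000.json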
Example No. 5
    def get_number_source_variables(self, source, source_name):
        # TODO: get this parameter from the flow parser sources configuration
        config = Configure()

        vars_number = 0
        if source._type == Source.TYPE_L:
            vars_number = len(
                config.get_config()['DataSources'][source._type][source_name]
                ['parserContents'][Source.S_VARIABLES])
        else:
            vars_number = config.get_config()['DataSources'][
                source._type][source_name]['parserVariables']

        return vars_number
Example No. 6
    def get_synchronized_file(self, df, ts_master_source, sampling_rate):

        config = Configure()
        dateFormat = config.get_config()['GeneralParams']['dateFormat']
        dateFormatNfcapd = config.get_config(
        )['GeneralParams']['dateFormatNfcapdFiles']

        # Format date according to the index in the dataframe. Upper timestamp to synchronize
        ts = datetime.strptime(ts_master_source, dateFormatNfcapd)
        end_ts = ts.strftime(dateFormat)

        # Lower timestamp to synchronize: upper timestamp - sampling_rate (in minutes)
        init_ts = ts.replace(minute=ts.minute - sampling_rate)
        init_ts = init_ts.strftime(dateFormat)

        # Get the dataframe rows according to the ts_master_source
        return df[str(init_ts):str(end_ts)]
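The final line relies on pandas label-based slicing over a DatetimeIndex, which includes both endpoints. A self-contained sketch of that behavior with toy data:

import pandas as pd

# One observation per minute, indexed by timestamp
idx = pd.date_range("2016-02-09 12:45", periods=5, freq="1min")
df = pd.DataFrame({"obs": range(5)}, index=idx)

# Label-based slice: rows from init_ts to end_ts, endpoints included
print(df["2016-02-09 12:46":"2016-02-09 12:48"])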
Example No. 7
    def do_dynamic_calibration(self, **kwargs):
        """
        Starting point for the dynamic calibration procedure

        Raises
        ------
        SensorError, MSNMError

        """

        method_name = "do_dynamic_calibration()"

        logging.info("Doing dynamic calibration ...")

        # Time stamp
        ts = dateutils.get_timestamp()

        # Get configuration
        config = Configure()
        model_backup_path = config.get_config()['Sensor']['model']
        model_backup_file = model_backup_path + "model_" + ts + ".json"

        try:
            # Model calibration init
            self._model.calibrate_dynamically(self._data, **kwargs)

            # Get the JSON of the model
            logging.debug("Saving the current model")
            json_model = datautils.model2json(self._model, ts)

            # Save the model
            datautils.save2json(json_model, model_backup_file)

        except ModelError as me:
            logging.error("Error doing dynamic calibration: %s", me.get_msg())
            raise SensorError(self, me.get_msg(), method_name)
        except MSNMError as emsnm:
            logging.error("Error doing dynamic calibration: %s",
                          emsnm.get_msg())
            raise emsnm

        logging.info("End of doing dynamic calibration...")
Example No. 8
def getAllVarNames():

    """
    Concatenates all variable names from all data sources

    """

    # Config params
    config = Configure()

    local_sources_vars_names = []
    remote_sources_vars_names = []

    try:
        local_sources = config.get_config()['DataSources']['local']

        # Get local sources var names
        for i_local in local_sources:
            local_sources_vars_names.extend(getVarNamesFromSource(local_sources[i_local]['parserContents']))

    except KeyError as ke:
        logging.warning("There are no local sources configured: %s", ke)

    try:
        remote_sources = config.get_config()['DataSources']['remote']

        # Get remote sources var names
        for i_remote in remote_sources:
            # Right now every remote source has only two variables: Q and D
            # TODO: to extend for more flexibility like in local sources
            remote_sources_vars_names.append('Q_'+ i_remote)
            remote_sources_vars_names.append('D_'+ i_remote)

    except KeyError as ke:
        logging.warning("There are no remote sources configured: %s", ke)

    all_vars = local_sources_vars_names + remote_sources_vars_names
    logging.debug("%s variables found",len(all_vars))

    return all_vars
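getVarNamesFromSource() is not shown in these examples. A plausible sketch of it, assuming parserContents follows the FCParser layout suggested by Source.S_VARIABLES ('FEATURES') and that each feature definition carries a 'name' key (both are assumptions, not confirmed by the code above):

def getVarNamesFromSource(parser_contents):
    # Assumed structure: parser_contents['FEATURES'] is a list of feature
    # definitions, each one a dict with a 'name' key.
    return [feature['name'] for feature in parser_contents.get('FEATURES', [])]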
Example No. 9
    def do_calibration(self, **kwargs):
        """
        Starting point of the calibration procedure

        Raises
        ------
        SensorError, MSNMError

        """

        method_name = "do_calibration()"

        # Time stamp
        ts = dateutils.get_timestamp()

        # Get configuration
        config = Configure()
        # Get root path for creating data files
        rootDataPath = config.get_config()['GeneralParams']['rootPath']
        model_backup_path = config.get_config()['Sensor']['model']

        model_backup_file = rootDataPath + model_backup_path + "model_" + ts + ".json"

        try:
            # Model calibration init
            self._model.calibrate(self._data, **kwargs)

            # Get the JSON of the model
            json_model = datautils.model2json(self._model, ts)

            # Save the model
            datautils.save2json(json_model, model_backup_file)

        except ModelError as eme:
            raise SensorError(self, eme.msg, method_name)
        except MSNMError as emsnm:
            raise emsnm
Example No. 10
def get_generated_file(ts, type):
    '''
    This method is initially devised to get any of the files generated by the sensor under /data folder
    
    TODO: To be tested 
    
    '''

    # Config params
    config = Configure()
    obs_generated_path = config.get_config()['Sensor'][
        type]  # path where we searching for the file

    file_found = None

    for file_name in os.listdir(obs_generated_path):
        if os.path.isfile(os.path.join(obs_generated_path, file_name)):
            file_ts = file_name.split('.')[0][3:]
            if file_ts == ts:
                file_found = file_name
                break
    if not file_found:
        logging.warn("The file with ts %s was not found :(", ts)
        return None

    return os.path.join(obs_generated_path, file_found)
Example No. 11
    def send_response(self, request, msg):
        """
        Sending the response to the client
        
        Parameters
        ----------
        request:
            client request
        msg: str
            response msg
            
        """

        # Sensor ID. The current sensor ID that sends the response
        config = Configure()
        sid = config.get_config()['Sensor']['sid']

        # sent timestamp
        ts = dateutils.get_timestamp()

        # build a response message
        pack_resp = ResponsePacket()
        pack_resp.fill_header({
            'id': self._packet_sent,
            'sid': sid,
            'ts': ts,
            'type': Packet.TYPE_R
        })
        pack_resp.fill_body({'resp': msg})

        # Response packet
        p_serialized = pickle.dumps(pack_resp, 2)
        request.send(p_serialized)

        # increment the number of packets sent
        self._packet_sent = self._packet_sent + 1
Example No. 12
 def run(self):
     # Get configuration
     config = Configure()
     
     # Load local data sources running in static Mode
     local_dict = {}        
     try:
         src_local = config.get_config()['DataSources']['local']
             
         logging.debug("Loading %s local sources %s.",len(src_local),src_local.keys())
             
         for i in src_local.keys():
             
             if src_local[i]['staticMode']:
                 
                 logging.debug("Local source %s is running in static mode.",i)                        
                 local_dict[i] = src_local[i]
                 
     except KeyError as ke:
         logging.warning("There are no local sources configured: %s", ke)
     
     
     #Test observations per source
     obsBySource = {}
     
     # Get external files
     for i in local_dict.keys():
         if i == 'Netflow':
             # Get all parsed *.dat files from a specific folder
             staticFiles = local_dict[i]['staticParsedFilesPath']
             
             # Get the name of the files ordered
             filesOrdered = np.sort(os.listdir(staticFiles))
             
             logging.debug("Got %s files from source %s ",len(filesOrdered),i)
             
             # Remove the auxiliary files weights.dat and stats.log
             filesOrdered = filesOrdered[np.logical_not('stats.log' == filesOrdered)]
             filesOrdered = filesOrdered[np.logical_not('weights.dat' == filesOrdered)]
             
             logging.debug("Removed auxiliary files from source %s. Total files to process: %s ",i,len(filesOrdered))
             
             # Generate a dataframe containing all *.dat (all observations)
             # Date range as index
             dstart = filesOrdered[0][7:][0:-4]# get initial timestamp from the first file name, e.g., output-20160209t1249.dat            
             dend = filesOrdered[-1][7:][0:-4]# get ending timestamp from the last file name, e.g., output-20160209t1249.dat
             d_range = pd.date_range(dstart,dend,freq='1min')
             dfAllObs = pd.DataFrame(filesOrdered,d_range,columns=['obs'])
             
             logging.debug("Got all obs from %s to %s",dstart,dend)
             
             # Get the test date range
             date_range_start = local_dict[i]['staticObsRangeStart']
             date_range_end = local_dict[i]['staticObsRangeEnd']
             obsBySource[i] = dfAllObs[date_range_start:date_range_end]
                         
             logging.debug("%s observations filtered from %s to %s",len(obsBySource[i]),date_range_start,date_range_end)
             
         else:
             logging.debug("TODO: managing %s local sources",i)
             
     
     # dataSourcesScheduling
     schedulingTimer = config.get_config()['GeneralParams']['dataSourcesScheduling']
     nfcapdTimeFormat = config.get_config()['GeneralParams']['dateFormatNfcapdFiles']
     netflow_parsed_folder = config.get_config()['DataSources']['local']['Netflow']['parsed']
     netflow_dat_manual_folder = config.get_config()['DataSources']['local']['Netflow']['staticParsedFilesPath']
     netflow_captures = config.get_config()['DataSources']['local']['Netflow']['captures']
 
     
     # flag to finish the simulation
     end = False
     
     # Observations counter
     obsCounter = 0
     
     # list of observations
     obsList = list(obsBySource['Netflow']['obs'])
     
     while not end and not self._stopped_event.isSet():
         
         try:
             # Process the observation
             obsToProcess = obsList[obsCounter]
             
             logging.debug("Observation to process: %s",obsToProcess)
             
             # ts from obs file
             tsFile = obsToProcess[7:][0:-4]
             
             # ts from the current host
             ts = dateutils.get_timestamp()
             
             # TODO ts for nfcap file 
             tsdatetime = parser.parse(ts)
             tsdatetimeFile = parser.parse(tsFile)
             tsFormatted = tsdatetime.strftime(nfcapdTimeFormat)
             tsFormattedFile = tsdatetimeFile.strftime(nfcapdTimeFormat)
             
             logging.debug("Creating nfcapd.current.%s", tsFormatted)               
                         
             # Generate a synthetic "current" nfcapd file with the same timestamp as the observation
             nfcapdCurrent = netflow_captures + os.sep + "nfcapd.current." + tsFormatted            
             with open(nfcapdCurrent,'w') as f:
                 f.write("Dummy nfcapd for static mode current ")
             
             # Move current nfcapd file to emulate on_moved event in netflow source
             # In static mode the emulated nfcapd files have names like nfcapd.201701302000_2016001182312,
             # where the ts after '_' is the ts of the static *.dat file
             nfcapdDummy = netflow_captures + os.sep + "nfcapd." + tsFormatted  + "_" + tsFormattedFile
             logging.debug("Renaming %s to %s",nfcapdCurrent,nfcapdDummy)
             shutil.move(nfcapdCurrent, nfcapdDummy)
             
             # Copy *.dat file to 'parsed' folder
             netflow_dat_manual_file = netflow_dat_manual_folder + obsToProcess
             netflow_parsed_file = netflow_parsed_folder + "output-netflow_" + tsFormattedFile + ".dat"
             logging.debug("Copying netflow manually generated file %s to %s ",netflow_dat_manual_file, netflow_parsed_file)
             shutil.copyfile(netflow_dat_manual_file, netflow_parsed_file)                
         
             logging.debug("Waiting for %s s ...",schedulingTimer)
             sleep(schedulingTimer)
             obsCounter = obsCounter + 1
             
             
         except KeyboardInterrupt:
             logging.info("KeyboardInterrupt received. Exiting ...")
             end = True
         except Exception:
             logging.warning("Probably we have reached the end of the observations. ERROR: %s", sys.exc_info()[1])
             end = True
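The timestamp handling above depends on the naming convention of the manually parsed files; a tiny self-contained check of the slicing used for names like output-20160209t1249.dat:

# Dropping the "output-" prefix (7 chars) and the ".dat" suffix (4 chars) recovers the timestamp
name = "output-20160209t1249.dat"
print(name[7:][0:-4])   # -> 20160209t1249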
Example No. 13
    def launch_monitoring(self, ts):
        """
        Once the parsing (flow parser) procedure is done, this method is in charge of starting
        the monitoring process

        Raises
        ------
        MSNMError

        """

        method_name = "launch_monitoring()"

        # Configuration
        config = Configure()
        # Get root path for creating data files
        rootDataPath = config.get_config()['GeneralParams']['rootPath']

        obs_generated_path = rootDataPath + config.get_config()['Sensor'][
            'observation']  # path to save the complete observation joining all data sources
        batch_obs = config.get_config()['Sensor']['dynamiCalibration'][
            'B']  # number of observation in a batch for EWMA calibration
        lambda_param = config.get_config()['Sensor']['dynamiCalibration'][
            'lambda']  # forgetting parameter for EWMA calibration
        dyn_cal_enabled = config.get_config()['Sensor']['dynamiCalibration'][
            'enabled']  # is the dynamic calibration activated?
        output_generated_path = rootDataPath + config.get_config()['Sensor'][
            'output']  # path to save the Q and T statistics obtained from the previous observation
        missingDataMethods = config.get_config()['Sensor']['missingData'][
            'missingDataMethods']  # Missing data available methods
        missingDataSelectedMethod = config.get_config()['Sensor'][
            'missingData']['selected']  # Get the selected missing data method
        missingDataModule = config.get_config()['Sensor']['missingData'][
            'missingDataModule']  # Missing data available methods
        valuesFormat = config.get_config()['GeneralParams'][
            'valuesFormat']  # how the variables of the complete observation are saved

        logging.debug("Launch monitoring for %s ", ts)

        try:
            logging.debug("Building the observation at %s for %s sources.", ts,
                          self._sources.keys())
            # Build the observation for monitoring
            test = []
            for i in self._sources.keys():
                # Get the number of variables of source i
                i_variables = self.get_number_source_variables(
                    self._sources[i], i)
                logging.debug("Source %s has %s variables.", i, i_variables)
                # Get the source output parsed file for the current ts
                i_parsed_file = self._sources[i]._files_generated[ts]
                logging.debug("File generated of source %s at %s: %s", i, ts,
                              i_parsed_file)

                if i_parsed_file:
                    # Load the file
                    if self._sources[i]._type == Source.TYPE_L:

                        # static mode?
                        # TODO: next version
                        #staticMode = config.get_config()['DataSources'][self._sources[i]._type][i]['staticMode'];
                        staticMode = False

                        if not staticMode:  # online or dynamic mode
                            i_test = np.loadtxt(i_parsed_file,
                                                comments="#",
                                                delimiter=",")
                        else:  # offline or static mode
                            # TODO: this is just a patch to remove in_npackets_verylow and in_nbytes_verylow, as in the matlab experiment, and just for Netflow!!!
                            # look for a smarter way to do this, e.g., by configuration params
                            i_test = np.loadtxt(i_parsed_file,
                                                comments="#",
                                                delimiter=",",
                                                usecols=range(
                                                    1, i_variables + 1 + 2))

                            logging.debug(
                                "Offline mode for source %s. Observation size of %s",
                                i, i_test.shape)

                            mask = np.ones(i_test.shape, dtype=bool)
                            # in_npackets_verylow index in matlab is 119 --> 118 in numpy
                            # in_nbytes_verylow index in matlab is 129 --> 128 in numpy
                            mask[118] = False
                            mask[128] = False
                            i_test = i_test[mask]

                            logging.debug(
                                "Offline mode for source %s. Observation size of %s after removing unuseless variables.",
                                i, i_test.shape)

                    elif self._sources[i]._type == Source.TYPE_R:
                        i_test = np.loadtxt(i_parsed_file,
                                            comments="#",
                                            delimiter=",")
                    else:
                        logging.warn(
                            "Source %s does not has a valid type. Type: %s", i,
                            self._sources[i]._type)
                else:
                    # Missing values are replaced with NaN values
                    i_test = np.empty(i_variables)
                    i_test[:] = np.nan

                # Test observation
                test = np.concatenate((test, i_test), axis=0)

            # 1xM array
            test = test.reshape((1, test.size))

            # Dynamic invocation of the selected data imputation method if needed
            if np.isnan(test).any():
                missingDataMethod = getattr(
                    importlib.import_module(missingDataModule),
                    missingDataMethods[missingDataSelectedMethod])
                logging.debug(
                    "Invoking %s method for data imputation for observation at %s",
                    missingDataMethod.func_name, ts)
                # Calling the corresponding method
                test = missingDataMethod(obs=test, model=self._sensor._model)

            obs_generate_file = obs_generated_path + "obs_" + ts + ".dat"
            np.savetxt(obs_generate_file,
                       test,
                       fmt=valuesFormat,
                       delimiter=",",
                       header=str(datautils.getAllVarNames()),
                       comments="#")

            logging.debug("Observation generated of %s variables at %s.",
                          test.size, ts)

            # is the dynamic calibration enabled?
            if dyn_cal_enabled:

                # Increments the number of observations
                self._current_batch_obs = self._current_batch_obs + 1

                logging.debug("obs %s added to the batch as number %s.", ts,
                              self._current_batch_obs)

                # Add the observation
                self._batch[ts] = {}
                self._batch[ts]['file'] = obs_generate_file
                self._batch[ts]['data'] = test

                # Once we have reached the number of batch observations, we can do the dynamic calibration
                if self._current_batch_obs == batch_obs:
                    # data for calibration
                    x = np.array([])
                    x = x.reshape((0, test.size))

                    # Build the [NxM] data for the calibration
                    #print(self._batch.keys())
                    for i in self._batch.keys():
                        logging.debug("batch at %s -> %s", i,
                                      self._batch[i]['data'].shape)
                        x = np.vstack((x, self._batch[i]['data']))

                    #print(x)
                    #print(type(x))

                    # Build the model
                    self._sensor.set_data(x)
                    self._sensor.do_dynamic_calibration(phase=2,
                                                        lv=3,
                                                        lamda=lambda_param)

                    # Reset the counter
                    self._current_batch_obs = 0

                    # Removing all batch observations
                    self._batch.clear()

            # Do monitoring
            Qst, Dst = self._sensor.do_monitoring(test)

        except SensorError as ese:
            raise MSNMError(self, ese.get_msg(), method_name)
        except MSNMError as emsnme:
            raise emsnme

        logging.debug("MONITORING --> UCLd: %s | Dst: %s",
                      self._sensor.get_model().get_mspc().getUCLD(),
                      self._sensor.get_mspc().getDst())
        logging.debug("MONITORING --> UCLq: %s | Qst: %s",
                      self._sensor.get_model().get_mspc().getUCLQ(),
                      self._sensor.get_mspc().getQst())

        # Save the generated statistics
        output_generated_file = output_generated_path + "output_" + ts + ".dat"
        header = "UCLq:" + str(
            self._sensor.get_model().get_mspc().getUCLQ()) + ", UCLd:" + str(
                self._sensor.get_model().get_mspc().getUCLD())
        list_array = [
            self._sensor.get_mspc().getQst(),
            self._sensor.get_mspc().getDst()
        ]
        statistics = np.array(list_array)
        statistics = statistics.reshape((1, statistics.size))
        np.savetxt(output_generated_file,
                   statistics,
                   fmt=valuesFormat,
                   delimiter=",",
                   header=header,
                   comments="#")

        # Gets the remote sensor addresses to send the packet to
        remote_addresses = config.get_config()['Sensor']['remote_addresses']

        # Send packets only if there is someone to send them to
        if remote_addresses:

            # Send the data packet to the corresponding sensor.
            dataPacket = DataPacket()
            # Packet sent counter increments
            self._packet_sent = self._packet_sent + 1
            dataPacket.fill_header({
                'id': self._packet_sent,
                'sid': config.get_config()['Sensor']['sid'],
                'ts': dateutils.get_timestamp(),
                'type': Packet.TYPE_D
            })
            dataPacket.fill_body({
                'Q': self._sensor.get_mspc().getQst(),
                'D': self._sensor.get_mspc().getDst()
            })

            logging.debug("Remote sources to send the packet #%s: %s",
                          self._packet_sent, remote_addresses)

            for i in remote_addresses.keys():
                ip = remote_addresses[i]['ip']
                port = remote_addresses[i]['port']
                tcpClient = TCPClient()
                tcpClient.set_server_address((ip, port))
                tcpClient.set_packet_to_send(dataPacket)
                TCPClientThread(tcpClient).start()

        return test, Qst, Dst
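The data-imputation step above resolves the method to call at runtime through importlib and getattr. A minimal, self-contained illustration of that pattern, using a standard-library module and function in place of the sensor's own missing-data configuration:

import importlib

# Illustrative stand-ins for the values read from the Sensor.missingData configuration
missing_data_module = "math"
missing_data_methods = {"root": "sqrt"}
selected_method = "root"

# Resolve the callable by name and invoke it, as launch_monitoring() does
imputation = getattr(importlib.import_module(missing_data_module),
                     missing_data_methods[selected_method])
print(imputation(9.0))   # -> 3.0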
Example No. 14
    def save_packet(self, pack, client_address, ts_rec):
        """
        Save the packet to the file system
            
        Parameters
        ----------
        pack: Packet
            the packet received
        client_address: 
            client address
        ts_rec: str
            packet reception ts
            
        Raise
        -----
        CommError
    
        """

        # Source sensor ID
        sid = pack._header['sid']

        try:

            # check if the source exists
            if sid in self._remotes:

                logging.info("Saving packet from sensor %s (%s)", sid,
                             client_address[0])

                config = Configure()
                valuesFormat = config.get_config()['GeneralParams'][
                    'valuesFormat']  # how the variables of the complete observation are saved
                # to save the complete remote packet received in JSON
                raw_remote_source_path = config.get_config()['DataSources'][
                    Source.TYPE_R][sid]['raw']
                raw_remote_source_file = raw_remote_source_path + sid + "_" + ts_rec + ".json"

                # Convert the packet to json format
                p_json = datautils.pack2json(pack)

                # Save raw data
                with open(raw_remote_source_file, 'w') as f:
                    f.write("# from: " + client_addres[0] + "\n")
                    f.writelines(p_json)

                logging.debug("\n%s", p_json)

                # to save the body of the packet --> Just for data packet
                parsed_remote_source_path = config.get_config()['DataSources'][
                    Source.TYPE_R][sid]['parsed']
                parsed_remote_source_file = parsed_remote_source_path + "output-" + sid + "_" + ts_rec + ".dat"

                # Save parsed data. The content of packet body
                statistics_values = np.array([i for i in pack._body.values()])
                name_statistics = [i for i in pack._body.keys()]
                # 1xM array
                statistics_values = statistics_values.reshape(
                    (1, statistics_values.size))
                np.savetxt(parsed_remote_source_file,
                           statistics_values,
                           valuesFormat,
                           delimiter=",",
                           header=str(name_statistics),
                           comments="#")

                # Add the *.dat output from parser to the dict of generated files
                # The key is the packet reception time stamp not the ts of received as packet field.
                self._remotes[sid]._files_generated[
                    ts_rec] = parsed_remote_source_file

                logging.info("Ending saving packet from sensor %s (%s)", sid,
                             client_address[0])
            else:
                logging.warn(
                    "The data source %s is not a known remote data source or it has not been configured correctly.",
                    sid)
                logging.warn("The received packet will not be processed :(")
        except IOError as ioe:
            logging.error("Error opening writing in file %s : %s",
                          raw_remote_source_file,
                          sys.exc_info()[1])
            raise CommError(ioe, sys.exc_info()[1], "save_packet()")
        except Exception as e:
            logging.error("Error when saving packet received with ID=%s : %s",
                          sid,
                          sys.exc_info()[1])
            raise CommError(e, sys.exc_info()[1], "save_packet()")
Example No. 15
def main(config_file):
    # Get the configuration params and load it into a singleton pattern
    sensor_config_params = Configure()

    try:
        # Application config params
        sensor_config_params.load_config(config_file)

    except ConfigError as ece:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type,
                                  exc_value,
                                  exc_traceback,
                                  limit=10,
                                  file=sys.stdout)
        ece.print_error()
        exit(1)

    try:

        # Logging config
        logging.config.dictConfig(
            yaml.load(
                open(
                    sensor_config_params.get_config()['GeneralParams']
                    ['logConfigFile'], 'r')))
    except ConfigError as ece:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type,
                                  exc_value,
                                  exc_traceback,
                                  limit=10,
                                  file=sys.stdout)
        ece.print_error()
        exit(1)

    # Create a Sensor
    sensor = Sensor()

    # Model variables and observation calibration.
    var_names = datautils.getAllVarNames()
    x = np.empty(0)

    if sensor_config_params.get_config(
    )['Sensor']['staticCalibration']['randomCalibration']:
        # Randomly generated static calibration matrix
        nobs = sensor_config_params.get_config(
        )['Sensor']['staticCalibration']['randomCalibrationObs']
        x = datautils.generateRandomCalObsMatrix(nobs, len(var_names))
    else:
        # Get calibration matrix from a CSV file
        x = pd.read_csv(sensor_config_params.get_config()['Sensor']
                        ['staticCalibration']['calibrationFile'],
                        index_col=0).values

    # Get root path for creating data files
    rootDataPath = sensor_config_params.get_config(
    )['GeneralParams']['rootPath']

    try:
        try:
            # Create monitoring dirs to save the results and the observations created
            if not os.path.exists(rootDataPath + sensor_config_params.
                                  get_config()['Sensor']['observation']):
                os.makedirs(
                    rootDataPath +
                    sensor_config_params.get_config()['Sensor']['observation'])
            if not os.path.exists(
                    rootDataPath +
                    sensor_config_params.get_config()['Sensor']['output']):
                os.makedirs(
                    rootDataPath +
                    sensor_config_params.get_config()['Sensor']['output'])
            if not os.path.exists(
                    rootDataPath +
                    sensor_config_params.get_config()['Sensor']['model']):
                os.makedirs(
                    rootDataPath +
                    sensor_config_params.get_config()['Sensor']['model'])
            if not os.path.exists(
                    rootDataPath +
                    sensor_config_params.get_config()['Sensor']['diagnosis']):
                os.makedirs(
                    rootDataPath +
                    sensor_config_params.get_config()['Sensor']['diagnosis'])
        except OSError as oe:
            logging.error("Sensor results directory cannot be created: %s", oe)
            exit(1)
        except Exception as e:
            traceback.print_exc()
            exit(1)

        # CALIBRATION PHASE
        # Get the number of latent variables configured
        lv = sensor_config_params.get_config()['Sensor']['lv']
        # Preprocessing method
        prep = sensor_config_params.get_config()['Sensor']['prep']
        # Phase to compute UCLD
        phase = sensor_config_params.get_config()['Sensor']['phase']

        sensor.set_data(x)
        sensor.do_calibration(phase=phase, lv=lv, prep=prep)
        logging.debug("UCLd = %s", sensor.get_model().get_mspc().getUCLD())
        logging.debug("UCLq = %s", sensor.get_model().get_mspc().getUCLQ())

        # Load local data sources
        local_dict = {}
        try:
            src_local = sensor_config_params.get_config(
            )['DataSources']['local']

            logging.debug("Loading %s local sources %s.", len(src_local),
                          src_local.keys())

            for i in src_local.keys():

                # Create the associated directories
                if not os.path.exists(rootDataPath + src_local[i]['raw']):
                    os.makedirs(rootDataPath + src_local[i]['raw'])
                if not os.path.exists(rootDataPath +
                                      src_local[i]['processed']):
                    os.makedirs(rootDataPath + src_local[i]['processed'])
                if not os.path.exists(rootDataPath + src_local[i]['parsed']):
                    os.makedirs(rootDataPath + src_local[i]['parsed'])

                # Create a dynamic instance
                # Example: MyClass = getattr(importlib.import_module(module_name), class_name)
                LocalSource = getattr(
                    importlib.import_module(src_local[i]['moduleName']), i)
                local_dict[i] = LocalSource()
                local_dict[i].start()  # Run the thread associated

        except KeyError as ke:
            logging.warning("There are no local sources configured: %s", ke)
        except Exception as e:
            traceback.print_exc()
            exit(1)

        # Load remote data sources
        remote_dict = {}
        try:
            src_remote = sensor_config_params.get_config(
            )['DataSources']['remote']

            logging.debug("Loading %s remote sources %s.", len(src_remote),
                          src_remote.keys())

            for i in src_remote.keys():

                # Create the associated directories
                if not os.path.exists(rootDataPath + src_remote[i]['raw']):
                    os.makedirs(rootDataPath + src_remote[i]['raw'])
                if not os.path.exists(rootDataPath + src_remote[i]['parsed']):
                    os.makedirs(rootDataPath + src_remote[i]['parsed'])

                # Add the remote source
                remote_dict[i] = RemoteSource()

        except KeyError as ke:
            logging.warning("There are no remote sources configured: %s", ke)
        except OSError as oe:
            logging.error(
                "Remote data source directory can not be created: %s", oe)
            exit(1)

        # Listening for incoming packets from remote sensors
        server_address = sensor_config_params.get_config(
        )['Sensor']['server_address']
        server = MSNMTCPServer((server_address['ip'], server_address['port']),
                               MSNMTCPServerRequestHandler)
        server.set_remotes(remote_dict)
        tcpServer = TCPServerThread(server)
        tcpServer.setName("TCPServer")
        tcpServer.start()

        # All data sources
        sources_dict = local_dict
        sources_dict.update(remote_dict)
        sources_dict = datautils.sort_dictionary(sources_dict, order='asc')

        # Source management
        manager = SourceManager(sensor)
        manager.set_data_sources(sources_dict)
        managerThread = SourceManagerMasterThread(manager)
        managerThread.setName("SourceManagerMasterThread")
        managerThread.start()

        # Default static mode
        staticMode = False

        # Are there local sources?
        if 'local' in sensor_config_params.get_config()['DataSources'].keys():
            # Check if the offline mode is enabled
            # TODO: enable and extend this functionality to all available data sources
            # staticMode = sensor_config_params.get_config()['DataSources']['local']['Netflow']['staticMode']
            # If we are in static mode we launch the offline thread

            offlineThread = OfflineThread()
            # TODO: enable this functionality
            if staticMode:
                offlineThread.setName("OffLineThread")
                offlineThread.start()
                logging.debug("Offline thread has been launched ...")

        # To stop the main thread
        continueMainThread = True

        # Main loop
        while continueMainThread:
            # static mode?
            if staticMode:
                if not offlineThread.isAlive():
                    offlineThread.stop()
                    continueMainThread = False

            time.sleep(1)

    except KeyboardInterrupt:
        logging.info("KeyboardInterrupt received ...")
        exc_type, exc_value, exc_traceback = sys.exc_info()
    except ConfigError as ece:
        logging.error(ece.print_error())
    except MSNMError as se:
        logging.error(se.print_error())
    except:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type,
                                  exc_value,
                                  exc_traceback,
                                  limit=5,
                                  file=sys.stdout)
    finally:
        logging.info("Stopping all services ...")

        # Stop all local data sources threads
        for i in local_dict.values():
            i.stop()
        try:
            if isinstance(tcpServer, TCPServerThread): tcpServer.stop()
        except UnboundLocalError:
            logging.warning(
                "TCPServerThread is not running, so it will not stopped.")
        try:
            if isinstance(managerThread, SourceManagerMasterThread):
                managerThread.stop()
        except UnboundLocalError:
            logging.warning(
                "SourceMangerMasterThread is not running, so it will not stopped."
            )
        try:
            if isinstance(offlineThread, OfflineThread): offlineThread.stop()
        except UnboundLocalError:
            logging.warning(
                "Offline thread is not running, so it will not stopped.")

        for i in threading.enumerate():
            if i is not threading.currentThread():
                logging.debug("Waiting for %s thread ...", i.name)
                i.join()
        logging.info("Exiting ...")
        exit(1)
Example No. 16
    def on_moved(self, event):
        """
            Called when a new file is renamed in the nfcapd output folder.
            This method is in charge of launching all the sensor tasks mentioned
            in this class description.
        """
        super(NetFlowFileEventHandler, self).on_moved(event)

        logging.info("Running netflow procedure ...")

        method_name = "on_moved()"

        # Get configuration
        config = Configure()
        # Get root path for creating data files
        rootDataPath = config.get_config()['GeneralParams']['rootPath']

        netflow_log_raw_folder = rootDataPath + config.get_config(
        )['DataSources'][self._netflow_instance._type][
            self._netflow_instance.__class__.__name__]['raw']
        netflow_log_processed_folder = rootDataPath + config.get_config(
        )['DataSources'][self._netflow_instance._type][
            self._netflow_instance.__class__.__name__]['processed']
        netflow_log_parsed_folder = rootDataPath + config.get_config(
        )['DataSources'][self._netflow_instance._type][
            self._netflow_instance.__class__.__name__]['parsed']
        netflow_flow_parser_config_file = config.get_config()['DataSources'][
            self._netflow_instance._type][
                self._netflow_instance.__class__.__name__]['parserConfig']
        # Parser configuration file for netflow

        #TODO: to be enabled
        #staticMode = config.get_config()['DataSources'][self._netflow_instance._type][self._netflow_instance.__class__.__name__]['staticMode'];
        staticMode = False

        try:
            # Time stamp
            #ts = dateutils.get_timestamp()

            # Get the ts provided by the recently generated nfcapd file
            list_splitted = event.dest_path.split('/')
            nfcapd_file_name = list_splitted[len(list_splitted) - 1]
            ts = nfcapd_file_name.split('.')[1]

            if not staticMode:  # dynamic mode

                # Get *.csv from nfcapd file
                netflow_log_processed_file = netflow_log_processed_folder + "netflow_" + ts + ".csv"
                self._netflow_instance.run_nfdump(event.dest_path,
                                                  netflow_log_processed_file)

                # Copy nfcapd file recently generated in raw folder
                netflow_log_raw_file = netflow_log_raw_folder + "nfcapd_" + ts
                logging.debug("Copying netflow raw file %s to %s ",
                              event.dest_path, netflow_log_raw_file)
                shutil.copyfile(event.dest_path, netflow_log_raw_file)

                # Copy the CSV file to the parsed folder to be parsed by the flow parser
                netflow_log_parsed_file = netflow_log_parsed_folder + "netflow_" + ts + ".csv"
                logging.debug("Copying netflow processed file %s to %s ",
                              netflow_log_processed_file,
                              netflow_log_parsed_file)
                shutil.copyfile(netflow_log_processed_file,
                                netflow_log_parsed_file)

                # Flow parser
                logging.debug("Running flow parser for %s file config.",
                              netflow_flow_parser_config_file)
                self._netflow_instance.launch_flow_parser(
                    netflow_flow_parser_config_file)

                # Add the *.dat output from parser to the dict of generated files
                self._netflow_instance._files_generated[
                    ts] = netflow_log_parsed_folder + "output-netflow_" + ts + ".dat"

                # Remove the CSV file once it is parsed successfully
                logging.debug("Deleting file %s", netflow_log_parsed_file)
                os.remove(netflow_log_parsed_file)

            else:  # static mode

                # Add the *.dat output from parser to the dict of generated files
                # The output-netflow_201701231335.dat file has previously been copied there by the static_simulation.py script
                # In static mode the emulated nfcapd files have names like nfcapd.201701302000_2016001182312,
                # where the ts after '_' is the ts of the static *.dat file
                tsFile = ts.split('_')[1]
                ts = ts.split('_')[0]

                self._netflow_instance._files_generated[
                    ts] = netflow_log_parsed_folder + "output-netflow_" + tsFile + ".dat"

        # TODO: when an exception is raised it is not correctly caught outside :(
        except DataSourceError as edse:
            logging.error("DataSourceError processing netflow source: %s",
                          edse.get_msg())
            #raise edse
            # remove wrong file generated
#             logging.debug("Deleting file %s",netflow_log_parsed_file)
#             os.remove(netflow_log_parsed_file)
#             logging.debug("Deleting file %s",netflow_log_raw_file)
#             os.remove(netflow_log_raw_file)
#             logging.debug("Deleting file %s",netflow_log_processed_file)
#             os.remove(netflow_log_processed_file)
        except IOError:
            logging.error("Error managing files in netflow source: %s",
                          sys.exc_info()[0])
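The timestamp used throughout on_moved() comes straight from the nfcapd file name; a short self-contained check of that extraction (the path below is illustrative):

# Dynamic mode: paths look like .../nfcapd.201701302000
# Static mode:  .../nfcapd.201701302000_2016001182312 (the ts after '_' belongs to the static *.dat file)
dest_path = "/data/netflow/captures/nfcapd.201701302000"
nfcapd_file_name = dest_path.split('/')[-1]
print(nfcapd_file_name.split('.')[1])   # -> 201701302000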
Example No. 17
    def run(self):

        logging.info(
            "Monitoring sources from %s to %s with maximum time until %s",
            self._t_init, self._t_end, self._t_max)

        method_name = "run()"

        # Get configuration
        config = Configure()
        # root path for the data
        rootDataPath = config.get_config()['GeneralParams']['rootPath']

        timer = config.get_config()['GeneralParams']['dataSourcesPolling']
        diagnosis_backup_path = rootDataPath + config.get_config()['Sensor'][
            'diagnosis']  # path to save diagnosis vector output
        valuesFormat = config.get_config()['GeneralParams'][
            'valuesFormat']  # how the variables of the complete observation are saved

        try:

            # Set all data sources as non-ready
            for i in self._sourceManager_instance._sources.keys():
                self._sources_ready[i] = False

            logging.debug("Checking sources at %s time interval.", self._ts)

            # End thread
            finish = False

            while not finish:

                logging.debug("Data sources not ready at interval %s: %s",
                              self._ts,
                              self.get_not_ready().keys())

                # Current time
                tc = datetime.now()

                # for each source
                for i in self._sourceManager_instance._sources.keys():

                    # If the max time to wait is not reached and not all sources are ready
                    if tc <= self._t_max and not self.are_ready(self._ts):
                        # Source i is ready?
                        if self.is_ready(i, self._ts):
                            # Source 'i' is ready
                            self._sources_ready[i] = True

                    else:
                        # Get not ready sources for that ts
                        src_not_ready = self.get_not_ready()

                        # Create an empty dummy *.dat file for the missing sources
                        logging.debug("Data sources not ready: %s",
                                      src_not_ready.keys())

                        for i in src_not_ready.keys():
                            if src_not_ready[i]._type == Source.TYPE_R:
                                parsed_file_path = rootDataPath + config.get_config(
                                )['DataSources'][Source.TYPE_R][i]['parsed']
                            else:
                                parsed_file_path = rootDataPath + config.get_config(
                                )['DataSources'][Source.TYPE_L][i]['parsed']

                            dummy_file = parsed_file_path + "dummy_" + self._ts + ".dat"

                            # Creates a dummy empty file
                            with open(dummy_file, 'w') as fw:
                                fw.write(
                                    "Empty dummy file indicating that there was no data available for that source at "
                                    + self._ts)

                            # For this ts the source is not ready
                            self._sourceManager_instance._sources[
                                i]._files_generated[self._ts] = None

                            #logging.debug("Dummy file created : %s", dummy_file)
                            logging.debug(
                                "Files generated for source %s at %s: %s", i,
                                self._ts, self._sourceManager_instance.
                                _sources[i]._files_generated)

                        # If the sensor has no remote sensor to send the statistics to, it is the root
                        # of the sensor hierarchy, so launch_monitoring is not necessary
                        remote_addresses = config.get_config(
                        )['Sensor']['remote_addresses']

                        # Do monitoring
                        test, Qst, Dst = self._sourceManager_instance.launch_monitoring(
                            self._ts)

                        # Set up which observations are compared
                        dummy = np.zeros((1, test.shape[0]))
                        # We evaluate the observation 1
                        dummy[0, 0] = 1

                        # Do diagnosis
                        diagnosis_vec = self._sourceManager_instance._sensor.do_diagnosis(
                            test, dummy)

                        # Save the diagnosis
                        diagnosis_backup_file = diagnosis_backup_path + "diagnosis_" + self._ts + ".dat"
                        #datautils.save2json(diagnosis_vec.tolist(), diagnosis_backup_file)
                        np.savetxt(diagnosis_backup_file,
                                   diagnosis_vec,
                                   fmt=valuesFormat,
                                   delimiter=",",
                                   header=str(datautils.getAllVarNames()),
                                   comments="#")

                        if not remote_addresses:
                            logging.warning(
                                "There are no remote addresses configured. This sensor should be the root in the sensor hierarchy."
                            )

                        # Finish the thread after launch the monitoring procedure
                        finish = True

                        # Exit to start the new monitoring interval
                        break

                # wait for polling time: waiting time for checking if the sources are ready
                sleep(timer)  # TODO: to customize on demand

        except Exception as detail:
            logging.error(
                "Error in processing the data sources. Type: %s, msg: %s",
                sys.exc_info()[0], detail)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type,
                                      exc_value,
                                      exc_traceback,
                                      limit=5,
                                      file=sys.stdout)
            raise DataSourceError(self, detail, method_name)
Example No. 18
class Source(object):
    """

    Represents a general data source

    Attributes
    ----------
    _files_generated: dict
        Contains the observation file generated (*.dat) by each data source at a specific timestamp
    _type: str
        Data source type:
            'local': Local source e.g., netflow, iptables, IDS, syslog, etc. that is located in the host where the sensor is deployed
            'remote': Remote source (i.e., remote sensors) from which the Q and D statistics are received.

    See Also
    --------
    msnm.modules.source.netflow
    msnm.modules.source.iptables
    """

    # TCP flag formats. The 'netflow' type means that all handled sources format TCP flags as nfdump does in its output
    FORMAT_TCP_FLAGS_NETFLOW_TYPE = "netflow"

    # Source configurations according to the FCParser configuration
    S_VARIABLES = 'FEATURES'

    # Types of sources
    TYPE_L = "local"
    TYPE_R = "remote"

    def __init__(self):
        # TODO: add common attributes among data sources
        self._files_generated = {}
        self._type = self.TYPE_L  # Local source by default
        # Configuration
        self.config = Configure()
        # Get root path for creating data files
        self.rootDataPath = self.config.get_config(
        )['GeneralParams']['rootPath']

    def parse(self, file_to_parse):
        """
        Parsing the information from a specific data source

        Parameters
        ----------
        file_to_parse: str
            Path to the file to be parsed

        Raises
        ------
        DataSourceError

        """

        # To be overridden in child classes
        # TODO: it could have a default behavior
        pass

    def start(self):
        """
        This method runs the information gathering of a specific data source

        """
        # To be overridden in child classes
        # TODO: it could have a default behavior
        pass

    def stop(self):
        """
        This method stops the information gathering of a specific data source

        """
        # To be overridden in child classes
        # TODO: it could have a default behavior
        pass

    def format_tcp_flags(self, list_tcp_flags, format_type):
        """
        Normalizing the TCP flags format

        Parameters
        ----------
        list_tcp_flags: list
            TCP flags list in this way: ['ACK','PSH','SYN'] (see iptables module)

        Return
        ------
        formatted_string: str
            The accordingly formatted string.

        Example
        -------
        >>> formatted_string = format_tcp_flags(['ACK','PSH','SYN'],format_type=self.FORMAT_TCP_FLAGS_NETFLOW_TYPE)
        >>> print(formatted_string)
        >>> # it should return '.AP.S.' as nfdump formats the TCP flags


        """
        # TODO: To be extended to another format
        if format_type == self.FORMAT_TCP_FLAGS_NETFLOW_TYPE:
            formatted_string = self.format_tcp_flag_as_netflow(list_tcp_flags)
        else:
            # Default format as netflow
            formatted_string = self.format_tcp_flag_as_netflow(list_tcp_flags)

        return formatted_string

    def format_tcp_flag_as_netflow(self, list_tcp_flags):
        """
        Netflow way to format TCP flags

        Parameters
        ----------
        list_tcp_flags: list
            TCP flags list in this way: ['ACK','PSH','SYN'] (see iptables module)

        Return
        ------
        formatted_string: str
            The accordingly formatted string.

        Example
        -------
        >>> formatted_string = format_tcp_flag_as_netflow(['ACK','PSH','SYN'])
        >>> print(formatted_string)
        >>> # it should return '.AP.S.' as nfdump formats the TCP flags


        """

        # Formatted string of flags
        formatted_string = ""

        for flag in list_tcp_flags:
            if flag:
                formatted_string = formatted_string + flag[0]
            else:
                formatted_string = formatted_string + '.'

        return formatted_string

    def get_file_to_parse(self, file_to_parse, backups_path, nlastlines):
        """
        Obtaining and saving the 'n' last lines of the iptables logs to be parsed.
        Although this method is initially conceived to work with iptables logs, it could be
        used with similar log files.

        Parameters
        ----------
        file_to_parse: str
            Path to the whole iptables log file
        backups_path: str
            Path to save the 'n' lines extracted
        nlastlines: int
            The 'n' last lines to be extracted

        Return
        ------
        d: deque
            The 'n' last lines read from the file


        """

        method_name = "get_file_to_parse()"

        try:

            # Read only the n last lines of the file
            with open(file_to_parse, 'r') as f:
                d = deque(f, nlastlines)
                #print d

            # Save the piece of file that is going to be parsed
            with open(backups_path, 'w') as fw:
                fw.writelines(list(d))

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)

        return d

    def get_file_to_parse_time(self, file_to_parse, timer):
        """
        Reads the lines appended to a specific file during a certain time interval.
        Although this method is initially conceived to work with iptables logs, it could be
        used with similar files.

        Parameters
        ----------
        file_to_parse: str
            Path to the whole iptables log file
        timer: int
            Time interval (in seconds) during which new lines are read

        Return
        ------
        log_lines: list
            The lines read during the time interval


        """

        method_name = "get_file_to_parse_time()"

        try:

            # Log lines gathered from the file
            log_lines = []

            # Read only the n last lines of the file
            with open(file_to_parse, 'r') as f:
                # Goes to the end of the file
                f.seek(0, 2)
                # Computes the ending time in seconds
                t_end = time.time() + timer
                while time.time() < t_end:
                    line = f.readline()
                    if line:
                        log_lines.append(line)

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)

        return log_lines

    def get_synchronized_file(self, df, ts_master_source, sampling_rate):

        config = Configure()
        dateFormat = config.get_config()['GeneralParams']['dateFormat']
        dateFormatNfcapd = config.get_config(
        )['GeneralParams']['dateFormatNfcapdFiles']

        # Format date according to the index in the dataframe. Upper timestamp to synchronize
        ts = datetime.strptime(ts_master_source, dateFormatNfcapd)
        end_ts = ts.strftime(dateFormat)

        # Lower timestamp to synchronize: upper timestamp - sampling_rate (in minutes)
        init_ts = ts.replace(minute=ts.minute - sampling_rate)
        init_ts = init_ts.strftime(dateFormat)

        # Get the dataframe rows according to the ts_master_source
        return df[str(init_ts):str(end_ts)]

    def get_synchronized_file_from_netflow_dump(self, df, netflow_dumps_path):

        method_name = "get_synchronized_file_from_netflow_dump()"

        try:
            with open(netflow_dumps_path, 'r') as f:
                init_ts = f.readline().split(',')[
                    0]  # Get the ts of the first line in nfdump *.csv file
                # Get the ts of the last line in nfdump *.csv file
                d = deque(f, 1)  # Get the last line
                end_ts = list(d)[0].split(',')[0]  # Get the ts

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)
        # Get the dataframe rows according to the ts_master_source
        return df[str(init_ts):str(end_ts)]

    def launch_flow_parser(self, flow_parser_config):
        """
        Launch the parsing procedure (flow parser)

        Raises
        ------
        MSNMError

        """

        method_name = "launch_flow_parser()"

        #TODO: modify parser source to raise ParseError or some like that as in the MSNM sensor.

        try:
            logging.debug("Parsing from %s configuration.", flow_parser_config)
            fcparser.main(call='internal', configfile=flow_parser_config)
        except Exception:
            logging.error("Error parsing data: %s", sys.exc_info()[2])
            traceback.print_exc()
            raise DataSourceError(self, sys.exc_info()[1], method_name)

    def save_file(self, log_lines, path_to_save):

        method_name = "save_file()"

        try:
            logging.debug("Saving file lines in %s", path_to_save)
            # Save the piece of file that is going to be parsed
            with open(path_to_save, 'w') as fw:
                fw.writelines(log_lines)

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)
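Note that format_tcp_flag_as_netflow() emits the first letter of every non-empty entry and a dot for every empty one, so reproducing nfdump's fixed-width output such as '.AP.S.' presumably requires a positional flag list with empty strings for unset flags (an assumption based on the docstring, not confirmed elsewhere). A standalone re-run of the loop for illustration:

# Assumed positional order: URG, ACK, PSH, RST, SYN, FIN; '' means the flag is not set
flags = ['', 'ACK', 'PSH', '', 'SYN', '']
formatted = ""
for flag in flags:
    formatted += flag[0] if flag else '.'
print(formatted)   # -> .AP.S.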