class Main:
    """ Main.
	"""
    def __init__(self):
        """ Initializer.

            Loads the network settings, prepares the interface, registers the
            SIGALRM timeout handler and wires up the logger, the frame handlers,
            the cryptographic handlers and the trace manager.
        """

        # Network Settings; every field starts out as None and is filled in by
        # the settings method selected in __initNetworkSettings().
        self.iface = None
        self.addr1 = None  # Destination
        self.addr2 = None  # Source
        self.addr3 = None  # Basic Service Set ID (BSSID)
        self.ssid = self.channel = None
        self.wepKey = self.passphrase = None
        self.arpIpSource = self.arpIpDestination = None
        self.__initNetworkSettings()

        # The Operating System is initialized after the settings, because it
        # needs the interface and the channel.
        self.__initOperatingSystem()

        # Timeouts are delivered through SIGALRM; the interval is in seconds.
        signal.signal(signal.SIGALRM, self.__signal_handler)
        self.signalInterval = 0.4

        # Logger, shared by all frame handlers.
        logger = Logger(filename='log.txt', terminal=True)
        self.logger = logger

        # Handlers for management, EAPoL and ARP frames.
        management = HandleManagement(logger, self.iface, self.addr1,
                                      self.addr2, self.addr3, self.ssid,
                                      self.channel)
        self.handleMgmt = management
        eapol = HandleEAPoL(logger, self.iface, self.addr1, self.addr2,
                            self.addr3, self.ssid)
        self.handleEAPoL = eapol
        arp = HandleARP(logger, self.iface, self.addr1, self.addr2,
                        self.addr3, self.arpIpSource, self.arpIpDestination)
        self.handleARP = arp

        # Hand the key and the passphrase to the handlers that use them.
        management.setWEPKey(self.wepKey)
        eapol.setPassphrase(self.passphrase)

        # Cryptographic handlers for encapsulation and decapsulation.
        wep, tkip, aes = HandleWEP(), HandleTKIP(), HandleAES()
        self.handleWEP = wep
        self.handleTKIP = tkip
        self.handleAES = aes

        # Register the cryptographic handlers with the frame handlers.
        management.setCryptographicHandlers(wep=wep)
        eapol.setCryptographicHandlers(tkip=tkip, aes=aes)
        arp.setCryptographicHandlers(wep=wep, tkip=tkip, aes=aes)

        # The Trace Manager, and the list of all traces it provides.
        self.traceManager = TraceManager(management, eapol)
        self.traces = self.traceManager.getTraces()

        # State of the trace under test; reset for every trace that is run.
        self.trace = None
        self.tracePosition = None
        self.traceBeaconed = False
        self.traceFinished = False

    def printResults(self):
        """ Print the obtained results.

            The printing itself is delegated to the Trace Manager.
        """
        self.traceManager.printResults()

    def __initOperatingSystem(self):
        """ Prepare the Operating System (OS) by tuning the wireless interface
            to the configured channel through the iwconfig command.

            NOTE:   It might be necessary to restart the interface with:
                ifconfig self.iface down; ifconfig self.iface up
        """
        # Assemble 'iwconfig <iface> channel <channel>' and run it in a shell.
        command = ' '.join(['iwconfig', self.iface, 'channel',
                            str(self.channel)])
        os.system(command)

    def __initNetworkSettings(self):
        """ Initialize the network settings by selecting either the simulated
            router or one of the physical routers.
        """
        # Exactly one settings method is active at a time; the simulation
        # profile is kept below, disabled, as the alternative.
        #self.__setSettingsSimulation()
        self.__setSettingsForSomeRouter()

    def __setSettingsSimulation(self):
        self.iface = 'wlan1'
        self.addr1 = '02:00:00:00:00:00'  # Destination MAC
        self.addr2 = '02:00:00:00:01:00'  # Source MAC
        self.addr3 = '02:00:00:00:00:00'  # Basic Service Set ID (BSSID)
        self.ssid = 'TEST_NETWORK'
        self.channel = 1
        self.wepKey = 'abcde'
        self.passphrase = 'abcdefgh'
        self.arpIpSource = '192.168.1.2'  # Within the same subnet of the destination.
        self.arpIpDestination = '192.168.1.1'

    def __setSettingsForSomeRouter(self):
        self.iface = 'wlan0'
        self.addr1 = 'aa:bb:cc:dd:ee:ff'  # Destination MAC
        self.addr2 = 'ff:ee:dd:cc:bb:aa'  # Source MAC
        self.addr3 = self.addr1  # Basic Service Set ID (BSSID)
        self.ssid = 'TEST_NETWORK'
        self.channel = 1
        self.passphrase = 'abcdefgh'
        self.arpIpSource = '192.168.1.2'  # Within the same subnet of the destination.
        self.arpIpDestination = '192.168.1.1'

    def __signal_handler(self, signum, frame):
        """ Handler for signal events; registered for SIGALRM in the initializer.

            Raises a generic Exception carrying the message "Timeout."; the
            signum and frame arguments are not used.
        """
        raise Exception("Timeout.")

    def __isPacketIntendedForUs(self, packet):
        """ Check if we are the intended receivers of the packet by comparing the MAC
			addresses in the given packet against our own MAC addresses.
		"""
        # The packet destination address (addr1) must equal our source address (addr2).
        if packet.addr1 != self.addr2: return False
        # The packet source address (addr2) must equal our destination address (addr1).
        if packet.addr2 != self.addr1: return False
        return True

    def run(self):
        """ Execute every available trace, one after the other.

            Each entry of the trace list provides the trace itself at index 0
            and its validation type at index 1. A separator line is printed
            between consecutive traces, but not after the last one.
        """
        assert self.traces is not None, \
            'There are no traces available to be executed.'
        printTerminalLine('=')
        for position, entry in enumerate(self.traces):
            self.__traceRun(entry[0], entry[1])
            isLastTrace = position >= len(self.traces) - 1
            if not isLastTrace:
                printTerminalLine('-')

    def __traceRun(self, trace, validationType):
        """ Run the given trace, from now on known as the trace under test.
			Note: 	The current implementation assumes that the first message in the trace
				under test waits for a message to be received. Most commonly this will
				be a beacon frame from the access point.
		"""
        assert( trace is not None and trace is not [] ), \
         'There is no trace given, or it is empty, and can therefore not be executed.'
        self.trace = trace
        self.tracePosition = 0
        self.traceBeaconed = False
        self.traceFinished = False

        # Run the trace and upon completion validate the connection. If either of them
        # fails the exception handler will catch the error message and mark the trace as
        # failed. If the connection is validated positively, the trace is marked as
        # success.
        # FIXME: http://stackoverflow.com/questions/1112343/how-do-i-capture-sigint-in-python
        try:
            self.logger.log(self.logger.ACTION, 'Starting Trace...')
            sniff(iface=self.iface,
                  lfilter=self.__traceFilter,
                  stop_filter=self.__traceStopCondition,
                  prn=self.__traceHandler)
            signal.setitimer(signal.ITIMER_REAL, 0.0)  # Reset the timer.
            if validationType is None:
                raise Exception('No Validation Type was given.')
            sleep(100.00 / 1000.00)
            # Sleep for 100ms before validation.
            self.__traceValidateConnection(validationType)
            self.traceManager.markTrace(type='SUCCESS',
                                        trace=self.trace,
                                        validation=validationType)

        # Catch exceptions; log the message and mark the trace as failed.
        except Exception, message:
            self.logger.log(self.logger.EXCEPTION, str(message))
            self.traceManager.markTrace(type='FAILURE',
                                        trace=self.trace,
                                        validation=validationType)

        # Reset the connection state with the access point.
        self.__traceReset()
    logger.time_log('Starting Category Transformation.')
    with open('data_scratch/Crimes_-_2001_to_present.csv') as input_file:
        reader = csv.reader(input_file)

        for i, line in enumerate(reader):
            raw_record_count += 1
            if raw_record_count == 0:
                continue

            iucr = line[4]
            type = line[5]
            description = line[6]
            location = line[7]
            fbi_code = line[14]

            iucr_codes[iucr] = 1
            type_codes[type] = 1
            description_codes[description] = 1
            location_codes[location] = 1
            fbi_codes[fbi_code] = 1

        logger.time_log('Category Transformation Complete.\n')
        logger.log('Processed %s total records' % raw_record_count)
        serialize_codes(iucr_codes, 'data_scratch/iucr_codes.json', logger)
        serialize_codes(type_codes, 'data_scratch/type_codes.json', logger)
        serialize_codes(description_codes,
                        'data_scratch/description_codes.json', logger)
        serialize_codes(location_codes, 'data_scratch/location_codes.json',
                        logger)
        serialize_codes(fbi_codes, 'data_scratch/fbi_codes.json', logger)
Exemplo n.º 3
0
                    longitude = None

                if (violation_code is not None and issue_month is not None
                        and issue_weekday is not None
                        and issue_hour is not None and issue_time is not None
                        and car_state is not None and latitude is not None
                        and longitude is not None):

                    # With required source fields scrubbed, it is time to engineer a couple of features.
                    out_of_state = 0
                    if car_state != 5:
                        out_of_state = 1

                    luxury_make = luxury_make_types[car_make]
                    domestic_make = domestic_make_types[car_make]

                    writer.writerow([
                        violation_code, issue_month, issue_weekday, issue_hour,
                        car_state, car_make, car_color, latitude, longitude,
                        out_of_state, luxury_make, domestic_make
                    ])
                    processed_records += 1

    logger.time_log('Data Pre-Processing Complete.\n')
    logger.log('    Total Records: %s' % raw_record_count)
    logger.log('Processed Records: %s\n' % processed_records)
    logger.log('Records with missing or incomplete data:')
    for column in column_names:
        logger.log("%s: %s" % (column, missing_data[column]))
    logger.close()
    sample_seed = 1029
    sample_rate = 0.10
    raw_record_count = -1
    processed_records = 0

    random.seed(sample_seed)

    logger.time_log('Starting Data Sampling.')

    with open('data_scratch/cleaned_crimes.csv') as input_file:
        with open('data_scratch/sampled_cleaned_crimes.csv', 'w',
                  newline='') as output_file:
            reader = csv.reader(input_file)
            writer = csv.writer(output_file)

            writer.writerow(column_names)

            for i, line in enumerate(reader):
                raw_record_count += 1
                if raw_record_count == 0:
                    continue

                if random.random() < sample_rate:
                    writer.writerow(line)
                    processed_records += 1

    logger.time_log('Data Sampling Complete.\n')
    logger.log('Total Records: %s' % raw_record_count)
    logger.log('Sampled Records: %s' % processed_records)
Exemplo n.º 5
0
        with open('model/experiment/output/%s_predict_proba.p' % predictions,
                  'rb') as file:
            frame = pickle.load(file)
        optimal_threshold, optimal_f1_score = find_optimal_f1_threshold(frame)
        y_actual = frame.y_actual
        y_predict_proba = frame.y_predict
        y_predict = (y_predict_proba[:, 1] >= optimal_threshold).astype(bool)

        score = [
            predictions, optimal_threshold, optimal_f1_score,
            accuracy_score(y_actual, y_predict),
            precision_score(y_actual, y_predict),
            recall_score(y_actual, y_predict)
        ]
        scores.append(score)
        print(score)

    score_frame = pd.DataFrame(scores,
                               columns=[
                                   'model', 'threshold', 'f1', 'accuracy',
                                   'precision', 'recall'
                               ])

    logger.log("Sorted For Accuracy:")
    score_frame.sort_values('accuracy', ascending=False, inplace=True)
    logger.log(score_frame)

    logger.log("Sorted For F1:")
    score_frame.sort_values('f1', ascending=False, inplace=True)
    logger.log(score_frame)
                        year,
                        month,
                        weekday,
                        hour,
                        iucr_codes[iucr],
                        type_codes[type],
                        description_codes[description],
                        location_codes[location],
                        domestic,
                        beat,
                        district,
                        ward,
                        community,
                        fbi_codes[fbi_code],
                        latitude,
                        longitude,
                        index_crime,
                        non_index_crime,
                        violent_crime,
                        property_crime,
                        public_violence,
                    ])
                    processed_records += 1

    logger.time_log('Pre-Processing Complete.\n')
    logger.log('Total Records: %s' % raw_record_count)
    logger.log('Processed Records: %s\n' % processed_records)
    logger.log('Missing Data:')
    for column in missing_columns.keys():
        logger.log('    %s: %s' % (column, missing_columns[column]))
Exemplo n.º 7
0
    def run_classification_experiment(self,
                                      sample=None,
                                      random_state=None,
                                      test_size=0.20,
                                      multiclass=False,
                                      record_predict_proba=False,
                                      sampling=None,
                                      cv=5,
                                      verbose=True,
                                      transformer=None,
                                      fit_increment=None,
                                      warm_start=False,
                                      max_iters=None,
                                      n_jobs=-1):
        """Train ``self.estimator``, optionally cross-validate it, and evaluate it.

        The data in ``self.df`` is split into a training and a holdout
        partition. The estimator is fitted on the training partition, and
        predictions for both partitions are handed to an ``Evaluator``. The
        holdout predictions are saved as ``<name>_predict.p`` (and
        ``<name>_predict_proba.p`` when requested), the log is written to
        ``<name>.txt``, and the fitted estimator is stored in
        ``self.trained_estimator``.

        Args:
            sample: Number of rows to draw from ``self.df`` before splitting;
                ``None`` uses the whole frame.
            random_state: Seed forwarded to the row sampling, the train/test
                split, the shuffles and the cross-validation helper.
            test_size: Holdout size passed to ``train_test_split``.
            multiclass: Forwarded to the evaluator.
            record_predict_proba: When true, holdout class probabilities are
                predicted, saved and passed to the evaluator as well.
            sampling: Optional re-sampler exposing ``fit_resample``; applied
                to the training partition only.
            cv: Number of stratified folds; ``None`` skips cross validation.
            verbose: Forwarded to the batch fit/predict helpers.
            transformer: Optional transformer; fitted on the training
                partition and forwarded to the fit/predict helpers.
            fit_increment: When set, the estimator is fitted in batches via
                ``batch_fit_classifier`` using this increment.
            warm_start: Forwarded to the cross-validation helper.
            max_iters: Number of shuffled passes over the training data when
                fitting in batches; ``None`` means a single pass.
            n_jobs: Number of parallel jobs used for cross validation.
        """
        use_project_path()

        logger = Logger('%s.txt' % self.name)
        # Close the log even when a step below raises.
        try:
            evaluator = Evaluator(logger)

            data_frame = self.df

            if sample is not None:
                data_frame = data_frame.sample(n=sample,
                                               random_state=random_state)

            # NOTE(review): the feature frames still contain the target column;
            # presumably the transformer/estimator drops it - confirm.
            # The seed is forwarded so that the holdout partition is
            # reproducible whenever the caller supplies a random_state.
            x_train, x_test, y_train, y_test = train_test_split(
                data_frame,
                data_frame[self.target],
                test_size=test_size,
                random_state=random_state)

            if transformer is not None:
                logger.time_log('Fitting Transformer...')
                transformer.fit(x_train)
                logger.time_log('Transformer Fit Complete.\n')

            if sampling is not None:
                logger.time_log('Starting Data Re-Sampling...')
                logger.log('Original Training Shape is %s' % Counter(y_train))
                x_new, y_new = sampling.fit_resample(x_train, y_train)
                logger.log('Balanced Training Shape is %s' % Counter(y_new))
                # Restore the column labels when the input carried them.
                if hasattr(x_train, 'columns'):
                    x_new = pd.DataFrame(x_new, columns=x_train.columns)
                x_train, y_train = x_new, y_new
                logger.time_log('Re-Sampling Complete.\n')
                logger.time_log('Shuffling Re-Sampled Data.\n')
                x_train, y_train = shuffle(x_train,
                                           y_train,
                                           random_state=random_state)
                logger.time_log('Shuffling Complete.\n')

            if self.hyper_parameters is not None:
                self.estimator.set_params(**self.hyper_parameters.params)

            if cv is not None:
                # No random_state here: the folds are not shuffled, so a seed
                # has no effect, and recent scikit-learn releases raise a
                # ValueError when one is given together with shuffle=False.
                kfold = StratifiedKFold(n_splits=cv)
                logger.time_log('Cross Validating Model...')
                fold_scores = Parallel(n_jobs=n_jobs, verbose=3)(
                    delayed(crossfold_classifier)
                    (clone(self.estimator), transformer, x_train, y_train,
                     train_index, test_index, record_predict_proba, verbose,
                     fit_increment, warm_start, max_iters, random_state)
                    for train_index, test_index in kfold.split(
                        x_train, y_train))
                logger.time_log('Cross Validation Complete.\n')

            logger.time_log('Training Model...')
            if fit_increment is not None:
                if max_iters is not None:
                    # NOTE(review): with an integer random_state every pass
                    # sees the same ordering - confirm this is intended.
                    for _ in range(max_iters):
                        x_iter_train, y_iter_train = shuffle(
                            x_train, y_train, random_state=random_state)
                        batch_fit_classifier(self.estimator,
                                             x_iter_train,
                                             y_iter_train,
                                             transformer=transformer,
                                             increment=fit_increment,
                                             verbose=verbose)
                else:
                    batch_fit_classifier(self.estimator,
                                         x_train,
                                         y_train,
                                         transformer=transformer,
                                         increment=fit_increment,
                                         verbose=verbose)
            else:
                if transformer is not None:
                    x_train_transformed = transformer.transform(x_train)
                    self.estimator.fit(x_train_transformed, y_train)
                else:
                    self.estimator.fit(x_train, y_train)
            logger.time_log('Training Complete.\n')

            logger.time_log('Testing Training Partition...')
            y_train_predict = batch_predict(self.estimator,
                                            x_train,
                                            transformer=transformer,
                                            verbose=verbose)
            logger.time_log('Testing Complete.\n')

            train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

            logger.time_log('Testing Holdout Partition...')
            y_test_predict = batch_predict(self.estimator,
                                           x_test,
                                           transformer=transformer,
                                           verbose=verbose)
            logger.time_log('Testing Complete.\n')

            test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
            test_evaluation_frame.save('%s_predict.p' % self.name)

            test_proba_evaluation_frame = None
            if record_predict_proba:
                logger.time_log('Testing Holdout Partition (probability)...')
                y_test_predict_proba = batch_predict_proba(
                    self.estimator,
                    x_test,
                    transformer=transformer,
                    verbose=verbose)
                test_proba_evaluation_frame = EvaluationFrame(
                    y_test, y_test_predict_proba)
                test_proba_evaluation_frame.save('%s_predict_proba.p' %
                                                 self.name)
                logger.time_log('Testing Complete.\n')

            if cv is not None:
                evaluator.evaluate_fold_scores(fold_scores)

            evaluator.evaluate_classifier_result(
                self.estimator,
                test_evaluation_frame,
                train=train_evaluation_frame,
                test_proba=test_proba_evaluation_frame,
                multiclass=multiclass)
        finally:
            logger.close()

        if self.hyper_parameters is not None:
            self.hyper_parameters.save('%s_params.p' % self.name)

        self.trained_estimator = self.estimator
Exemplo n.º 8
0
    def run_classification_search_experiment(self,
                                             scoring,
                                             sample=None,
                                             random_state=None,
                                             test_size=0.20,
                                             n_jobs=-1,
                                             n_iter=2,
                                             cv=5,
                                             verbose=3,
                                             multiclass=False,
                                             record_predict_proba=False,
                                             sampling=None):
        """Search hyper-parameters for ``self.estimator`` and evaluate the best model.

        A ``BayesSearchCV`` over ``self.hyper_parameters.search_space`` is
        fitted on the training partition of ``self.df``. The best estimator is
        used to predict the training and holdout partitions, and the results
        are handed to an ``Evaluator``. The holdout predictions are saved as
        ``<name>_predict.p`` (and ``<name>_predict_proba.p`` when requested),
        the best parameters as ``<name>_params.p``, and the log is written to
        ``<name>.txt``. The best estimator is stored in
        ``self.trained_estimator``.

        Args:
            scoring: Scoring passed to the search.
            sample: Number of rows to draw from ``self.df`` before splitting;
                ``None`` uses the whole frame.
            random_state: Seed forwarded to the row sampling, the train/test
                split, the search and the shuffle.
            test_size: Holdout size passed to ``train_test_split``.
            n_jobs: Number of parallel jobs used by the search.
            n_iter: Number of parameter settings the search evaluates.
            cv: Cross-validation setting passed to the search.
            verbose: Verbosity passed to the search.
            multiclass: Forwarded to the evaluator.
            record_predict_proba: When true, holdout class probabilities are
                predicted, saved and passed to the evaluator as well.
            sampling: Optional re-sampler exposing ``fit_resample``; applied
                to the training partition only.
        """
        use_project_path()

        logger = Logger('%s.txt' % self.name)
        # Close the log even when a step below raises.
        try:
            # The seed is forwarded so that the search is reproducible
            # whenever the caller supplies a random_state.
            search = BayesSearchCV(self.estimator,
                                   self.hyper_parameters.search_space,
                                   n_jobs=n_jobs,
                                   n_iter=n_iter,
                                   cv=cv,
                                   verbose=verbose,
                                   scoring=scoring,
                                   return_train_score=True,
                                   random_state=random_state)

            data_frame = self.df

            if sample is not None:
                data_frame = data_frame.sample(n=sample,
                                               random_state=random_state)

            # NOTE(review): the feature frames still contain the target column;
            # presumably the estimator pipeline drops it - confirm.
            # The seed makes the holdout partition reproducible as well.
            x_train, x_test, y_train, y_test = train_test_split(
                data_frame,
                data_frame[self.target],
                test_size=test_size,
                random_state=random_state)

            if sampling is not None:
                logger.time_log('Starting Data Re-Sampling...')
                logger.log('Original Training Shape is %s' % Counter(y_train))
                x_new, y_new = sampling.fit_resample(x_train, y_train)
                logger.log('Balanced Training Shape is %s' % Counter(y_new))
                # Restore the column labels when the input carried them.
                if hasattr(x_train, 'columns'):
                    x_new = pd.DataFrame(x_new, columns=x_train.columns)
                x_train, y_train = x_new, y_new
                logger.time_log('Re-Sampling Complete.\n')
                logger.time_log('Shuffling Re-Sampled Data.\n')
                x_train, y_train = shuffle(x_train,
                                           y_train,
                                           random_state=random_state)
                logger.time_log('Shuffling Complete.\n')

            logger.time_log('Starting HyperParameter Search...')
            results = search.fit(x_train, y_train)
            logger.time_log('Search Complete.\n')

            logger.time_log('Testing Training Partition...')
            y_train_predict = batch_predict(results.best_estimator_, x_train)
            logger.time_log('Testing Complete.\n')

            train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

            logger.time_log('Testing Holdout Partition...')
            y_test_predict = batch_predict(results.best_estimator_, x_test)
            logger.time_log('Testing Complete.\n')

            test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
            test_evaluation_frame.save('%s_predict.p' % self.name)

            test_proba_evaluation_frame = None
            if record_predict_proba:
                logger.time_log('Testing Holdout Partition (probability)...')
                y_test_predict_proba = batch_predict_proba(
                    results.best_estimator_, x_test)
                test_proba_evaluation_frame = EvaluationFrame(
                    y_test, y_test_predict_proba)
                test_proba_evaluation_frame.save('%s_predict_proba.p' %
                                                 self.name)
                logger.time_log('Testing Complete.\n')

            evaluator = Evaluator(logger)
            evaluator.evaluate_classifier_result(
                results,
                test_evaluation_frame,
                train=train_evaluation_frame,
                test_proba=test_proba_evaluation_frame,
                multiclass=multiclass)
        finally:
            logger.close()

        # Keep the winning parameters so a later experiment can reuse them.
        self.hyper_parameters.params = results.best_params_
        self.hyper_parameters.save('%s_params.p' % self.name)

        self.trained_estimator = results.best_estimator_