Exemplo n.º 1
0
        description="Machine Learning AI for FIND")
    parser.add_argument('-p', '--port', type=int, help='port', default=7005)
    parser.add_argument('--host', type=str, help='host', default='localhost')
    parser.add_argument('-D',
                        '--data_directory',
                        type=str,
                        help='data directory',
                        default='.')
    args = parser.parse_args()

    api.DEFAULT_DATA_DIRECTORY = args.data_directory

    try:
        logger.info("starting up on {0} port {1}".format(args.host, args.port))
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind((args.host, args.port))
        sock.listen(1)

        logger.debug('waiting for connections')
        while True:
            conn, addr = sock.accept()
            logger.debug('Connected address: {0}'.format(addr))
            t = threading.Thread(target=on_new_client, args=(
                conn,
                addr,
            ))
            t.start()

    except Exception as e:
        logger.error(e)
Exemplo n.º 2
0
class AI(object):
    """Bank of scikit-learn classifiers for FIND sensor data.

    Learns location classifiers from a CSV of sensor readings
    (first column = location name, remaining columns = sensor values)
    and produces per-algorithm probability predictions for new readings.
    """

    def __init__(self, family):
        # Dedicated logger for this class (NewLogger is defined elsewhere
        # in this module).
        self.logger = NewLogger("learn.AI")
        # Bidirectional mapping between location names and integer class ids:
        # naming['from'][name] -> id, naming['to'][id] -> name.
        self.naming = {'from': {}, 'to': {}}
        self.family = family

    def classify(self, sensor_data):
        """Classify one sensor reading against every trained algorithm.

        Parameters
        ----------
        sensor_data : dict
            Expected shape {'s': {sensor_type: {sensor_name: value}}}
            (assumption from the access pattern below — TODO confirm
            against the caller).

        Returns
        -------
        dict with keys 'location_names' (id -> name map), 'predictions'
        (one payload per algorithm that succeeded) and 'is_unknown'
        (True when no sensor matched a known column).
        """
        header = self.header[1:]

        # Map each column name to its FIRST position for O(1) lookup;
        # replaces repeated O(n) list.index calls of the original.
        col_index = {}
        for i, name in enumerate(header):
            col_index.setdefault(name, i)

        is_unknown = True
        csv_data = numpy.zeros(len(header))
        for sensor_type, sensors in sensor_data['s'].items():
            if not sensors:
                continue
            for sensor, value in sensors.items():
                sensor_name = sensor_type + "-" + sensor
                if sensor_name in col_index:
                    is_unknown = False
                    csv_data[col_index[sensor_name]] = value

        self.headerClassify = header
        self.csv_dataClassify = csv_data.reshape(1, -1)
        payload = {'location_names': self.naming['to'], 'predictions': []}

        # Run one prediction per algorithm concurrently; each thread fills
        # its own slot in self.results, so no locking is needed.
        threads = [None] * len(self.algorithms)
        self.results = [None] * len(self.algorithms)
        for i, alg in enumerate(self.algorithms.keys()):
            threads[i] = Thread(target=self.do_classification, args=(i, alg))
            threads[i].start()
        for thread in threads:
            thread.join()

        for result in self.results:
            # BUGFIX: identity comparison with None (was `result != None`).
            if result is not None:
                payload['predictions'].append(result)
        payload['is_unknown'] = is_unknown
        return payload

    def do_classification(self, index, name):
        """Run predict_proba for one algorithm and store its payload.

        Writes a {'name', 'locations', 'probabilities'} dict into
        self.results[index]; leaves the slot as None when prediction
        fails or produces NaN probabilities.
        """
        if name == 'Gaussian Process':
            # Deliberately skipped (also commented out of the training
            # list in learn()).
            return

        t = time.time()
        try:
            prediction = self.algorithms[name].predict_proba(
                self.csv_dataClassify)
        except Exception as e:
            logger.error(self.csv_dataClassify)
            logger.error(str(e))
            return

        predict_payload = {'name': name, 'locations': [], 'probabilities': []}
        # Sort class probabilities descending.
        ranked = sorted(enumerate(prediction[0]),
                        key=operator.itemgetter(1),
                        reverse=True)
        for class_id, prob in ranked:
            predict_payload['locations'].append(str(class_id))
            predict_payload['probabilities'].append(round(float(prob), 2))
            if math.isnan(prob):
                # NaN means the model output is unusable; discard it all.
                return

        # BUGFIX: elapsed time was negated in the original (t - time.time()).
        self.logger.debug("{} {:d} ms".format(
            name, int(1000 * (time.time() - t))))
        self.results[index] = predict_payload

    @timeout(10)
    def train(self, clf, x, y):
        """Fit clf on (x, y); the timeout decorator aborts after 10 s."""
        return clf.fit(x, y)

    def learn(self, fname, file_data=None):
        """Load training CSV and fit every classifier.

        Parameters
        ----------
        fname : str
            Path to a CSV file; used only when file_data is falsy.
        file_data : str, optional
            Base64-encoded, gzip-compressed CSV content.

        Raises
        ------
        ValueError
            If the CSV contains no data rows.
        """
        if file_data:
            # file_data is a base64-encoded, gzipped CSV payload.
            raw = gzip.decompress(base64.b64decode(file_data))
            csvfile = StringIO(raw.decode('utf-8'))
        else:
            csvfile = open(fname, 'r')

        t = time.time()
        self.header = []
        rows = []
        naming_num = 0
        try:
            reader = csv.reader(csvfile, delimiter=',')
            for i, row in enumerate(reader):
                self.logger.debug(row)
                if i == 0:
                    self.header = row
                    continue
                for j, val in enumerate(row):
                    if j == 0:
                        # First column is the location name: map it to a
                        # stable integer class id.
                        if val not in self.naming['from']:
                            self.naming['from'][val] = naming_num
                            self.naming['to'][naming_num] = val
                            naming_num += 1
                        row[j] = self.naming['from'][val]
                    elif val == '':
                        # Missing readings become 0.
                        row[j] = 0
                    else:
                        try:
                            row[j] = float(val)
                        except ValueError:
                            # BUGFIX: was a bare `except:`.
                            self.logger.error("problem parsing value " +
                                              str(val))
                rows.append(row)
        finally:
            # Close the file even if parsing raises (original leaked it).
            csvfile.close()

        if not rows:
            # Guard: the original crashed with IndexError on rows[0].
            raise ValueError("no training rows found in CSV")

        # First column of each row is the class, y; the rest is x.
        # NOTE: the original shuffled the fill order, but each row is
        # written to its own index so the result is order-independent.
        y = numpy.zeros(len(rows))
        x = numpy.zeros((len(rows), len(rows[0]) - 1))
        for i, row in enumerate(rows):
            y[i] = row[0]
            x[i, :] = numpy.array(row[1:])

        names = [
            "Nearest Neighbors",
            "Linear SVM",
            "RBF SVM",
            # "Gaussian Process",  # skipped: see do_classification()
            "Decision Tree",
            "Random Forest",
            "Neural Net",
            "AdaBoost",
            "Naive Bayes",
            "QDA"
        ]
        classifiers = [
            KNeighborsClassifier(3),
            SVC(kernel="linear", C=0.025, probability=True),
            SVC(gamma=2, C=1, probability=True),
            # GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
            DecisionTreeClassifier(max_depth=5),
            RandomForestClassifier(max_depth=5,
                                   n_estimators=10,
                                   max_features=1),
            MLPClassifier(alpha=1),
            AdaBoostClassifier(),
            GaussianNB(),
            QuadraticDiscriminantAnalysis()
        ]
        self.algorithms = {}
        for name, clf in zip(names, classifiers):
            t2 = time.time()
            self.logger.debug("learning {}".format(name))
            try:
                self.algorithms[name] = self.train(clf, x, y)
                # BUGFIX: elapsed time was negated in the original.
                self.logger.debug("learned {}, {:d} ms".format(
                    name, int(1000 * (time.time() - t2))))
            except Exception as e:
                # Best-effort: a classifier that fails to train is skipped.
                self.logger.error("{} {}".format(name, str(e)))

        self.logger.debug("{:d} ms".format(int(1000 * (time.time() - t))))

    def save(self, save_file, redis_cache=False):
        """Pickle header, naming, algorithms and family into a gzip file."""
        if redis_cache:
            print(redis_cache)
        t = time.time()
        # `with` guarantees the file is closed even if a dump raises.
        with gzip.open(save_file, 'wb') as f:
            pickle.dump(self.header, f)
            pickle.dump(self.naming, f)
            pickle.dump(self.algorithms, f)
            pickle.dump(self.family, f)
        self.logger.debug("{:d} ms".format(int(1000 * (time.time() - t))))

    def load(self, save_file, redis_cache=False):
        """Restore the state written by save().

        SECURITY: pickle.load executes arbitrary code from the file —
        only load files produced by a trusted save().
        """
        if redis_cache:
            print(redis_cache)
        t = time.time()
        with gzip.open(save_file, 'rb') as f:
            self.header = pickle.load(f)
            self.naming = pickle.load(f)
            self.algorithms = pickle.load(f)
            self.family = pickle.load(f)
        self.logger.debug("{:d} ms".format(int(1000 * (time.time() - t))))