Example #1
import os
from pathlib import PosixPath
from unittest import TestCase

from pymongo import MongoClient

from json_handler import JsonHandler  # import path assumed; the project module under test


class TestJSON(TestCase):
    def setUp(self):
        for f in "testfile1.json", "testfile2.JSON", "fakefile.txt":
            handle = open(f, "w+")
            handle.write('{"a": 1, "b": 2}')
            handle.close()
        self.j = JsonHandler(".", "unittest", "unittest")
        self.client = MongoClient()
        self.client.drop_database("unittest")
        self.coll = self.client["unittest"]["unittest"]

    def test_can_make_pathlib_object_of_json_files(self):
        self.assertIn(PosixPath("testfile1.json"), self.j.jsons)
        self.assertIn(PosixPath("testfile2.JSON"), self.j.jsons)
        self.assertNotIn("fakefile.txt", self.j.jsons)

    def test_can_parse_json(self):
        self.assertEqual(self.j.parse_one_json(self.j.jsons[0]), {"a": 1, "b": 2})

    def test_can_put_files_in_database(self):
        self.assertEqual(self.coll.count(), 0)
        self.j.put_jsons_in_database(self.j.jsons, self.j.db_name, self.j.coll_name)
        self.assertEqual(self.coll.count(), 2)

    def test_inserted_docs_can_be_retrieved(self):
        self.j.put_jsons_in_database(self.j.jsons, self.j.db_name, self.j.coll_name)
        self.assertEqual(self.coll.find({"a": 1}).count(), 2)

    def tearDown(self):
        for f in "testfile1.json", "testfile2.JSON", "fakefile.txt":
            os.remove(f)
        self.client.drop_database("unittest")
        self.client.close()
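
The tests above pin down the interface they expect from JsonHandler: a constructor taking a directory, database name and collection name, a jsons attribute holding PosixPath objects for the *.json files found there, plus parse_one_json and put_jsons_in_database. Below is a minimal sketch of a class that would satisfy them; it is an assumption, not the project's actual implementation, and it presumes a MongoDB server reachable on the default localhost port.

import json
from pathlib import Path

from pymongo import MongoClient


class JsonHandler:
    def __init__(self, path, db_name, coll_name):
        self.db_name = db_name
        self.coll_name = coll_name
        # match *.json case-insensitively, so "testfile2.JSON" is picked up as well
        self.jsons = [p for p in Path(path).iterdir() if p.suffix.lower() == ".json"]

    def parse_one_json(self, json_path):
        # decode a single file into a dict
        with open(json_path) as handle:
            return json.load(handle)

    def put_jsons_in_database(self, jsons, db_name, coll_name):
        # insert one document per JSON file; assumes MongoDB on localhost:27017
        client = MongoClient()
        try:
            client[db_name][coll_name].insert_many([self.parse_one_json(p) for p in jsons])
        finally:
            client.close()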
Example #2
    def __init__(self, sizes, dataHandler, save_rate, checkpoints_dir,
                 activation_function):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.dataHandler = dataHandler
        self.layers = layer.create_layers(self.sizes, activation_function)
        self.save_rate = save_rate
        self.checkpoints_dir = checkpoints_dir
        self.json_handler = JsonHandler()
Example #3
	def do_GET(self):
		if self.path=="/api/data/total_faces":
			self._set_response()
			self.wfile.write(JsonHandler(filename).get_data_from_file("totalFaces"))

		elif self.path=="/api/data/avg_age":
			self._set_response()
			self.wfile.write(JsonHandler(filename).get_data_from_file("currentAverageAge"))

		elif self.path=="/api/data/parity":
			self._set_response()
			self.wfile.write(JsonHandler(filename).get_data_from_file("parity"))

		elif self.path=="/api/data/expressions":
			self._set_response()
			self.wfile.write(JsonHandler(filename).get_data_from_file("expressions"))
Example #4
	def do_POST(self):
		if self.path=="/compute_stats":
			content_length = int(self.headers['Content-Length']) # <--- Gets the size of data
			post_data = self.rfile.read(content_length) # <--- Gets the data itself
			self._set_response()

			JsonHandler(filename).process_data_from_json(post_data)

			self.wfile.write(post_data)
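
Both request handlers above call a _set_response helper that is not shown in these excerpts. A typical definition for an http.server.BaseHTTPRequestHandler subclass might look like the sketch below; the handler class name, status code and content type are assumptions. Note that wfile.write expects bytes, so get_data_from_file is assumed to return an already-encoded payload.

from http.server import BaseHTTPRequestHandler


class StatsHandler(BaseHTTPRequestHandler):
    def _set_response(self, status=200, content_type="application/json"):
        # send the status line and headers before the body is written to wfile
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.end_headers()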
Example #5
    def setUp(self):
        for f in "testfile1.json", "testfile2.JSON", "fakefile.txt":
            handle = open(f, "w+")
            handle.write('{"a": 1, "b": 2}')
            handle.close()
        self.j = JsonHandler(".", "unittest", "unittest")
        self.client = MongoClient()
        self.client.drop_database("unittest")
        self.coll = self.client["unittest"]["unittest"]
Example #6
def mc_worker(jobs, stats, ctl, store, timeout=5):
    logging.info("mc_worker started")

    while ctl["run_ok"]:
        try:
            root, parents, val = jobs.get(block=True, timeout=timeout)
        except Queue.Empty:
            logging.debug("mc_worker hasn't received jobs for %s seconds" %
                          timeout)
            continue

        start = time.time()

        for server in val:

            try:
                ip, port = server.split(":")
            except (ValueError, AttributeError), e:
                logging.error("unable to collect mc stats from %s : %s" %
                              (server, e))
                continue

            mc_server = Server(ip)

            # get bucket name from root and parent nodes
            bucket = DataHelper.get_bucket(root, parents)

            # initialize memcached source
            mc_source = MemcachedSource(mc_server, bucket)

            # initialize handlers to dump data json doc
            j_handler = JsonHandler()
            s_handler = SerieslyHandler(store)

            # collect data from source and emit to handlers
            mc_coll = MemcachedCollector([mc_source], [j_handler, s_handler])
            mc_coll.collect()
            mc_coll.emit()
            stats.put([mc_source.fast, mc_source.meta], block=True)
            stats.put([mc_source.slow, mc_source.meta], block=True)

        delta = time.time() - start
        logging.debug("collected mc stats from %s, took %s seconds" %
                      (val, delta))

        if delta < timeout:
            logging.debug("mc_worker sleep for %s seconds" % (timeout - delta))
            time.sleep(timeout - delta)
Example #7
async def init(loop, args):
    if args.tags:
        tags = args.tags.split(',')
    else:
        tags = [Model.default_tag]
    model = Model(loop)
    if args.model:
        await model.set_model(args.model, tags)
    batcher = Batcher(model, loop, args.batch_size)

    web_app = web.Application(loop=loop, client_max_size=args.request_size)
    web_app.on_shutdown.append(on_shutdown)
    web_app.router.add_get('/stats', batcher.stats_handler)

    json_handler = JsonHandler(model, batcher, args.batch_transpose)

    if args.no_cors:
        web_app.router.add_get('/', batcher.info_handler)
        web_app.router.add_post('/{method}', json_handler.handler)
        web_app.router.add_post('/', json_handler.handler)
    else:
        cors = aiohttp_cors.setup(web_app,
                                  defaults={
                                      "*":
                                      aiohttp_cors.ResourceOptions(
                                          allow_credentials=True,
                                          expose_headers="*",
                                          allow_headers="*")
                                  })

        get_resource = cors.add(web_app.router.add_resource('/'))
        cors.add(get_resource.add_route("GET", batcher.info_handler))

        post_resource = cors.add(web_app.router.add_resource('/{method}'))
        cors.add(post_resource.add_route("POST", json_handler.handler))

        post_resource = cors.add(web_app.router.add_resource('/'))
        cors.add(post_resource.add_route("POST", json_handler.handler))

    if args.static_path:
        web_app.router.add_static('/web/',
                                  path=args.static_path,
                                  name='static')

    grpc_app = Server([GrpcHandler(model, batcher)], loop=loop)

    return web_app, grpc_app
Example #8
    parser.add_option('-j', '--jsonFile', dest='json_file')

    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])

header_list = [
    "person_id", "level", "chapterIndex", "contentIndex", "questionIndex",
    "derivedIndex", "duration", "point", "clearDateTime",
    "incorrectAnswerCount"
]

rest_handler = RestHandler()
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.drag_file, header_list=header_list)

#f = open(options.json_file, 'w')
date_list_json = rest_handler.get_json_of_date_list()
date_list = json_handler.json_to_date_list(date_list_json)
for idx, date in enumerate(date_list):
    #for idx, date in enumerate(['1970-01-01','2019-02-07']):
    #result_dict_list = []
    if date == '1970/01/01':
        print('Date 1970/01/01')
        continue
    print('[{}], ({}/{}) Now Collecting'.format(date, idx + 1, len(date_list)))
    for mobile_os in ('iOS', 'Android'):
        person_list_json = rest_handler.get_json_of_person_id_by_date(
            date, mobile_os)
Example #9
import sys

from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler

def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-o', '--userFile', dest='user_file')
    parser.add_option('-p', '--personFile', dest='person_file')
    parser.add_option('-m', '--mobileOS', dest='mobile_os')
    options, otherjunk = parser.parse_args(argv)
    return options

options = parse_commands(sys.argv[1:])

with open(options.person_file) as person_file:
    person_list = person_file.read().splitlines()

options = parse_commands(sys.argv[1:])

header_list = ['person_id', 'level']

rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.user_file,header_list=header_list)

for person_id in person_list:
    json_result = rest_handler.get_user_data_by_person_id(person_id)
    result_dict_list = json_handler.json_user_data_to_dict_list(json_result,person_id)
    print(result_dict_list)
    csv_handler.dict_to_csv(dict_list=result_dict_list)
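
Examples #8 and #9 (and several later ones) share the same pipeline: RestHandler fetches JSON from an API, JsonHandler turns it into a list of dicts, and CsvHandler.dict_to_csv appends those dicts as rows under a fixed header. A plausible sketch of such a CsvHandler is below; it is an assumption based on how the class is called here, not the project's actual csv_handler module.

import csv
import os


class CsvHandler:
    def __init__(self, filepath, header_list):
        self.filepath = filepath
        self.header_list = header_list

    def dict_to_csv(self, dict_list):
        # write the header only when the file is first created, then append one row per dict
        write_header = not os.path.exists(self.filepath)
        with open(self.filepath, "a", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=self.header_list, extrasaction="ignore")
            if write_header:
                writer.writeheader()
            writer.writerows(dict_list)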
Example #10
home = expanduser("~")
sys.path.append('{}/ProjectDoBrain/codes/Modules'.format(home))
from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler


def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-s', '--surveyFile', dest='survey_file')
    parser.add_option('-b', '--birthdayFile', dest='birthday_file')
    parser.add_option('-m', '--mobileOS', dest='mobile_os')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])

header_list = ['person_id', 'birthday']
survey_df = pd.read_csv(options.survey_file)

rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.birthday_file,
                         header_list=header_list)

for person in survey_df.person_id.unique():
    json_result = rest_handler.get_user_data_by_person_id(person)
    result_dict_list = json_handler.user_json_to_birthday(person, json_result)
    csv_handler.dict_to_csv(dict_list=result_dict_list)
Example #11
    parser.add_option('-s', '--scoreFile', dest='score_file')
    parser.add_option('-j', '--jsonFile', dest='json_file')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])

header_list = [
    "person_id", "level", "chapterIndex", "contentIndex", "questionIndex",
    "derivedIndex", "duration", "point", "clearDateTime",
    "incorrectAnswerCount"
]

rest_handler = RestHandler()
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.score_file, header_list=header_list)

#f = open(options.json_file, 'w')
date_list_json = rest_handler.get_score_json_of_date_list()
date_list = json_handler.json_to_date_list(date_list_json)
survey_person_list = []
for mobile_os in ('iOS', 'Android'):
    survey_rest_handler = RestHandler(mobile_os=mobile_os)
    survey_json = survey_rest_handler.get_survey_data()
    temp_person_list = json_handler.json_survey_data_to_person_list(
        survey_json)
    survey_person_list += temp_person_list
survey_person_set = set(survey_person_list)
survey_person_set.add('3c234013d1ec58644fe3779b67542e45')
survey_person_set.add('48f116a8d36c91fa878653d625b75102')
Example #12
import click
import os
import copy

import convert

from abc import ABCMeta
from itertools import groupby
from collections import namedtuple
from datetime import datetime

from json_handler import JsonHandler

jh = JsonHandler()
LEAGUES_DATA = jh.load_leagues()


def get_writer(output_format='stdout', output_file=None):
    return globals()[output_format.capitalize()](output_file)


class BaseWriter(metaclass=ABCMeta):

    def __init__(self, output_file):
        self.output_filename = output_file


class Stdout(BaseWriter):
    def __init__(self, output_file):
        super().__init__(output_file)
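
get_writer resolves the format name to a writer class through the module's globals(), so 'stdout' maps to the Stdout class defined above. A hypothetical call, for illustration only:

writer = get_writer()                      # globals()['Stdout'](None)  -> Stdout writer
writer = get_writer('stdout', 'out.txt')   # Stdout with output_filename == 'out.txt'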
Example #13
class Network(object):
    def __init__(self, sizes, dataHandler, save_rate, checkpoints_dir,
                 activation_function):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.dataHandler = dataHandler
        self.layers = layer.create_layers(self.sizes, activation_function)
        self.save_rate = save_rate
        self.checkpoints_dir = checkpoints_dir
        self.json_handler = JsonHandler()

    def feedforward(self, a, keep_z=False):
        self.layers[0].activation = a.reshape(a.shape[0], 1)
        i = 1
        for layer in self.layers[1:]:
            layer.update_layer(self.layers[i - 1].activation, keep_z)
            i += 1
        return layer.activation

    def update_mini_batch(self, mini_batch, eta):
        # the nablas have the same shapes as their respective layers
        nabla_b = [np.zeros(layer.bias.shape) for layer in self.layers[1:]]
        nabla_w = [np.zeros(layer.weight.shape) for layer in self.layers[1:]]
        mini_batch_length = len(mini_batch)

        for k in xrange(
                mini_batch_length
        ):  # for each training example in the mini-batch, compute the required adjustment
            x, y = self.dataHandler.get_example(update_batch=True)
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb
                       for nb, dnb in zip(nabla_b, delta_nabla_b)]  # dC/db
            nabla_w = [nw + dnw
                       for nw, dnw in zip(nabla_w, delta_nabla_w)]  # dC/dw

        for i, (nw, nb) in enumerate(zip(nabla_w, nabla_b)):
            self.layers[i +
                        1].weight -= (eta /
                                      mini_batch_length) * nw  # update weight
            self.layers[i + 1].bias -= (eta /
                                        mini_batch_length) * nb  # update bias

    def backprop(self, x, y):
        # feedforward: pass through the network toward the last layer, computing the zs and activations
        self.feedforward(x, keep_z=True)

        # output error (compute the last layer "by hand")
        nabla_b = []
        nabla_w = []

        for l in self.layers[1:]:
            nabla_b.append(np.zeros(l.bias.shape))
            nabla_w.append(np.zeros(l.weight.shape))

        activation = self.layers[-1].activation
        z = self.layers[-1].z

        delta = self.cost_derivative(activation,
                                     y) * self.layers[-1].activation_function(
                                         z, prime=True)  # (BP1)
        nabla_b[-1] = delta  # (BP3)
        nabla_w[-1] = np.dot(delta,
                             self.layers[-2].activation.transpose())  # (BP4)

        # backpropagate the error; l increases, but since negative indices read the layers
        # back to front, the error is propagated from the end of the network to the start
        for l in xrange(2, self.num_layers):
            z = self.layers[-l].z
            afp = self.layers[-l].activation_function(z, prime=True)
            delta = np.dot(self.layers[-l + 1].weight.transpose(),
                           delta) * afp  # (BP2)
            nabla_b[-l] = delta  # (BP3)
            nabla_w[-l] = np.dot(
                delta, self.layers[-l - 1].activation.transpose())  # (BP4)
        return (nabla_b, nabla_w)

    def cost_derivative(self, output_activations, y):
        return (output_activations - y)

    def evaluate(self, test_data):
        # store the results of passing the test set through the network,
        # taking the largest output as the network's answer
        test_results = []
        for i in test_data:
            x, y = self.dataHandler.get_example(i)
            test_results.append((np.argmax(self.feedforward(x)), np.argmax(y)))

        # test_results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x,y) in test_data]
        n = len(test_data)
        hit = sum(int(x == y) for (x, y) in test_results)
        # return the hit rate
        return (float(hit) / float(n))

    def SGD(self, training_data, epochs, mini_batch_size, eta, val_data=None):
        # n = len(training_data)
        # for each epoch, shuffle the training set, build mini-batches of the given size, and update weights and biases
        for j in xrange(epochs):
            # each mini_batch contains a list of indexes, each index corresponds
            # to an example
            mini_batches = self.dataHandler.get_mini_batches(
                minBatch_size=mini_batch_size)
            for i, mini_batch in enumerate(mini_batches):
                self.update_mini_batch(mini_batch, eta)

            if j > 0 and j % self.save_rate == 0:
                self.save_learning(self.checkpoints_dir + 'epoch' + str(j) +
                                   '.json')

            # if there is a validation set, evaluate the current network to get the hit rate
            if val_data:
                print "Epoch {0} - hit rate: {1}".format(
                    j, self.evaluate(self.dataHandler.val_set))
            # otherwise, the epoch is done and we move on to the next one
            else:
                print "Epoch {0} complete.".format(j)

    def save_learning(self, ckpt_name):
        # Dict to save the params from the checkpoint
        ckpt = {}
        weights = {}
        biases = {}

        for i, layer in enumerate(self.layers):
            weights['l' + str(i)] = layer.weight.tolist()
            biases['l' + str(i)] = layer.bias.tolist()

        ckpt = {"weights": weights, "biases": biases}

        self.json_handler.write(ckpt, ckpt_name)

    def load_learning(self, ckpt_file):
        # Loads the parameters from the checkpoint file
        params = self.json_handler.read(ckpt_file)
        weights = params['weights']
        biases = params['biases']

        # Fill the layers with the loaded parameters
        for i in xrange(len(self.layers)):
            self.layers[i].weight = np.asarray(
                weights['l' + str(i)]).astype("float64")
            self.layers[i].bias = np.asarray(biases['l' +
                                                    str(i)]).astype("float64")

    def predict(self, x):
        # x must have the same size of the input layer
        self.feedforward(x)
        return np.argmax(self.layers[-1].activation)

    def weights_for_humans(self, img_dir):
        # This function saves the weights in image format
        # So humans can try to see the magic better
        # But for now, it is only possible if the number of neurons in a layer
        # is a square number
        for i, layer in enumerate(self.layers[1:]):
            k = len(layer.weight[0])
            sqrt_k = int(math.sqrt(k))
            # checks if it has a square size
            if sqrt_k * sqrt_k == k:
                img = np.zeros((k, 3), dtype="uint8")
                for j, weight in enumerate(layer.weight):
                    p = np.argwhere(weight >= 0)
                    n = np.argwhere(weight < 0)

                    img[p[:, 0], :] = np.absolute(weight[p]) * np.array(
                        [0, 255, 0])  # green
                    img[n[:, 0], :] = np.absolute(weight[n]) * np.array(
                        [0, 0, 255])  # red

                    if img_dir[-1] == '/':
                        img_name = img_dir + "l" + str(i + 1) + "w" + str(
                            j) + ".jpg"
                    else:
                        img_name = img_dir + '/' + "l" + str(
                            i + 1) + "w" + str(j) + ".jpg"

                    cv2.imwrite(img_name, img.reshape(sqrt_k, sqrt_k, 3))
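
save_learning and load_learning above delegate checkpoint I/O to a JsonHandler with write and read methods. A minimal sketch of such a helper, assuming it is just a thin wrapper around the json module:

import json


class JsonHandler(object):
    def write(self, data, path):
        # dump the checkpoint dict to a JSON file
        with open(path, "w") as f:
            json.dump(data, f)

    def read(self, path):
        # load a checkpoint dict back from disk
        with open(path) as f:
            return json.load(f)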
Example #14
import sys
import json

from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler


def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-s', '--surveyFile', dest='survey_file')
    parser.add_option('-m', '--mobileOS', dest='mobile_os')
    parser.add_option('-j', '--jsonFile', dest='json_file')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])
f = open(options.json_file, 'w')
header_list = [
    'person_id', 'wonDiagnosis', 'worryingCategory', 'diagnosedDisease'
]

rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.survey_file, header_list=header_list)

json_result = rest_handler.get_survey_data()
f.write(json.dumps(json_result) + '\n')

result_dict_list = json_handler.json_survey_data_to_dict(json_result)
csv_handler.dict_to_csv(dict_list=result_dict_list)

f.close()
Example #15
options = parse_commands(sys.argv[1:])

with open(options.person_file) as person_file:
    person_list = person_file.read().splitlines()

options = parse_commands(sys.argv[1:])

header_list = [
    'person_id', 'level', 'game_level', 'clear_date_time', 'Memory',
    'VelocityPerceptual', 'Numerical', 'Discrimination', 'SpacePerceptual',
    'Inference', 'Organizing', 'Creative'
]

rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.user_score_file,
                         header_list=header_list)

f = open(options.json_file, 'w')
content_num = 0
for person_id in person_list:
    try:
        json_result = rest_handler.get_user_score_data_by_person_id(person_id)
    except:
        continue
    f.write(person_id + '\t' + json_result + '\n')
    result_dict_list = json_handler.json_user_score_data_to_dict_list(
        json_result, person_id, content_num)
    csv_handler.dict_to_csv(dict_list=result_dict_list)
f.close()
Example #16
class QueryMatcher(object):
    jsonHandler = JsonHandler()
    nlp = None

    def __init__(self, nlp):
        self.nlp = nlp

    def getQuery(self, statement):
        properties = self.getProperties(statement)
        queries = self.searchPropertyMatch(properties)
        maxQueryID, maxReplaces, maxScore = self.getBestQueryProperties(
            queries, statement)
        return self.buildOriginalQuery(maxQueryID, maxReplaces), maxScore

    def getProperties(self, statement):

        doc = self.nlp(statement)
        properties = {"time": "0", "price": "0"}
        for each in doc.ents:
            if str(each.label_) == "MONEY" or str(each.label_) == "CARDINAL":
                properties.__setitem__("price", "1")
            elif str(each.label_) == "TIME":
                properties.__setitem__("time", "1")

        return properties

    def searchPropertyMatch(self, properties):
        queries = self.jsonHandler.matchProperties(properties)
        return queries

    def getBestQueryProperties(self, queries, statement):
        #do the partial matching for each query as well as the entity matching
        maxScore = 0.0
        maxQueryID = ""
        maxReplaces = None
        for ID in queries:
            totScore = self.getMaxHit(statement,
                                      self.jsonHandler.getGeneralizedQuery(ID))
            replaces = self.jsonHandler.getReplaces(ID)
            numOfReplaces = len(replaces)
            for i in range(0, len(replaces)):
                replace = replaces[i]
                score, key = self.getMaximumScoringKey(replace, statement)
                totScore += score / numOfReplaces
                replaces[i] = key
            if (maxScore >= totScore):
                continue
            maxScore = totScore
            maxQueryID = ID
            maxReplaces = replaces
        return maxQueryID, maxReplaces, maxScore

    def getMaxHit(self, statement, generalizedQueries):
        maxScore = 0.0
        queryList = generalizedQueries.split(",")
        for each in queryList:
            score = fuzz.partial_ratio(statement, each)
            if (score > maxScore):
                maxScore = score

        return maxScore

    def buildOriginalQuery(self, ID, replaces):
        if ID == "":
            return
        originalQuery = self.jsonHandler.getOriginalQuery(ID)
        for i in range(0, len(replaces)):
            originalQuery = originalQuery.replace("xxx" + str(i), replaces[i])
        return originalQuery

    def getMaximumScoringKey(self, replace, statement):

        if (replace == "price"):
            doc = self.nlp(statement)
            for ent in doc.ents:
                if str(ent.label_) == "MONEY" or str(ent.label_) == "CARDINAL":
                    return 0, self.get_first_nbr_from_str(str(ent))
        else:
            categorySet = replace.split(",")
            maxScore = 0.0
            maxScoreReplace = ""
            for each in categorySet:
                keySet = self.jsonHandler.getKeywordList(each)
                for eachInner in keySet:
                    score = fuzz.partial_ratio(eachInner, statement)
                    if score > maxScore:
                        maxScore = score
                        maxScoreReplace = eachInner
            return maxScore, maxScoreReplace

    def get_first_nbr_from_str(self, input_str):

        if not input_str or not isinstance(input_str, str):
            return 0
        out_number = ''
        for ele in input_str:
            if (ele == '.' and '.' not in out_number) or ele.isdigit():
                out_number += ele
            elif out_number:
                break
        return out_number
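
A hypothetical way to drive QueryMatcher, assuming a spaCy English model is installed and the class's JsonHandler can load its query definitions; getQuery returns the reconstructed query together with its match score:

import spacy

nlp = spacy.load("en_core_web_sm")  # model name is an assumption
matcher = QueryMatcher(nlp)
query, score = matcher.getQuery("find restaurants under 20 dollars open at 7 pm")
print(score, query)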
Example #17
import sys
from os.path import expanduser

home = expanduser("~")
sys.path.append('{}/ProjectDoBrain/codes/Modules'.format(home))

from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler


def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-p', '--personFile', dest='person_file')
    parser.add_option('-m', '--mobileOs', dest='mobile_os')

    options, otherjunk = parser.parse_args(argv)
    return options


#make person_id csv without HEADER

options = parse_commands(sys.argv[1:])

header_list = ["person_id"]

rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.person_file, header_list=header_list)

json_result = rest_handler.get_json_of_person_id()
result_dict_list = json_handler.json_person_id_to_dict_list(
    json_source=json_result, mobile_os=options.mobile_os)
csv_handler.dict_to_csv(dict_list=result_dict_list)