def test_tree_root(self):
        path = get_path(['tree', utils.EXPECTED_BRANCH])
        rv = self.app.get(path)
        assert json.loads(rv.data) == utils.load_json('tree.json')

        rv = self.app.get('{0}?limit={1}'.format(path, utils.EXPECTED_LIMIT))
        assert json.loads(rv.data) == utils.load_json('tree_limit.json')
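All of the examples on this page call a load_json helper from their project's utils module. The helpers differ from project to project, but a minimal sketch, assuming it simply wraps json.load and (as some examples below do) returns a default when the file is missing, might look like this:

# Hypothetical sketch only; each project's real load_json may differ
# (encoding handling, error reporting, URL support, etc.).
import json
import os


def load_json(path, default=None):
    """Parse the JSON file at `path`, or return `default` if it does not exist."""
    if default is not None and not os.path.exists(path):
        return default
    with open(path, 'r') as f:
        return json.load(f)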
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    endpoint = utils.format_endpoint(args.endpoint[0])
    options = None

    if args.datafile is not None:
        if os.path.exists(args.datafile):
            options = utils.load_json(args.datafile)
    else:
        fkjson_path = os.path.dirname(os.path.abspath(__file__)) + '/fkdata.json'
        if os.path.exists(fkjson_path):
            options = utils.load_json(fkjson_path)

    if options is None:
        print '''
        fkdata.json file needed...
        '''
    else:
        fk_checker = fkchecker.FkChecker(endpoint, options)
        fk_checker.execute()

        print 'Total Frames Received: ' + str(fk_checker.get_recv_frames)
        print 'Total Asserts: ' + str(fk_checker.get_asserts)
        print 'Total validations executed: ' + str(fk_checker.get_validations_runned)
        print 'Assert frame rate: ' + str(fk_checker.get_assert_rate) + ' %'
    def test_commits_root(self):
        path = get_path(['commits', utils.EXPECTED_BRANCH])
        rv = self.app.get(path)
        assert json.loads(rv.data) == utils.load_json('commits.json')

        rv = self.app.get('{0}?limit={1}'.format(path, utils.EXPECTED_LIMIT))
        assert json.loads(rv.data) == utils.load_json('commits_limit.json')

        rv = self.app.get(get_path(['commits', utils.EXPECTED_REV]))
        assert json.loads(rv.data) == utils.load_json('commits_rev.json')
    def test_blob(self):
        rv = self.app.get(get_path(['blob',
                                    utils.EXPECTED_BRANCH,
                                    utils.EXPECTED_RESOURCE]))
        assert json.loads(rv.data) == utils.load_json('blob.json')

        rv = self.app.get(get_path(['blob',
                                    utils.EXPECTED_REV,
                                    utils.EXPECTED_RESOURCE]))
        assert json.loads(rv.data) == utils.load_json('blob_rev.json')
    def test_get_resource(self):
        resource = core.get_resource(utils.EXPECTED_PROJECT,
                                     utils.EXPECTED_REPOSITORY,
                                     utils.EXPECTED_BRANCH,
                                     utils.EXPECTED_RESOURCE)
        assert resource == utils.load_json('blob.json')

        resource = core.get_resource(utils.EXPECTED_PROJECT,
                                     utils.EXPECTED_REPOSITORY,
                                     utils.EXPECTED_REV,
                                     utils.EXPECTED_RESOURCE)
        assert resource == utils.load_json('blob_rev.json')
Example #6
File: __init__.py Project: toxu/NLP
def load_train_result(args):
    settings = parse_settings(args.setting)
    try:
        chn_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['chn'])
        eng_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['eng'])
    except Exception as e:
        print e
    if os.path.isfile(chn_trained_file) and os.path.isfile(eng_trained_file):
        chn_freq = utils.load_json(chn_trained_file)
        eng_freq = utils.load_json(eng_trained_file)
    else:
        chn_freq, eng_freq = train(args)
    return chn_freq, eng_freq
Example #7
    def _request(self, url, data=None, method=None):
        """Send an HTTP request to the remote server.

        Args:
          method - A string for the HTTP method to send the request with.
          url - The URL to send the request to.
          body - The message body to send.

        Returns:
          A dictionary with the server's parsed JSON response.
        """
        logging.debug('%s %s %s' % (method, url, data))

        request = Request(url, data=data, method=method)
        request.add_header('Accept', 'application/json')

        opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                      HttpErrorHandler())
        response = opener.open(request)
        try:
            if response.code > 399 and response.code < 500:
                return {'status': response.code, 'value': response.read()}
            body = response.read().replace('\x00', '').strip()
            if body:
                data = utils.load_json(body.strip())
                assert type(data) is dict, (
                    'Invalid server response body: %s' % body)
                assert 'status' in data, (
                    'Invalid server response; no status: %s' % body)
                assert 'value' in data, (
                    'Invalid server response; no value: %s' % body)
                return data
        finally:
            response.close()
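Note that the Request class used here is not the stock urllib2.Request, which has no method keyword; a minimal sketch of such a wrapper (an assumption, not this project's actual class) could be:

import urllib2


class Request(urllib2.Request):
    """Hypothetical urllib2.Request subclass that honours an explicit HTTP method."""

    def __init__(self, url, data=None, method=None):
        urllib2.Request.__init__(self, url, data=data)
        self._method = method

    def get_method(self):
        # Fall back to urllib2's default (GET, or POST when data is set).
        return self._method or urllib2.Request.get_method(self)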
Example #8
	def run(self, edit):
		print("va a importar")
		window=sublime.active_window()
		view=window.active_view()
		self.window=sublime.active_window()
		self.view=self.window.active_view()
		java=Java()
		tipos=java.get_tipos()
		self.packages=utils.load_json(PATH_INDEX_PACKAGES)
		projectFiles=utils.get_files({"ext":"java"})
		projectFiles=[x.replace("/", ".").replace("\\", ".") for x in projectFiles]
		projectFiles=[x[x.rfind(".java.")+6:x.rfind(".")] for x in projectFiles]
		
		##print(projectFiles)
		viewPackage=view.substr(view.find(utils.REG_JAVA_PACKAGE, 0))
		viewPackage=viewPackage.replace("package ", "").replace(";", "")

		for projectFile in projectFiles:
			className=projectFile[projectFile.rfind(".")+1:]
			packageClass=projectFile[:projectFile.rfind(".")]
			if packageClass==viewPackage:continue
			if self.packages.get(className)==None:
				self.packages[className]=[]
			self.packages[className].append(packageClass)
		
		self.clases=list(set(tipos))
		##print(self.clases)
		self.i=0
		self.importar(None)
Example #9
	def on_query_completions(self, view, prefix, locations):
		if utils.get_language() != "java":return
		ultimo=utils.get_last_character()
		if ultimo=="." and utils.get_language()=="java":
			window=sublime.active_window()
			view=window.active_view()
			word=utils.get_word(-1)
			variables=Java().get_variables()
			tipo=word
			static=True
			if variables.get(word):
				tipo=variables[word]
				static=False

			package=re.findall("import ([\w.]+\.%s);"%tipo, utils.get_text())
			
			if not package:
				posibleRuta=os.path.join(PATH_JSON, "java", "lang", tipo+".json")
				if os.path.exists(posibleRuta):
					package=["java.lang."+tipo]

			if package:
				package=package[0]
				clase=self.get_project_class(package)
				if clase:
					return utils.get_completion_list(clase["members"])
				ruta=package.replace(".", os.sep)+".json"
				ruta=os.path.join(PATH_JSON, ruta)
				print("ya se determino")
				objeto=utils.load_json(ruta)
				miembros="clase" if static else "object"
				return utils.get_completion_list(objeto[miembros])
def make_Xy():
    orders = pd.read_pickle(ORDERS_LOCAL_PATH)
    print('%s: %s' % (ORDERS_NAME, list(orders.shape)))

    Xcols = [col for col in orders.columns if orders[col].dtype == np.int32]
    print(len(Xcols))
    orders = orders[Xcols]
    print('%s: %s' % (ORDERS_NAME, list(orders.shape)))

    converted_quotes = load_json('converted_quotes.json')
    converted = {q for q, s in converted_quotes if s}

    quotes = orders[orders['isQuote'] == 1]

    _, key_to_idx = load_enumerations()
    CHANGE = key_to_idx['type']['CHANGE']
    quotes = quotes[quotes['type'] == CHANGE]

    print('%s: %s' % ('quotes', dim(quotes)))
    quotes = quotes[[col for col in quotes.columns if col not in
                     {'endCustomerId', 'originalEndCustomerId'}]]
    print('%s: %s' % ('quotes', dim(quotes)))

    y = pd.DataFrame(index=quotes.index)
    y_values = [i in converted for i in quotes['id'].values]
    y['converted'] = pd.Series(y_values, index=quotes.index, dtype=np.int32)
    print('y', y.shape, y['converted'].dtype)

    X = quotes[[col for col in quotes.columns if col != 'isQuote']]
    X = X[[col for col in X.columns if col != 'id']]
    X = X[[col for col in X.columns if col != 'customerId']]
    print('X', X.shape)
    # print(X.describe())
    return X, y
Example #11
def read_project_config(project_path):
	config_path = project_path + "/project-config.luna2d"

	if os.path.exists(config_path):
		return utils.load_json(config_path)
	else:
		return {}
    def __init__(self, input_movies, file_path, count=5):
        # Call parent constructor function
        super(Recommendation, self).__init__()

        self.movies_data = load_json(file_path)
        self.input_movies = input_movies
        self.show_count = count
Example #13
    def _request(self, url, data=None, method=None):
        """Send an HTTP request to the remote server.

        Args:
          method - A string for the HTTP method to send the request with.
          url - The URL to send the request to.
          body - The message body to send.

        Returns:
          A dictionary with the server's parsed JSON response.
        """
        LOGGER.debug('%s %s %s' % (method, url, data))

        parsed_url = urlparse.urlparse(url)
        auth = None
        password_manager = None
        if parsed_url.username:
            netloc = parsed_url.hostname
            if parsed_url.port:
                netloc += ":%s" % parsed_url.port
            cleaned_url = urlparse.urlunparse((parsed_url.scheme, netloc, parsed_url.path,
                parsed_url.params, parsed_url.query, parsed_url.fragment))
            password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(None, "%s://%s" % (parsed_url.scheme, netloc), parsed_url.username, parsed_url.password)
            request = Request(cleaned_url, data=data, method=method)
        else:
            request = Request(url, data=data, method=method)


        request.add_header('Accept', 'application/json')

        if password_manager:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          HttpErrorHandler(),
                                          urllib2.HTTPBasicAuthHandler(password_manager))
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          HttpErrorHandler())
        response = opener.open(request)
        try:
            if response.code > 399 and response.code < 500:
                return {'status': response.code, 'value': response.read()}
            body = response.read().replace('\x00', '').strip()
            content_type = response.info().getheader('Content-Type') or []
            if 'application/json' in content_type:
                data = utils.load_json(body.strip())
                assert type(data) is dict, (
                    'Invalid server response body: %s' % body)
                assert 'status' in data, (
                    'Invalid server response; no status: %s' % body)
                # Some of the drivers incorrectly return a response
                # with no 'value' field when they should return null.
                if 'value' not in data:
                    data['value'] = None
                return data
            elif 'image/png' in content_type:
                data = {'status': 0, 'value': body.strip()}
                return data
        finally:
            response.close()
Example #14
 def salt_ssh(self, target, cmd):
     roster = self['container']['config']['salt_config']['roster']
     target_id = target['config']['name']
     SSH = "salt-ssh -l quiet -i --out json --key-deploy --passwd {0} {1} {{0}}".format(
         target['ssh_config']['password'], target_id)
     data = self['container'].run(SSH.format(cmd))
     return load_json(data)[target_id]
Example #15
 def run(self):
     while True:
         comando=None
         if os.path.exists(ARCHIVO_COMANDO) and existe_archivo_bloqueo(ORDEN):
             archivito=open(ARCHIVO_COMANDO)
             comando=archivito.read()
             archivito.close()
             os.remove(ARCHIVO_COMANDO)
             if comando:
                 comando=comando.strip()
                 print(comando)
                 window=sublime.active_window()
                 view=window.active_view()
                 if comando.find("side_bar")!=-1:
                     window.run_command(comando)
                 elif comando.startswith("code_"):
                     rutaJson=sublime.packages_path()+os.sep+"snippets"+os.sep+utils.get_language()+".json"
                     if os.path.exists(rutaJson):
                         comando=comando.replace("code_", "")
                         d=utils.load_json(rutaJson)
                         if d.get(comando) :view.run_command('insert_snippet', {"contents":utils.agregarCursores(d[comando])})
                 elif comando.startswith("make_"):
                     window=sublime.active_window()
                     view=window.active_view()
                     comando=comando.replace("make_", "")   
                     view.run_command("load_template", {"nombre":comando})
                 else:
                     view.run_command(comando)
         time.sleep(1)
def find_all_chars(verbose=False):

    char_count = load_json(CHAR_COUNT, {})
    if not char_count:
        train, test, _ = load_data()
        char_count = defaultdict(int)
        S = 0
        for df in (test, train):
            for sentence in df_to_sentences(df):
                S += 1
                for c in sentence:
                    assert len(c) == 1, (c, sentence)
                    char_count[c] += 1
        char_count = {c: n for c, n in char_count.items()}
        save_json(CHAR_COUNT, char_count)
        print('S=%d' % S)

    chars = sorted(char_count, key=lambda c: (-char_count[c], c))
    N = sum(char_count.values())
    print('find_all_chars: %d %r' % (len(chars), ''.join(chars[:100])))
    print('N=%d=%.3fM' % (N, N * 1e-6))

    if verbose:
        tot = 0.0
        for i, c in enumerate(chars[:200]):
            n = char_count[c]
            r = n / N
            tot += r
            print('%4d: %8d %.4f %.3f %4d=%2r' % (i, n, r, tot, ord(c), c))

    return char_count
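This example pairs load_json with a save_json counterpart; a minimal sketch, assuming plain json.dump with no extra options, might be:

# Hypothetical sketch of the save_json helper used above.
import json


def save_json(path, obj):
    with open(path, 'w') as f:
        json.dump(obj, f, indent=2, sort_keys=True)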
Example #17
    def test_get_resources(self):
        resources = core.get_resources(utils.EXPECTED_PROJECT,
                                       utils.EXPECTED_REPOSITORY,
                                       utils.EXPECTED_BRANCH)
        assert resources == utils.load_json('tree.json')

        resources = core.get_resources(utils.EXPECTED_PROJECT,
                                       utils.EXPECTED_REPOSITORY,
                                       utils.EXPECTED_REV)
        assert resources == utils.load_json('tree_rev.json')

        resources = core.get_resources(utils.EXPECTED_PROJECT,
                                       utils.EXPECTED_REPOSITORY,
                                       utils.EXPECTED_BRANCH,
                                       limit=utils.EXPECTED_LIMIT)
        assert resources == utils.load_json('tree_limit.json')
Example #18
def update_libs(args, luna2d_path, config):
	libs_source_dir = luna2d_path + "/lib/" + args.platform + "/release/"
	libs_dest_dir = args.project_path + "/.luna2d/libs"

	# Copy luna2d libs
	shutil.rmtree(libs_dest_dir, ignore_errors=True)
	shutil.copytree(libs_source_dir, libs_dest_dir)

	# Copy sdkmodules libs
	if "sdkmodules" in config:
		for module_name in config["sdkmodules"]:
			module_path = find_sdk_module(module_name, args, luna2d_path)

			if module_path is None:
				print("SDK module \"" + module_name + "\" not found")
				continue

			module_config_path = module_path + "sdkmodule.luna2d"
			if not os.path.exists(module_config_path):
				print("Config for SDK module \"" + module_name + "\" not found")
				continue

			module_config = utils.load_json(module_config_path)

			for module_file in module_config["files"]:
				src_path = module_path + "/" + module_file
				dest_path = libs_dest_dir + "/" + module_name + "-" + module_file
				shutil.copyfile(src_path, dest_path)

			if args.platform == "android":
				update_android.apply_sdk_module(args, module_name, config, module_config)
Example #19
File: __init__.py Project: toxu/NLP
def run(args):
    settings = parse_settings(args.setting)
    try:
        chn_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['chn'])
        eng_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['eng'])
        test_data = os.path.join(ENV.data_dir, settings['input'])
    except Exception as e:
        print e
    if os.path.isfile(chn_trained_file) and os.path.isfile(eng_trained_file):
        chn_freq = utils.load_json(chn_trained_file)
        eng_freq = utils.load_json(eng_trained_file)
    else:
        chn_freq, eng_freq = train(args)
    
    classifier = naive_baysian_classifier.NaiveBaysianClassifier(chn_freq, eng_freq)
    worker.start_classify(test_data, classifier)
Example #20
def cambiar_version(cambio=1, mostrar=False):
    diff=utils.load_json(DIFF_JSON)
    window=sublime.active_window()
    view=window.active_view()
    filename=view.file_name()
    folder=get_folder(filename)
    actual=diff[view.file_name()]
    viejo=actual
    lista=os.listdir(folder)
    lista=sorted(lista)
    i=lista.index(actual)
    i+=cambio
    if i<0 or i==len(lista):return
    actual=lista[i]
    diff[filename]=actual
    utils.save_json(DIFF_JSON, diff)
    lines=view.lines(sublime.Region(0, view.size()))
    folder=get_folder(filename)
#    self.view.add_regions("diferentes", self.lista, "comment", "bookmark", sublime.DRAW_OUTLINED)
    if not mostrar:utils.set_text(open(get_folder(filename)+os.sep+actual).read())

    print("\n")
    with open(folder+os.sep+actual, 'r') as one:
        with open(folder+os.sep+viejo, 'r') as two:
            diff = difflib.unified_diff(one.readlines(),two.readlines())
            for line in diff:
                line=line.strip()
                if line.startswith("@@ -"):
#                    line=line[4, line.find(",")]
#                    print(line)
                    utils.go_line(int(line[4:line.find(",")])+3)
                if line.startswith("-") or line.startswith("+") or line.startswith("@@"):
                    print(line.strip()+":")
    print("\n")
Example #21
 def on_query_completions(self, view, prefix, locations):
     lang=utils.get_language()
     if lang != "go":return
     ultimo=utils.get_last_character()
     if ultimo != ".":return
     d=utils.load_json(GO_MAIN_MODULE)
     word=utils.get_word(-1)
     if d.get(word):
         return utils.get_completion_list(d[word])
 def __init__(self):
     os.makedirs(SPACY_DIR, exist_ok=True)
     self.text_tokens_path = os.path.join(SPACY_DIR, 'text.tokens.json')
     self.token_vector_path = os.path.join(SPACY_DIR, 'token.vector.pkl')
     self.text_tokens = load_json(self.text_tokens_path, {})
     self.token_vector = load_pickle(self.token_vector_path, {})
     self.text_tokens_len = len(self.text_tokens)
     self.token_vector_len = len(self.token_vector)
     self.nlp = spacy.load('en_core_web_lg')
     self.n_calls = 0
Example #23
 def run(self, edit):
     lang=utils.get_language()
     self.rutaSamples=os.path.normpath(os.path.join(sublime.packages_path(), "..", "samples", lang+".json"))
     print("la ruta es : "+self.rutaSamples)
     if not os.path.exists(self.rutaSamples):
         return
     self.samples=utils.load_json(self.rutaSamples)
     self.keys=list(self.samples.keys())
     window=sublime.active_window()
     window.show_quick_panel(self.keys,self.load)
Example #24
def get_metrics(start, end, author = None, config = None):

    # GitHub search API uses dates, not hours
    if len(start) == 19:
        start = start[:-9]
        end = end[:-9]

    metrics = {}

    if config is None:
        config = get_config(os.environ['HOME'] + '/.qe-metrics/github.conf')

    user = config['username']

    if not author:
        author = user

    ##### NEW issues opened during period
    url = 'https://api.github.com/search/issues?q=author:%s+created:%s..%s' % \
            (author, start, end)
    result = load_json(url)

    prs = 0 # pull requests
    for item in result['items']:
        if item.has_key('pull_request'):
            prs += 1
    metrics['issues'] = int(result['total_count']) - prs
    metrics['pull_requests'] = prs

    #### get all events in the last 90 days and filter out commits
    result = load_json('https://api.github.com/users/%s/events/public' % author)
    commits = 0
    start_dt = datetime.strptime(start, '%Y-%m-%d')
    end_dt = datetime.strptime(end + ' 23:59:59', '%Y-%m-%d %H:%M:%S')

    for event in result:
        if event['type'] == "PushEvent":
            created_at = datetime.strptime(event['created_at'], '%Y-%m-%dT%H:%M:%SZ')
            if start_dt <= created_at <= end_dt:
                commits += event['payload']['size']
    metrics['commits'] = commits

    return metrics
def load_enumerations():
    enumerations = load_json('enumerations.json')
    idx_to_key = {col: {i: k for i, k in enumerate(enumerations[col])} for col in enumerations}
    key_to_idx = {col: {k: i for i, k in enumerate(enumerations[col])} for col in enumerations}

    # for col in enumerations:
    #     idx_to_key[col] = {i: k for i, k in enumerate(enumerations[col])}
    #     key_to_idx[col] = {k: i for i, k in enumerate(enumerations[col])}

    return idx_to_key, key_to_idx
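As a quick sanity check (hypothetical usage, assuming enumerations.json is present), the two returned mappings invert each other:

# Hypothetical usage of load_enumerations.
idx_to_key, key_to_idx = load_enumerations()
for col in key_to_idx:
    for key, idx in key_to_idx[col].items():
        assert idx_to_key[col][idx] == key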
 def run(self, edit):
     paquete_snippets=sublime.packages_path()+os.sep+"snippets"
     lista=[]
     for archivo in utils.get_files({"folder":paquete_snippets, "ext":"json"}):
         snip=utils.load_json(archivo)
         lista=lista + list(snip.keys())
     lista=list(set(lista))
     for snippet in lista:
         snippet=snippet.lower().replace("-", "_").replace(" ", "").replace("?", "_")
         utils.file_write(RUTA_COMANDOS+"code_"+snippet+".bat", "echo code_"+snippet+" > d:/sublime3/comando.txt")
         print(snippet)
Example #27
 def __init__(self, task):
     self.ckpt_path = './ckpt/{}/'.format(task)
     if not os.path.exists(self.ckpt_path):
         os.makedirs(self.ckpt_path)
     source_dir = os.path.join('.', 'dataset', 'data', task)
     self.word_vocab, _ = load_vocab(os.path.join(source_dir, 'words.vocab'))
     self.char_vocab, _ = load_vocab(os.path.join(source_dir, 'chars.vocab'))
     self.vocab_size = len(self.word_vocab)
     self.char_vocab_size = len(self.char_vocab)
     self.label_size = load_json(os.path.join(source_dir, 'label.json'))["label_size"]
     self.word_emb = load_embeddings(os.path.join(source_dir, 'glove.filtered.npz'))
Example #28
def main(args):
	luna2d_path = utils.get_luna2d_path()
	config = utils.load_json(args.game_path + "/config.luna2d")
	project_config = read_project_config(args.project_path)
	build_config = utils.load_json(args.project_path + "/.luna2d/build.luna2d")

	print("Updating config...")
	merge_configs(config, project_config)

	print("Updating libraries...")
	update_libs(args, luna2d_path, config)

	print("Updating project..")
	if args.platform == "wp":
		update_wp.do_update(args, config, build_config["projectName"])

	if args.skip_assets == "false":
		update_assets(args, luna2d_path, config)

	print("Done")
Example #29
def main():
    data_folder = os.path.join('.', 'dataset', 'data')
    # set tasks
    source_dir = os.path.join(data_folder, task)
    # create config
    config = Config(task)
    # load datasets
    trainset = load_json(os.path.join(source_dir, 'train.json'))
    devset = load_json(os.path.join(source_dir, 'dev.json'))
    testset = load_json(os.path.join(source_dir, 'test.json'))
    # build model
    model = DenseConnectBiLSTM(config, resume_training=resume_training)
    # training
    batch_size = 200
    epochs = 30
    if has_devset:
        model.train(trainset, devset, testset, batch_size=batch_size, epochs=epochs, shuffle=True)
    else:
        trainset = trainset + devset
        model.train(trainset, None, testset, batch_size=batch_size, epochs=epochs, shuffle=True)
Example #30
def run(function_path, experiment_config_file, job_config_file, force):

    job_dir = os.path.dirname(job_config_file)
    experiment_config = load_json(os.path.join(experiment_config_file))
    state = load_json(job_config_file)

    with ChangeDir(job_dir):

        experiment = experiment_scheduler.save_experiment(
            name=experiment_config["experiment_name"],
            table_name=experiment_config["experiment_name"] + "_jobs",
            clusters=experiment_config["clusters"],
            duree="", mem="", env="", gpu="")

        table_name = experiment["table"]

        channel = Channel()

        state["jobman"] = dict(status=channel.START)
        state_to_hash = copy.copy(state)
        jobs = job_scheduler.load_jobs(table_name, hash_of=state_to_hash)
        state_to_hash["jobman"] = dict(status=channel.RUNNING)
        jobs += job_scheduler.load_jobs(table_name, hash_of=state_to_hash)
        if len(jobs) > 0:
            logger.warning("Job already registered, loading from database")
            state = jobs[0]

        if state["jobman"]["status"] != channel.START:
            if not force:
                raise RuntimeError("Job (%d) is not available" % state["id"])

            logging.warning("Job (%d) is not available. Forcing it to run" %
                            state["id"])

        state["jobman"]["status"] = channel.RUNNING

        state = job_scheduler.save_job(table_name, state)

        resolve(function_path_re.sub('', function_path)).jobman_main(state, channel)

        job_scheduler.save_job(experiment["table"], state)
Example #31
import pytest
import pandas as pd
import numpy as np
from data import SF1Data
from utils import load_json
config = load_json('config.json')


class TestSF1Data:
    def test_load_base_data(self):
        data_loader = SF1Data(config['sf1_data_path'])
        df = data_loader.load_base_data()
        assert type(df) == pd.DataFrame
        assert len(df) > 0
        assert 'ticker' in df.columns
        assert df['ticker'].isnull().max() == False

    @pytest.mark.parametrize(
        ["tickers", "quarter_count", "dimension"],
        [(['AAPL', 'ZRAN', 'TSLA', 'WORK'], 10, 'ARQ'),
         (['INTC', 'ZRAN', 'XRDC', 'XOM'], 5, 'ARQ'),
         (['INTC', 'ZRAN', 'XRDC', 'XOM'], 5, 'MRY'), (['NVDA'], 10, 'ARQ'),
         (['ZRAN'], 10, 'ARQ')],
    )
    def test_load_quarterly_data(self, tickers, quarter_count, dimension):
        data_loader = SF1Data(config['sf1_data_path'])
        quarterly_df = data_loader.load_quarterly_data(tickers, quarter_count,
                                                       dimension)

        assert type(quarterly_df) == pd.DataFrame
        assert 'ticker' in quarterly_df.columns
def export(brand):
    workbook = xlsxwriter.Workbook(
        'output/excel/' + brand.replace('/', '#') + '.xlsx',
        {'strings_to_urls': False})
    worksheet = workbook.add_worksheet('jsfilter')

    worksheet.write('A1', 'Brand')
    worksheet.write('B1', 'Class')
    worksheet.write('C1', 'Model')
    worksheet.write('D1', 'Year')
    worksheet.write('E1', 'Engine Vol')
    worksheet.write('F1', 'Engine No')
    worksheet.write('G1', 'Body No')
    worksheet.write('H1', 'Filter Type')
    worksheet.write('I1', 'Product Name')
    worksheet.write('J1', 'Product Code')
    worksheet.write('K1', 'Specifications')
    worksheet.write('L1', 'Cross Reference')
    worksheet.write('M1', 'Applications')
    worksheet.write('N1', 'Image URL')
    worksheet.write('O1', 'Product URL')

    worksheet.set_column('A:H', 15)
    worksheet.set_column('I:I', 30)
    worksheet.set_column('J:J', 15)
    worksheet.set_column('K:O', 30)

    row, col = 1, 0

    with os.scandir('output/data') as it:
        for file_entry in it:
            if file_entry.name.endswith('.json'):
                if file_entry.name.split('_')[0] != brand:
                    continue
                data_file = load_json(file_entry.path)

                for item in data_file:

                    category_list = ['oil', 'air', 'fuel', 'cabin', 'trans']
                    for category_item in category_list:
                        for product_item in item[category_item]:
                            details = get_product_details(
                                category_item, product_item['code'])

                            worksheet.write(row, col, item['brand'])
                            worksheet.write(row, col + 1, item['class'])
                            worksheet.write(row, col + 2, item['model'])
                            worksheet.write(row, col + 3, item['year'])
                            worksheet.write(row, col + 4, item['engine_vol'])
                            worksheet.write(row, col + 5, item['engine_no'])
                            worksheet.write(row, col + 6, item['body_no'])
                            worksheet.write(row, col + 7,
                                            category_item.upper())

                            worksheet.write(row, col + 9, product_item['code'])

                            if details is not None:
                                worksheet.write(row, col + 8, details['name'])
                                worksheet.write(
                                    row, col + 10,
                                    flatten_list(details['specifications']))
                                worksheet.write(
                                    row, col + 11,
                                    flatten_list(details['cross_reference']))
                                worksheet.write(
                                    row, col + 12,
                                    flatten_list(details['applications']))
                                worksheet.write(row, col + 13,
                                                details['image_url'])
                                worksheet.write(row, col + 14, details['url'])

                            print('\r', brand, row, end='')
                            row += 1

    workbook.close()
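flatten_list is not shown in this snippet; a plausible sketch, assuming it only joins the scraped list entries into one spreadsheet-friendly string, is:

# Hypothetical sketch of the flatten_list helper used by export().
def flatten_list(items):
    return ', '.join(str(item) for item in items if item)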
    # fileName = 'hive_tb_day_lp_30day_bfor_data_2020_210201.csv',
    # fileName = 'hive_tb_day_lp_30day_bfor_data_2020.csv',
    period = '2020-01-01:2020-12-31', # inclusive of both start and end dates
    # busiDiv = ['산업'],
    # category = ['금속', '병원'], 
    # enteCode = ['055344', '000014'] # the lower-level criterion takes priority
    )


df = viewHeader(myDir=r'D:\데이터 분석\data\postGres', 
            fileName = 'post_tb_asos_mart_hour_20200101_20201231.csv',
            reload = True, pickleWrite=False, nrows=2)
    
    

cust_no_lst_api_df = utils.load_json(path = 'D:\데이터 분석\data',
                fileName = 'custLst_210216.txt')

importlib.reload(utils)
cust_no_lst_hive_df = utils.load_cust_no_list_hive(
    path = 'D:\데이터 분석\data',
    fileName = 'hive_tb_cust_no_list_20201201_20210217.csv',
    date = '20210216')



cust_no_lst_hive_set = set(cust_no_lst_hive_df.cust_no)

cust_no_lst_api_set = set(cust_no_lst_api_df.custNo)

print(cust_no_lst_hive_set  - cust_no_lst_api_set )
print(cust_no_lst_api_set  - cust_no_lst_hive_set )
async def get_movies_posters(kp: KP, assets_folder: str):
    movies = load_json('parser/movies.json')
    print("Movies started")
    await asyncio.gather(*_get_tasks(movies, kp.get_film_photo, assets_folder=assets_folder))
    print("Movies Done!")
    def test_opus_mt_distill_script(self):
        data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro"
        env_vars_to_replace = {
            "--fp16_opt_level=O1": "",
            "$MAX_LEN": 128,
            "$BS": 16,
            "$GAS": 1,
            "$ENRO_DIR": data_dir,
            "$m": "sshleifer/student_marian_en_ro_6_1",
            "val_check_interval=0.25": "val_check_interval=1.0",
        }

        # Clean up bash script
        bash_script = ((self.test_file_dir / "distil_marian_no_teacher.sh"
                        ).open().read().split("distillation.py")[1].strip())
        bash_script = bash_script.replace("\\\n",
                                          "").strip().replace('"$@"', "")
        bash_script = bash_script.replace("--fp16 ", " ")

        for k, v in env_vars_to_replace.items():
            bash_script = bash_script.replace(k, str(v))
        output_dir = self.get_auto_remove_tmp_dir()
        bash_script = bash_script.replace("--fp16", "")
        epochs = 6
        testargs = (["distillation.py"] + bash_script.split() + [
            f"--output_dir={output_dir}",
            "--gpus=1",
            "--learning_rate=1e-3",
            f"--num_train_epochs={epochs}",
            "--warmup_steps=10",
            "--val_check_interval=1.0",
            "--do_predict",
        ])
        with patch.object(sys, "argv", testargs):
            parser = argparse.ArgumentParser()
            parser = pl.Trainer.add_argparse_args(parser)
            parser = SummarizationDistiller.add_model_specific_args(
                parser, os.getcwd())
            args = parser.parse_args()
            # assert args.gpus == gpus THIS BREAKS for multi_gpu

            model = distill_main(args)

        # Check metrics
        metrics = load_json(model.metrics_save_path)
        first_step_stats = metrics["val"][0]
        last_step_stats = metrics["val"][-1]
        assert len(
            metrics["val"]) >= (args.max_epochs / args.val_check_interval
                                )  # +1 accounts for val_sanity_check

        assert last_step_stats["val_avg_gen_time"] >= 0.01

        assert first_step_stats["val_avg_bleu"] < last_step_stats[
            "val_avg_bleu"]  # model learned nothing
        assert 1.0 >= last_step_stats[
            "val_avg_gen_time"]  # model hanging on generate. Maybe bad config was saved.
        assert isinstance(last_step_stats[f"val_avg_{model.val_metric}"],
                          float)

        # check lightning ckpt can be loaded and has a reasonable statedict
        contents = os.listdir(output_dir)
        ckpt_path = [x for x in contents if x.endswith(".ckpt")][0]
        full_path = os.path.join(args.output_dir, ckpt_path)
        ckpt = torch.load(full_path, map_location="cpu")
        expected_key = "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"
        assert expected_key in ckpt["state_dict"]
        assert ckpt["state_dict"][
            "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32

        # TODO: turn on args.do_predict when PL bug fixed.
        if args.do_predict:
            contents = {os.path.basename(p) for p in contents}
            assert "align_wnums_test_generations.txt" in contents
            assert "test_results.txt" in contents
            # assert len(metrics["val"]) ==  desired_n_evals
            assert len(metrics["test"]) == 1
    parser.add_argument('--action',  type=str,
                        default=None, choices=['create_dict', 'plot', 'acc_by_type_plot', 'magnum_opus'],
                        help='create_qtype_dict or plot')
    parser.add_argument('--lanecheck_path',  type=str,
                        default=None,
                        help='Where to get the lanecheck dictionary from')
    parser.add_argument('--model',  type=str,
                        default=None,
                        help='Model name')
    args = parser.parse_args()
    if not args.action:
        sys.exit()

    # If you want to create a dictionary
    if args.action == 'create_dict':    
        train_dset = load_json(os.path.expanduser("~/kable_management/data/tvqa/tvqa_train_processed.json"))
        val_dset = load_json(os.path.expanduser("~/kable_management/data/tvqa/tvqa_val_processed.json"))
        test_dset = load_json(os.path.expanduser("~/kable_management/data/tvqa/tvqa_test_public_processed.json"))
        total_dset = []
        total_dset += train_dset
        total_dset += val_dset
        total_dset += test_dset

        create_q_type_dict(train_dset, "train")
        create_q_type_dict(val_dset, "val")
        create_q_type_dict(test_dset, "test")
        create_q_type_dict(total_dset, 'total')
    
    # Plot the dictionary of question types
    if args.action == 'plot':
        train_qtype_dict = load_pickle(os.path.expanduser("~/kable_management/data/tvqa/q_type/train_q_type_dict.pickle"))
Example #37
        logging.info("data upload succeeded")
    else:
        logging.error("failed to upload data")


def upload_data(couch, data, db_name):
    db = get_db(couch, db_name)
    bulk_upload_data(db_name, data)


def populate_couch(data, info):
    try:
        couch = couchdb.Server(os.environ["COUCH_CONNECTION_URL"])

        delete_db(couch, "airports")
        delete_db(couch, "flights")

        logging.info("starting data upload")
        upload_data(couch, data, "flights")
        upload_data(couch, info, "airports")
        logging.info("data upload complete")
    except Exception as e:
        logging.error(e)


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    flights = utils.load_json("flight_data.json")
    airports = utils.load_json("airports.json")
    populate_couch(flights, airports)
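get_db and delete_db are not included in this snippet; minimal sketches built on the couchdb package API (an assumption about how this project implements them) might be:

# Hypothetical sketches of the helpers used by populate_couch above.
def get_db(couch, db_name):
    # couchdb.Server supports membership tests, item access and create().
    return couch[db_name] if db_name in couch else couch.create(db_name)


def delete_db(couch, db_name):
    if db_name in couch:
        couch.delete(db_name)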
Example #38
def extract_sentences(max_processed=-1):

    path_raw = load_json('summary_raw.json')
    path_raw = {fix(k): fix(v) for k, v in path_raw.items()}
    file_url_path = join(root, 'file_url.json')
    raw_url = load_json(file_url_path)
    raw_url = {fix(k): v for k, v in raw_url.items()}

    print('+' * 80)
    print('path_raw', len(path_raw))
    for i, k in enumerate(sorted(path_raw)[:5]):
        print('%d: %s %s %s %s' % (i, k, exists(k), path_raw[k], exists(path_raw[k])))
    print('#' * 80)
    print('raw_url', len(raw_url))
    for i, k in enumerate(sorted(raw_url)[:5]):
        print('%d: %s %s' % (i, k, exists(k)))
    # assert False

    path_url = {path: raw_url[raw] for path, raw in path_raw.items() if raw in raw_url}

    files = glob(join(summaries_dir, '*.json'))
    print('%4d files' % len(files))
    print('%4d path_url' % len(path_url))

    para_count = defaultdict(int)
    sent_count = defaultdict(int)
    para_url = {}
    sent_url = {}
    for path in files:
        path = fix(abspath(path))
        # if not exists(path):
        #     print('^^ %s does not exist' % path)
        #     continue
        summary = load_json(path)

        for para in summary['text:paras']:
            para_count[para] += 1
            if len(para) < 30:
                continue
            if para not in para_url:
                if path not in path_url:
                    continue
                assert path in path_url, path
                para_url[para] = path_url.get(path, "UNKNOWN")
        # for para2 in summary['text:sents']:
        #     for sent in para2:
        #         sent_count[sent] += 1
        #         # if sent not in sent_url:
        #         #     sent_url[sent] = path_url[name]

    print('%d paragraphs %d unique' % (sum(para_count.values()), len(para_count)))
    # print('%d sentences %d unique' % (sum(sent_count.values()), len(sent_count)))

    def sent_key(sent):
        return -len(sent), sent_count[sent], sent

    paras = sorted(para_count, key=sent_key)
    # sents = sorted(sent_count, key=sent_key)

    paras = [{'text': text, 'meta': {'url': para_url[text]}} for text in paras if text in para_url]
    # sents = [{'text': text, 'meta': {'url': sent_url[text]}} for text in sents]

    # paras = [{'text': text} for text in paras]
    # sents = [{'text': text} for text in sents]

    # "meta":{"source":"GitHub","url":"https://github.com/rdbc-io/rdbc/issues/86"}}
    # {"text":"Uber\u2019s Lesson: Silicon Valley\u2019s Start-Up Machine Needs Fixing","meta":{"source":"The New York Times"}}

    save_jsonl('blog.paragraphs.jsonl', paras)
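save_jsonl writes the records in the JSON-lines layout shown in the comments above; a minimal sketch, assuming one json.dumps call per record, is:

# Hypothetical sketch of save_jsonl: one JSON object per line.
import json


def save_jsonl(path, records):
    with open(path, 'w') as f:
        for rec in records:
            f.write(json.dumps(rec) + '\n')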
        if book:
            time = second_to_hour(sum(duration))
            file_count = len(duration)
            total_count += file_count

            if print_detail:
                print(" [*] Duration of {}: {} (file #: {})". \
                        format(book, time, file_count))

    print(" [*] Total Duration : {} (file #: {})". \
            format(second_to_hour(duration_all), total_count))
    print()
    return duration_all


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--audio-pattern',
                        default=None)  # datasets/krbook/audio/*.wav
    parser.add_argument('--data-path',
                        default=None)  # datasets/jtbc/alignment.json
    config, unparsed = parser.parse_known_args()

    if config.audio_pattern is not None:
        duration = get_durations(get_paths_by_pattern(config.audio_pattern))
    elif config.data_path is not None:
        paths = load_json(config.data_path, encoding="utf8").keys()
        duration = get_durations(paths)
#         model_config_list.append(model_config)
# else:
#     print("predict all model")
#     model_config = {
#         "path": model_dir / f"all_model.pth",
#         "model_name": config["model"]["name"],
#         "n_class": len(utils.BIRD_CODE),
#         "in_chans": config["model"]["in_chans"],
#     }
#     model_config_list.append(model_config)

if args.th:
    print(f"override threshold with {args.th}")
    threshold = args.th
else:
    threshold = utils.load_json(model_dir / "threshold.json")
# test_df = pd.read_csv(utils.DATA_DIR / "test.csv")
test_audio_dir = utils.DATA_DIR / "test_audio"
if test_audio_dir.exists():
    test_df = pd.read_csv(utils.DATA_DIR / "test.csv")
else:
    check_dir = Path("/kaggle/input/birdcall-check/")
    test_audio_dir = check_dir / "test_audio"
    test_df = pd.read_csv(check_dir / "test.csv")

weights_path = Path(args.model_dir) / "all" / "checkpoints" / "best.pth"
# weights_path = Path(args.model_dir) / "fold0" / "checkpoints" / "best.pth"

prediction_df = prediction(
    test_df=test_df,
    test_audio=test_audio_dir,
def fetch_data():
    """
    If data exists, load data using json.
    Get values out of json dict.
    Call write_to_cassandra to write the values to Cassandra.
    """
    if consumer:
        for message in consumer:
            message = load_json(message.value)  #Convert to json
            venue = get_dict_val(message, 'venue')
            if venue:
                venue_name = get_dict_val(venue, 'venue_name')
                venue_lon = get_dict_val(venue, 'lon')
                venue_lat = get_dict_val(venue, 'lat')
                venue_id = get_dict_val(venue, 'venue_id')
            else:
                venue_name, venue_lon, venue_lat, venue_id = None, None, None, None
            visibility = get_dict_val(message, 'visibility')
            response = get_dict_val(message, 'response')
            guests = get_dict_val(message, 'guests')
            # Member who RSVP'd
            member = get_dict_val(message, 'member')
            if member:
                member_id = get_dict_val(member, 'member_id')
                member_name = get_dict_val(member, 'member_name')
            else:
                member_id, member_name = '', ''
            rsvp_id = get_dict_val(message, 'rsvp_id')
            #since epoch
            mtime = get_dict_val(message, 'mtime')
            if mtime:
                rsvp_last_modified_time = datetime_from_epoch(mtime)
            else:
                rsvp_last_modified_time = None
            # Event for the RSVP
            event = get_dict_val(message, 'event')
            if event:
                event_name = get_dict_val(event, 'event_name')
                time = get_dict_val(event, 'time')
                if time:
                    event_time = datetime_from_epoch(time)
                else:
                    event_time = None
                event_url = get_dict_val(event, 'event_url')
            else:
                event_name, event_id, event_time, event_url = '', '', '', ''
            # Group hosting the event
            group = get_dict_val(message, 'group')
            if group:
                group_topics = get_dict_val(group, 'group_topics')
                if group_topics:
                    group_topic_names = ','.join([
                        get_dict_val(each_group_topic, 'topic_name')
                        for each_group_topic in group_topics
                    ])
                else:
                    group_topic_names = ''
                group_city = get_dict_val(group, 'group_city')
                group_country = get_dict_val(group, 'group_country')
                group_id = get_dict_val(group, 'group_id')
                group_name = get_dict_val(group, 'group_name')
                group_lon = get_dict_val(group, 'group_lon')
                group_state = get_dict_val(group, 'group_state')
                group_lat = get_dict_val(group, 'group_lat')
            else:
                group_topic_names, group_city, group_country, group_id, \
                group_name, group_lon, group_state, group_lat = \
                    '', '', '', '', '', '', '', ''

            # Write data to Cassandra database
            write_to_cassandra(venue_name = venue_name, venue_lon = venue_lon, \
                               venue_lat = venue_lat, venue_id = venue_id, visibility = visibility, \
                               response = response, guests = guests, member_id = member_id, \
                               member_name = member_name, rsvp_id = rsvp_id, \
                               rsvp_last_modified_time  = rsvp_last_modified_time, \
                               event_name = event_name, event_time = event_time, event_url = event_url, \
                               group_topic_names = group_topic_names, group_country = group_country, \
                               group_state = group_state, group_city = group_city, group_name = group_name, \
                               group_lon = group_lon, group_lat = group_lat, group_id = group_id
                               )

            result = u' '.join(
                (str(group_topic_names), str(group_city), str(group_country),
                 str(group_id), str(group_name), str(group_lon),
                 str(group_state), str(group_lat))).encode('utf-8').strip()
            print(result)
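get_dict_val is used throughout fetch_data as a None-safe lookup; a minimal sketch, assuming it only guards against missing keys and non-dict inputs, is:

# Hypothetical sketch of the get_dict_val helper used above.
def get_dict_val(d, key):
    if isinstance(d, dict):
        return d.get(key)
    return None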
Example #42
def run_bgpstream(
    prefixes_file=None,
    kafka_host=None,
    kafka_port=None,
    kafka_topic="openbmp.bmp_raw",
    start=0,
    end=0,
):
    """
    Retrieve all records related to a list of prefixes
    https://bgpstream.caida.org/docs/api/pybgpstream/_pybgpstream.html

    :param prefixes_file: <str> input prefix json
    :param kafka_host: <str> kafka host
    :param kafka_port: <int> kafka_port
    :param kafka_topic: <str> kafka topic
    :param start: <int> start timestamp in UNIX epochs
    :param end: <int> end timestamp in UNIX epochs (if 0 --> "live mode")

    :return: -
    """

    prefixes = load_json(prefixes_file)
    assert prefixes is not None

    # create a new bgpstream instance and a reusable bgprecord instance
    stream = _pybgpstream.BGPStream()

    # set kafka data interface
    stream.set_data_interface("kafka")

    # set host connection details
    stream.set_data_interface_option("kafka", "brokers",
                                     "{}:{}".format(kafka_host, kafka_port))

    # set topic
    stream.set_data_interface_option("kafka", "topic", kafka_topic)

    # filter prefixes
    for prefix in prefixes:
        stream.add_filter("prefix", prefix)

    # filter record type
    stream.add_filter("record-type", "updates")

    # filter based on timing (if end=0 --> live mode)
    stream.add_interval_filter(start, end)

    # set live mode
    stream.set_live_mode()

    # start the stream
    stream.start()

    with Connection(RABBITMQ_URI) as connection:
        exchange = Exchange("bgp-update",
                            channel=connection,
                            type="direct",
                            durable=False)
        exchange.declare()
        producer = Producer(connection)
        validator = mformat_validator()
        while True:
            # get next record
            try:
                rec = stream.get_next_record()
            except BaseException:
                continue
            if (rec.status != "valid") or (rec.type != "update"):
                continue

            # get next element
            try:
                elem = rec.get_next_elem()
            except BaseException:
                continue

            while elem:
                if elem.type in {"A", "W"}:
                    redis.set(
                        "bgpstreamkafka_seen_bgp_update",
                        "1",
                        ex=int(
                            os.getenv(
                                "MON_TIMEOUT_LAST_BGP_UPDATE",
                                DEFAULT_MON_TIMEOUT_LAST_BGP_UPDATE,
                            )),
                    )
                    this_prefix = str(elem.fields["prefix"])
                    service = "bgpstreamkafka|{}".format(str(rec.collector))
                    type_ = elem.type
                    if type_ == "A":
                        as_path = elem.fields["as-path"].split(" ")
                        communities = [{
                            "asn": int(comm.split(":")[0]),
                            "value": int(comm.split(":")[1]),
                        } for comm in elem.fields["communities"]]
                    else:
                        as_path = []
                        communities = []
                    timestamp = float(rec.time)
                    peer_asn = elem.peer_asn

                    for prefix in prefixes:
                        base_ip, mask_length = this_prefix.split("/")
                        our_prefix = IPNetwork(prefix)
                        if (IPAddress(base_ip) in our_prefix
                                and int(mask_length) >= our_prefix.prefixlen):
                            msg = {
                                "type": type_,
                                "timestamp": timestamp,
                                "path": as_path,
                                "service": service,
                                "communities": communities,
                                "prefix": this_prefix,
                                "peer_asn": peer_asn,
                            }
                            if validator.validate(msg):
                                msgs = normalize_msg_path(msg)
                                for msg in msgs:
                                    key_generator(msg)
                                    log.debug(msg)
                                    producer.publish(
                                        msg,
                                        exchange=exchange,
                                        routing_key="update",
                                        serializer="ujson",
                                    )
                            else:
                                log.warning(
                                    "Invalid format message: {}".format(msg))
                            break
                try:
                    elem = rec.get_next_elem()
                except BaseException:
                    continue
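A hypothetical invocation, with placeholder prefix file, broker address and timestamps, would look like:

# Placeholder values only; adjust to your own prefix file and Kafka broker.
run_bgpstream(
    prefixes_file="prefixes.json",
    kafka_host="localhost",
    kafka_port=9092,
    kafka_topic="openbmp.bmp_raw",
    start=1609459200,  # 2021-01-01 00:00:00 UTC
    end=0,             # 0 enables live mode
)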
Example #43
def parse_ripe_ris(connection, prefixes_file, hosts):
    exchange = Exchange("bgp-update",
                        channel=connection,
                        type="direct",
                        durable=False)
    exchange.declare()

    prefixes = load_json(prefixes_file)
    assert prefixes is not None
    prefix_tree = {"v4": pytricia.PyTricia(32), "v6": pytricia.PyTricia(128)}
    for prefix in prefixes:
        ip_version = get_ip_version(prefix)
        prefix_tree[ip_version].insert(prefix, "")

    ris_suffix = os.getenv("RIS_ID", "my_as")

    validator = mformat_validator()
    with Producer(connection) as producer:
        while True:
            try:
                events = requests.get(
                    "https://ris-live.ripe.net/v1/stream/?format=json&client=artemis-{}"
                    .format(ris_suffix),
                    stream=True,
                    timeout=10,
                )
                # http://docs.python-requests.org/en/latest/user/advanced/#streaming-requests
                iterator = events.iter_lines()
                next(iterator)
                for data in iterator:
                    try:
                        parsed = json.loads(data)
                        msg = parsed["data"]
                        if "type" in parsed and parsed["type"] == "ris_error":
                            log.error(msg)
                        # also check if ris host is in the configuration
                        elif ("type" in msg and msg["type"] == "UPDATE"
                              and (not hosts or msg["host"] in hosts)):
                            norm_ris_msgs = normalize_ripe_ris(
                                msg, prefix_tree)
                            for norm_ris_msg in norm_ris_msgs:
                                redis.set(
                                    "ris_seen_bgp_update",
                                    "1",
                                    ex=int(
                                        os.getenv(
                                            "MON_TIMEOUT_LAST_BGP_UPDATE",
                                            DEFAULT_MON_TIMEOUT_LAST_BGP_UPDATE,
                                        )),
                                )
                                if validator.validate(norm_ris_msg):
                                    norm_path_msgs = normalize_msg_path(
                                        norm_ris_msg)
                                    for norm_path_msg in norm_path_msgs:
                                        key_generator(norm_path_msg)
                                        log.debug(norm_path_msg)
                                        producer.publish(
                                            norm_path_msg,
                                            exchange=exchange,
                                            routing_key="update",
                                            serializer="ujson",
                                        )
                                else:
                                    log.warning(
                                        "Invalid format message: {}".format(
                                            msg))
                    except Exception:
                        log.exception("exception message {}".format(data))
                log.warning(
                    "Iterator ran out of data; the connection will be retried")
            except Exception:
                log.exception("server closed connection")
                time.sleep(60)
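get_ip_version only has to distinguish IPv4 from IPv6 prefixes here; a minimal sketch matching the "v4"/"v6" keys used above is:

# Hypothetical sketch of the get_ip_version helper used above.
def get_ip_version(prefix):
    return "v6" if ":" in prefix else "v4"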
Example #44
 def test_branches(self):
     rv = self.app.get(get_path(['branches']))
     branches = json.loads(rv.data)
     branches['host'] = utils.EXPECTED_HOST
     assert branches == utils.load_json('branches.json')
Example #45
def deserialize_dynamodb_NewImages(new_images: List[Any]) -> List[Any]:
    new_images = [
        deserialize_dynamodb_NewImage(record) for record in new_images
    ]
    return new_images


def lambda_handler(event, context):
    # log
    print(event)

    # create Firehose client
    fh = Firehose()

    # process event
    events = event["Records"]
    events = filter_events(events)
    new_images = extract_NewImage(events)
    records_dynamodb = deserialize_dynamodb_NewImages(new_images)

    # transimit by record unit
    for rec in records_dynamodb:
        fh.put_item({"Data": json.dumps(rec)})
        print(rec)


if __name__ == "__main__":
    # test
    event = utils.load_json("./event.json")
    lambda_handler(event, {})
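The Firehose wrapper is not part of this snippet; a hedged sketch built on boto3, with a placeholder delivery-stream name, might be:

# Hypothetical sketch of the Firehose wrapper used in lambda_handler above.
import boto3


class Firehose:
    def __init__(self, stream_name="my-delivery-stream"):  # placeholder name
        self.client = boto3.client("firehose")
        self.stream_name = stream_name

    def put_item(self, record):
        # `record` is expected to look like {"Data": "<json string>"}.
        self.client.put_record(DeliveryStreamName=self.stream_name, Record=record)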
Example #46
 def test_commit(self):
     rv = self.app.get(get_path(['commit', utils.EXPECTED_REV]))
     assert json.loads(rv.data) == utils.load_json('commit.json')
Example #47
    def _request(self, url, data=None, method=None):
        """Send an HTTP request to the remote server.

        Args:
          method - A string for the HTTP method to send the request with.
          url - The URL to send the request to.
          body - The message body to send.

        Returns:
          A dictionary with the server's parsed JSON response.
        """
        LOGGER.debug('%s %s %s' % (method, url, data))

        parsed_url = urlparse.urlparse(url)
        auth = None
        password_manager = None
        if parsed_url.username:
            netloc = parsed_url.hostname
            if parsed_url.port:
                netloc += ":%s" % parsed_url.port
            cleaned_url = urlparse.urlunparse(
                (parsed_url.scheme, netloc, parsed_url.path, parsed_url.params,
                 parsed_url.query, parsed_url.fragment))
            password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(
                None, "%s://%s" % (parsed_url.scheme, netloc),
                parsed_url.username, parsed_url.password)
            request = Request(cleaned_url, data=data, method=method)
        else:
            request = Request(url, data=data, method=method)

        request.add_header('Accept', 'application/json')

        if password_manager:
            opener = urllib2.build_opener(
                urllib2.HTTPRedirectHandler(), HttpErrorHandler(),
                urllib2.HTTPBasicAuthHandler(password_manager))
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler(),
                                          HttpErrorHandler())
        response = opener.open(request)
        try:
            if response.code > 399 and response.code < 500:
                return {'status': response.code, 'value': response.read()}
            body = response.read().replace('\x00', '').strip()
            content_type = response.info().getheader('Content-Type') or []
            if 'application/json' in content_type:
                data = utils.load_json(body.strip())
                assert type(data) is dict, (
                    'Invalid server response body: %s' % body)
                assert 'status' in data, (
                    'Invalid server response; no status: %s' % body)
                # Some of the drivers incorrectly return a response
                # with no 'value' field when they should return null.
                if 'value' not in data:
                    data['value'] = None
                return data
            elif 'image/png' in content_type:
                data = {'status': 0, 'value': body.strip()}
                return data
        finally:
            response.close()
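The credential-stripping logic at the top of `_request` can be exercised on its own; a standalone Python 2 sketch (the URL below is made up):

# Illustration of how _request rebuilds the URL without embedded credentials.
import urlparse  # Python 2; use urllib.parse on Python 3

url = 'http://user:secret@example.com:4444/status'
parsed = urlparse.urlparse(url)
netloc = parsed.hostname + (':%s' % parsed.port if parsed.port else '')
cleaned = urlparse.urlunparse((parsed.scheme, netloc, parsed.path,
                               parsed.params, parsed.query, parsed.fragment))
print(cleaned)  # http://example.com:4444/status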
Example #48
0
    def test_history(self):
        rv = self.app.get(get_path(['history']))
        history = json.loads(rv.data)
        for h in history:
            h['activities'] = []
        assert history == utils.load_json('history.json')
Example #49
0
from utils import load_json
from pprint import pprint

# batch0_uc_idxs = load_json('../exp/hope_cifar10_imb1_s0.4_r1.0_m50_Mar23_201207/batch0_uc_idxs.json')
epoch_total_uc_idxs = load_json(
    '../exp/hope_cifar10_imb1_s0.4_r1.0_m100_Mar27_102601/epoch_total_uc_idxs.json'
)

print('epoch\tlen(idxs)\tcommon')
for epoch in range(30, 30 + len(epoch_total_uc_idxs) - 1):
    cur_ucs = epoch_total_uc_idxs[str(epoch)]
    next_ucs = epoch_total_uc_idxs[str(epoch + 1)]
    print(
        f'{epoch}-{epoch + 1}:\t{len(cur_ucs)}-{len(next_ucs)}\t{len(set(cur_ucs) & set(next_ucs))}'
    )
    def test_train_mbart_cc25_enro_script(self):
        data_dir = "examples/seq2seq/test_data/wmt_en_ro"
        env_vars_to_replace = {
            "--fp16_opt_level=O1": "",
            "$MAX_LEN": 128,
            "$BS": 4,
            "$GAS": 1,
            "$ENRO_DIR": data_dir,
            "facebook/mbart-large-cc25": MODEL_NAME,
            # Download is 120MB in previous test.
            "val_check_interval=0.25": "val_check_interval=1.0",
        }

        # Clean up bash script
        bash_script = Path("examples/seq2seq/train_mbart_cc25_enro.sh").open(
        ).read().split("finetune.py")[1].strip()
        bash_script = bash_script.replace("\\\n",
                                          "").strip().replace('"$@"', "")
        for k, v in env_vars_to_replace.items():
            bash_script = bash_script.replace(k, str(v))
        output_dir = self.get_auto_remove_tmp_dir()

        bash_script = bash_script.replace("--fp16 ", "")
        testargs = (["finetune.py"] + bash_script.split() + [
            f"--output_dir={output_dir}",
            "--gpus=1",
            "--learning_rate=3e-1",
            "--warmup_steps=0",
            "--val_check_interval=1.0",
            "--tokenizer_name=facebook/mbart-large-en-ro",
        ])
        with patch.object(sys, "argv", testargs):
            parser = argparse.ArgumentParser()
            parser = pl.Trainer.add_argparse_args(parser)
            parser = SummarizationModule.add_model_specific_args(
                parser, os.getcwd())
            args = parser.parse_args()
            args.do_predict = False
            # assert args.gpus == gpus THIS BREAKS for multigpu
            model = main(args)

        # Check metrics
        metrics = load_json(model.metrics_save_path)
        first_step_stats = metrics["val"][0]
        last_step_stats = metrics["val"][-1]
        assert (len(
            metrics["val"]) == (args.max_epochs / args.val_check_interval) + 1
                )  # +1 accounts for val_sanity_check

        assert last_step_stats["val_avg_gen_time"] >= 0.01

        assert first_step_stats["val_avg_bleu"] < last_step_stats[
            "val_avg_bleu"]  # BLEU should improve over the course of training
        assert 1.0 >= last_step_stats[
            "val_avg_gen_time"]  # generation should not hang (a long gen time may indicate a bad saved config)
        assert isinstance(last_step_stats[f"val_avg_{model.val_metric}"],
                          float)

        # check lightning ckpt can be loaded and has a reasonable statedict
        contents = os.listdir(output_dir)
        ckpt_path = [x for x in contents if x.endswith(".ckpt")][0]
        full_path = os.path.join(args.output_dir, ckpt_path)
        ckpt = torch.load(full_path, map_location="cpu")
        expected_key = "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"
        assert expected_key in ckpt["state_dict"]
        assert ckpt["state_dict"][
            "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32

        # TODO: turn on args.do_predict when PL bug fixed.
        if args.do_predict:
            contents = {os.path.basename(p) for p in contents}
            assert "test_generations.txt" in contents
            assert "test_results.txt" in contents
            # assert len(metrics["val"]) ==  desired_n_evals
            assert len(metrics["test"]) == 1
Example #51
0
import os
import logging
import logging.config

import utils

# Directories
BASE_DIR = os.getcwd()  # project root
LOGS_DIR = os.path.join(BASE_DIR, 'logs')
EXPERIMENTS_DIR = os.path.join(BASE_DIR, 'experiments')

# Create dirs
utils.create_dirs(LOGS_DIR)
utils.create_dirs(EXPERIMENTS_DIR)

# Loggers
log_config = utils.load_json(filepath=os.path.join(BASE_DIR, 'logging.json'))
logging.config.dictConfig(log_config)
logger = logging.getLogger('logger')
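The contents of logging.json are not reproduced here; a minimal dictConfig-compatible sketch of what it could contain (formatter, handler, and logger names are assumptions) follows:

# Hedged sketch of a logging.json payload accepted by logging.config.dictConfig.
import json

example_log_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {"simple": {"format": "%(asctime)s %(levelname)s %(message)s"}},
    "handlers": {"console": {"class": "logging.StreamHandler", "formatter": "simple"}},
    "loggers": {"logger": {"handlers": ["console"], "level": "INFO"}},
}
print(json.dumps(example_log_config, indent=2))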
Example #52
0
File: game.py Project: LyricLy/Esobot
    def __init__(self, bot):
        self.bot = bot
        self.words = None
        self.ideas = load_json(IDEA_SAVES)
Example #53
0
    def __init__(self, args, self_adv=False, cv_oc=[]):
        #seed_everything(args.seed)
        self.args = args
        self.eps = 1e-10
        self.read_data()
        self.lab2idx = load_json("../datasets/ztf/lab2idx.json")
        self.family = load_json("../datasets/ztf/family.json")

        assert isinstance(cv_oc, list), "cv_oc not a list"

        # remove outliers from train and val data only if there is some oc
        if cv_oc:
            for key in self.family:
                flab = self.family[key]
                if flab == cv_oc[0]:
                    try:
                        print(key, flab, self.lab2idx[key])
                        self.x_train, self.y_train = remove_data_from_selected_class(
                            self.x_train, self.y_train, self.lab2idx[key])
                        self.x_val, self.y_val = remove_data_from_selected_class(
                            self.x_val, self.y_val, self.lab2idx[key])
                    except KeyError:
                        print(key, flab, "not found")

        # add transformations
        if self_adv:
            self.x_train, self.y_train = process_self_adversarial(
                self.x_train, self.y_train, args)
            self.x_val, self.y_val = process_self_adversarial(
                self.x_val, self.y_val, args)

        # magnitude normalization
        self.x_train, self.mean_train, self.std_train = normalize_light_curves(
            self.x_train, minmax=False)
        self.x_val, self.mean_val, self.std_val = normalize_light_curves(
            self.x_val, minmax=False)
        self.x_test, self.mean_test, self.std_test = normalize_light_curves(
            self.x_test, minmax=False)

        # time normalization
        self.x_train = time_norm(self.x_train, log=True)
        self.x_test = time_norm(self.x_test, log=True)
        self.x_val = time_norm(self.x_val, log=True)

        self.average_precision = 0
        if cv_oc:
            for key in self.family:
                flab = self.family[key]
                if flab == cv_oc[0]:
                    try:
                        print(key, flab, self.lab2idx[key],
                              (self.y_test == self.lab2idx[key]).sum())
                        self.average_precision += (
                            self.y_test == self.lab2idx[key]).sum()
                    except KeyError:
                        print(key, flab, "not found")
        print(self.average_precision)
        print(len(self.y_test))
        self.average_precision /= len(self.y_test)
        if cv_oc:
            print("{}, avg pre {}".format(cv_oc[0], self.average_precision))

        self.seq_len_train = calculate_seq_len(self.x_train)
        self.seq_len_val = calculate_seq_len(self.x_val)
        self.seq_len_test = calculate_seq_len(self.x_test)

        # remap the remaining class labels to contiguous indices
        idx = 0
        self.temp_labels_dict = {}
        for lab in np.unique(self.y_train):
            self.temp_labels_dict[lab] = idx
            idx += 1
        self.y_train = np.array(
            [self.temp_labels_dict[lab] for lab in self.y_train])
        self.y_val = np.array(
            [self.temp_labels_dict[lab] for lab in self.y_val])
        self.n_inlier_classes = len(np.unique(self.y_train))
        self.ndim = self.x_train.shape[2]

        self.train_dataset = MyDataset(self.x_train,
                                       self.y_train,
                                       self.mean_train,
                                       self.std_train,
                                       self.seq_len_train,
                                       device=args["d"])
        self.val_dataset = MyDataset(self.x_val,
                                     self.y_val,
                                     self.mean_val,
                                     self.std_val,
                                     self.seq_len_val,
                                     device=args["d"])
        self.test_dataset = MyDataset(self.x_test,
                                      self.y_test,
                                      self.mean_test,
                                      self.std_test,
                                      self.seq_len_test,
                                      device=args["d"])

        # balancing
        labs, counts = np.unique(self.y_train, return_counts=True)
        # mask = labs != -99
        # weights = 1 / counts[mask]
        # weights /= 2 * weights.sum()
        # weights = np.insert(weights, 0, 0.5)

        weights = 1 / counts
        weights /= weights.sum()

        sample_weight = np.zeros(len(self.y_train))
        for i, lab in enumerate(labs):
            mask = self.y_train == lab
            sample_weight[mask] = weights[i]
        sampler = torch.utils.data.WeightedRandomSampler(
            sample_weight, len(sample_weight))
        self.train_dataloader = DataLoader(self.train_dataset,
                                           batch_size=self.args["bs"],
                                           sampler=sampler,
                                           drop_last=True)
        self.val_dataloader = DataLoader(self.val_dataset,
                                         batch_size=self.args["bs"],
                                         shuffle=True,
                                         drop_last=True)
        self.test_dataloader = DataLoader(self.test_dataset,
                                          batch_size=self.args["bs"],
                                          shuffle=False)
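The constructor above only indexes `args` directly with the keys "d" (device) and "bs" (batch size); downstream helpers such as `process_self_adversarial` may read more. A minimal placeholder mapping, purely as an assumption:

# Hedged sketch of the args mapping read directly by the constructor; real runs likely pass more keys.
example_args = {
    "d": "cpu",  # device handed to MyDataset
    "bs": 32,    # batch size for the DataLoaders
    "seed": 0,   # only referenced by the commented-out seed_everything call
}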
Example #54
0
    def test_commits(self):
        rv = self.app.get(
            get_path(
                ['commits', utils.EXPECTED_BRANCH, utils.EXPECTED_RESOURCE]))
        assert json.loads(rv.data) == utils.load_json('commits_path.json')
Example #55
0
def test_yolov3_classification():

    y = YOLO_V3(utils.load_json('./unit_test/test_config2.json'))
    def test_train_mbart_cc25_enro_script(self):
        env_vars_to_replace = {
            "$MAX_LEN": 64,
            "$BS": 64,
            "$GAS": 1,
            "$ENRO_DIR": self.data_dir,
            "facebook/mbart-large-cc25": MARIAN_MODEL,
            # "val_check_interval=0.25": "val_check_interval=1.0",
            "--learning_rate=3e-5": "--learning_rate 3e-4",
            "--num_train_epochs 6": "--num_train_epochs 1",
        }

        # Clean up bash script
        bash_script = (self.test_file_dir / "train_mbart_cc25_enro.sh"
                       ).open().read().split("finetune.py")[1].strip()
        bash_script = bash_script.replace("\\\n",
                                          "").strip().replace('"$@"', "")
        for k, v in env_vars_to_replace.items():
            bash_script = bash_script.replace(k, str(v))
        output_dir = self.get_auto_remove_tmp_dir()

        # bash_script = bash_script.replace("--fp16 ", "")
        args = f"""
            --output_dir {output_dir}
            --tokenizer_name Helsinki-NLP/opus-mt-en-ro
            --sortish_sampler
            --do_predict
            --gpus 1
            --freeze_encoder
            --n_train 40000
            --n_val 500
            --n_test 500
            --fp16_opt_level O1
            --num_sanity_val_steps 0
            --eval_beams 2
        """.split()
        # XXX: args.gpus > 1 : handle multi_gpu in the future

        testargs = ["finetune.py"] + bash_script.split() + args
        with patch.object(sys, "argv", testargs):
            parser = argparse.ArgumentParser()
            parser = pl.Trainer.add_argparse_args(parser)
            parser = SummarizationModule.add_model_specific_args(
                parser, os.getcwd())
            args = parser.parse_args()
            model = main(args)

        # Check metrics
        metrics = load_json(model.metrics_save_path)
        first_step_stats = metrics["val"][0]
        last_step_stats = metrics["val"][-1]
        self.assertEqual(len(metrics["val"]),
                         (args.max_epochs / args.val_check_interval))
        assert isinstance(last_step_stats[f"val_avg_{model.val_metric}"],
                          float)

        self.assertGreater(last_step_stats["val_avg_gen_time"], 0.01)
        # generation should not hang (a gen time above 1 s may indicate a bad saved config)
        self.assertLessEqual(last_step_stats["val_avg_gen_time"], 1.0)

        # test learning requirements:

        # 1. BLEU improves over the course of training by more than 2 pts
        self.assertGreater(
            last_step_stats["val_avg_bleu"] - first_step_stats["val_avg_bleu"],
            2)

        # 2. BLEU finishes above 17
        self.assertGreater(last_step_stats["val_avg_bleu"], 17)

        # 3. test BLEU and val BLEU within ~1.1 pt.
        self.assertLess(
            abs(metrics["val"][-1]["val_avg_bleu"] -
                metrics["test"][-1]["test_avg_bleu"]), 1.1)

        # check lightning ckpt can be loaded and has a reasonable statedict
        contents = os.listdir(output_dir)
        ckpt_path = [x for x in contents if x.endswith(".ckpt")][0]
        full_path = os.path.join(args.output_dir, ckpt_path)
        ckpt = torch.load(full_path, map_location="cpu")
        expected_key = "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"
        assert expected_key in ckpt["state_dict"]
        assert ckpt["state_dict"][
            "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32

        # TODO: turn on args.do_predict when PL bug fixed.
        if args.do_predict:
            contents = {os.path.basename(p) for p in contents}
            assert "align_wnums_test_generations.txt" in contents
            assert "test_results.txt" in contents
            # assert len(metrics["val"]) ==  desired_n_evals
            assert len(metrics["test"]) == 1
async def get_persons_photos(kp: KP, assets_folder: str):
    persons = load_json('parser/persons.json')
    print("Persons started")
    await asyncio.gather(*_get_tasks(persons, kp.get_person_photo, assets_folder=assets_folder))
    print("Persons Done")
Example #58
0
    def test_tags(self):
        rv = self.app.get(get_path(['tags']))
        tags = json.loads(rv.data)
        tags['host'] = utils.EXPECTED_HOST
        assert tags == utils.load_json('tags.json')
Example #59
0
def main():
    # load the configuration
    config = utils.load_json("./config.json")
    data_conf, model_conf, train_conf = (config["data"], config["model"],
                                         config["train"])
    device = torch.device(train_conf["device"])
    task_name = train_conf["name"] if train_conf["name"] is not None\
        else utils.task_name_generate()

    # load the datasets
    train_trans = [
        SampleTransfer(data_conf["npoints"], data_conf["sample_method"]),
        CircleNormTransfer(),
        RandomRotationTransfer(),
        RandomJitterTransfer(data_conf["jitter_std"], data_conf["jitter_clip"])
    ]
    eval_trans = [
        SampleTransfer(data_conf["npoints"], data_conf["sample_method"]),
        CircleNormTransfer(),
    ]
    train_dat = FileDataset.ModelNet(config["data"]["dir"], phase="train")
    test_dat = FileDataset.ModelNet(
        config["data"]["dir"],
        phase="test",
        label_encoder=train_dat.kwargs["label_encoder"])
    train_dat, eval_dat = train_dat.split(0.1, True, config["seed"], True)
    train_dat.set_transfers(*train_trans)
    eval_dat.set_transfers(*eval_trans)
    test_dat.set_transfers(*eval_trans)

    loaders = {
        "train":
        data.DataLoader(train_dat,
                        train_conf["batch_size"],
                        True,
                        num_workers=train_conf["njobs"]),
        "eval":
        data.DataLoader(eval_dat,
                        train_conf["batch_size"],
                        False,
                        num_workers=train_conf["njobs"]),
        "test":
        data.DataLoader(test_dat,
                        train_conf["batch_size"],
                        False,
                        num_workers=train_conf["njobs"])
    }
    # build the model
    net = PointNet(train_dat.channels,
                   train_dat.nlabels,
                   **model_conf["pointnet"],
                   stn3_kwargs=model_conf["stn3"],
                   stnk_kwargs=model_conf["stnk"]).to(device)
    criterion = CEwithReg(model_conf["reg_w"])
    optimizer = optim.Adam(net.parameters(), lr=train_conf["lr"])

    # train
    net, hist, best = train(net, criterion, optimizer, loaders,
                            train_conf["epoch"], device, task_name)
    test_loss, test_acc = evaluate(net, criterion, loaders["test"], device)
    best["test_loss"] = test_loss
    best["test_acc"] = test_acc

    # save the results
    task_dir = os.path.join("RESULTS", task_name)
    if not os.path.exists(task_dir):
        os.makedirs(task_dir)
    torch.save(net, os.path.join(task_dir, "model.pth"))
    utils.dump_json(best, os.path.join(task_dir, "best.json"))
    utils.dump_json(hist, os.path.join(task_dir, "hist.json"))
    utils.dump_json(config, os.path.join(task_dir, "config.json"))
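config.json is loaded but not shown; a minimal sketch covering only the keys main() actually reads (every value is a placeholder, and the real file may carry extra keys):

# Hedged sketch of config.json for the training script above; real values will differ.
example_config = {
    "seed": 0,
    "data": {
        "dir": "data/modelnet",        # placeholder dataset path
        "npoints": 1024,
        "sample_method": "random",
        "jitter_std": 0.01,
        "jitter_clip": 0.05,
    },
    "model": {
        "pointnet": {},
        "stn3": {},
        "stnk": {},
        "reg_w": 0.001,
    },
    "train": {
        "device": "cpu",
        "name": None,
        "batch_size": 16,
        "njobs": 0,
        "lr": 0.001,
        "epoch": 10,
    },
}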
Example #60
0
        movie_json['age_rating'],
        'year':
        int(movie_json['year']),
        'budget':
        5_000_000
    }
    if movie_genres:
        fields['genres'] = movie_genres
    return fields


if __name__ == '__main__':
    from utils import load_json, save_json

    try:
        movie_list_json = load_json('movies.json')
        person_list_json = load_json('persons.json')
    except FileNotFoundError:
        print('Run src/parser/parser.py to get raw json data')
        raise

    movies_model_list = [
        {
            'model': 'movies.movietype',
            'pk': 1,
            'fields': {
                'title': 'Сериал'
            }
        },
        {
            'model': 'movies.movietype',