def test_tree_root(self):
    """The tree endpoint matches its fixtures, with and without a limit."""
    base_path = get_path(['tree', utils.EXPECTED_BRANCH])

    # Unrestricted listing of the expected branch.
    response = self.app.get(base_path)
    assert json.loads(response.data) == utils.load_json('tree.json')

    # Same listing, restricted through the ``limit`` query parameter.
    limited_path = '{0}?limit={1}'.format(base_path, utils.EXPECTED_LIMIT)
    response = self.app.get(limited_path)
    assert json.loads(response.data) == utils.load_json('tree_limit.json')
def main():
    """Run the checker against the endpoint given on the command line.

    Options are loaded from the ``datafile`` argument when that path exists.
    When no ``datafile`` is given, ``fkdata.json`` located next to this
    script is used if present.  If no options could be loaded a notice is
    printed; otherwise the checker is executed and its counters are printed.
    """
    args = get_arg_parser().parse_args()
    endpoint = utils.format_endpoint(args.endpoint[0])

    options = None
    if args.datafile is not None:
        if os.path.exists(args.datafile):
            options = utils.load_json(args.datafile)
    else:
        # No explicit data file: fall back to the one shipped beside the script.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        fkjson_path = os.path.join(script_dir, 'fkdata.json')
        if os.path.exists(fkjson_path):
            options = utils.load_json(fkjson_path)

    if options is None:
        print(''' fkdata.json file needed... ''')
        return

    fk_checker = fkchecker.FkChecker(endpoint, options)
    fk_checker.execute()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Total Frames Received: ' + str(fk_checker.get_recv_frames))
    print('Total Asserts: ' + str(fk_checker.get_asserts))
    print('Total of validation executed: ' + str(fk_checker.get_validations_runned))
    print('Assert frame rate: ' + str(fk_checker.get_assert_rate) + ' %')
def test_commits_root(self):
    """The commits endpoint matches its fixtures for branch, limit and revision."""
    branch_path = get_path(['commits', utils.EXPECTED_BRANCH])

    # Full history of the expected branch.
    response = self.app.get(branch_path)
    assert json.loads(response.data) == utils.load_json('commits.json')

    # History restricted through the ``limit`` query parameter.
    limited_path = '{0}?limit={1}'.format(branch_path, utils.EXPECTED_LIMIT)
    response = self.app.get(limited_path)
    assert json.loads(response.data) == utils.load_json('commits_limit.json')

    # History addressed by revision instead of branch name.
    revision_path = get_path(['commits', utils.EXPECTED_REV])
    response = self.app.get(revision_path)
    assert json.loads(response.data) == utils.load_json('commits_rev.json')
def test_blob(self):
    """The blob endpoint matches its fixtures when addressed by branch and by revision."""
    by_branch = get_path(['blob', utils.EXPECTED_BRANCH, utils.EXPECTED_RESOURCE])
    response = self.app.get(by_branch)
    assert json.loads(response.data) == utils.load_json('blob.json')

    by_revision = get_path(['blob', utils.EXPECTED_REV, utils.EXPECTED_RESOURCE])
    response = self.app.get(by_revision)
    assert json.loads(response.data) == utils.load_json('blob_rev.json')
def test_get_resource(self):
    """``core.get_resource`` matches the blob fixtures for a branch and for a revision."""
    project = utils.EXPECTED_PROJECT
    repository = utils.EXPECTED_REPOSITORY
    name = utils.EXPECTED_RESOURCE

    from_branch = core.get_resource(project, repository, utils.EXPECTED_BRANCH, name)
    assert from_branch == utils.load_json('blob.json')

    from_revision = core.get_resource(project, repository, utils.EXPECTED_REV, name)
    assert from_revision == utils.load_json('blob_rev.json')
def load_train_result(args):
    """Return the ``(chn_freq, eng_freq)`` pair for the given settings.

    The two trained files are looked up in ``ENV.data_dir`` under the names
    stored in the ``chn`` and ``eng`` settings.  When both files exist they
    are loaded; otherwise ``train(args)`` is called to produce the pair.

    Raises:
        Whatever building the file paths raises (for example ``KeyError``
        for a missing setting).  The error is printed and then re-raised.
    """
    settings = parse_settings(args.setting)
    try:
        chn_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['chn'])
        eng_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['eng'])
    except Exception as e:
        # Previously the error was only printed and execution continued into
        # an UnboundLocalError on the path variables; surface the real cause.
        print(e)
        raise

    if os.path.isfile(chn_trained_file) and os.path.isfile(eng_trained_file):
        chn_freq = utils.load_json(chn_trained_file)
        eng_freq = utils.load_json(eng_trained_file)
    else:
        chn_freq, eng_freq = train(args)
    return chn_freq, eng_freq
def _request(self, url, data=None, method=None):
    """Issue an HTTP request to the remote server and decode its reply.

    Args:
      url - The URL to send the request to.
      data - The message body to send.
      method - A string for the HTTP method to send the request with.

    Returns:
      A dictionary with the server's parsed JSON response.  For response
      codes 400-499 a dictionary holding the code and the raw body is
      returned instead.  An empty body yields None.
    """
    logging.debug('%s %s %s' % (method, url, data))

    outgoing = Request(url, data=data, method=method)
    outgoing.add_header('Accept', 'application/json')

    handlers = (urllib2.HTTPRedirectHandler(), HttpErrorHandler())
    response = urllib2.build_opener(*handlers).open(outgoing)
    try:
        if 399 < response.code < 500:
            return {'status': response.code, 'value': response.read()}

        body = response.read().replace('\x00', '').strip()
        if not body:
            return None

        data = utils.load_json(body.strip())
        assert type(data) is dict, (
            'Invalid server response body: %s' % body)
        assert 'status' in data, (
            'Invalid server response; no status: %s' % body)
        assert 'value' in data, (
            'Invalid server response; no value: %s' % body)
        return data
    finally:
        response.close()
def run(self, edit):
    """Build the class-name -> packages index and start the import flow.

    The index loaded from ``PATH_INDEX_PACKAGES`` is extended with every
    ``.java`` file of the project whose package differs from the package
    declared in the active view.  The distinct types reported by ``Java``
    are stored in ``self.clases`` before ``self.importar`` is started.
    """
    print("va a importar")
    self.window = sublime.active_window()
    self.view = self.window.active_view()
    view = self.view

    tipos = Java().get_tipos()
    self.packages = utils.load_json(PATH_INDEX_PACKAGES)

    # Turn file paths into dotted names, then keep the part between the
    # ".java." marker and the final extension.
    dotted_paths = [
        path.replace("/", ".").replace("\\", ".")
        for path in utils.get_files({"ext": "java"})
    ]
    projectFiles = [
        dotted[dotted.rfind(".java.") + 6:dotted.rfind(".")]
        for dotted in dotted_paths
    ]

    viewPackage = view.substr(view.find(utils.REG_JAVA_PACKAGE, 0))
    viewPackage = viewPackage.replace("package ", "").replace(";", "")

    for projectFile in projectFiles:
        last_dot = projectFile.rfind(".")
        className = projectFile[last_dot + 1:]
        packageClass = projectFile[:last_dot]
        if packageClass == viewPackage:
            # Classes of the current package need no import.
            continue
        if self.packages.get(className) is None:
            self.packages[className] = []
        self.packages[className].append(packageClass)

    self.clases = list(set(tipos))
    self.i = 0
    self.importar(None)
def on_query_completions(self, view, prefix, locations):
    """Offer member completions after a ``.`` in Java sources.

    The word before the dot is treated as a type name, unless it is a known
    variable, in which case the variable's type is used.  The type's package
    is taken from a matching ``import`` statement in the text, or from
    ``java.lang`` when a JSON description exists there.  Members come from
    the project class if one is found, otherwise from the JSON description
    (key ``clase`` for static access, ``object`` for instance access).

    Returns:
        The completion list, or None when nothing applies.
    """
    if utils.get_language() != "java":
        return
    if utils.get_last_character() != ".":
        return

    word = utils.get_word(-1)
    variables = Java().get_variables()
    tipo = word
    static = True
    if variables.get(word):
        tipo = variables[word]
        static = False

    # Escape the type name so characters with regex meaning cannot alter
    # (or break) the import pattern.
    pattern = r"import ([\w.]+\.%s);" % re.escape(str(tipo))
    package = re.findall(pattern, utils.get_text())
    if not package:
        posibleRuta = os.path.join(PATH_JSON, "java", "lang", tipo + ".json")
        if os.path.exists(posibleRuta):
            package = ["java.lang." + tipo]
    if not package:
        return

    package = package[0]
    clase = self.get_project_class(package)
    if clase:
        return utils.get_completion_list(clase["members"])

    ruta = os.path.join(PATH_JSON, package.replace(".", os.sep) + ".json")
    print("ya se determino")
    objeto = utils.load_json(ruta)
    miembros = "clase" if static else "object"
    return utils.get_completion_list(objeto[miembros])
def make_Xy():
    """Build the feature frame ``X`` and label frame ``y`` for CHANGE quotes.

    Only int32 columns of the pickled orders are kept.  Rows are restricted
    to quotes whose ``type`` equals the CHANGE enumeration index.  ``y`` has
    one int32 column ``converted`` telling whether the quote id is listed as
    converted in ``converted_quotes.json``.

    Returns:
        (X, y) as pandas DataFrames sharing the same index.
    """
    orders = pd.read_pickle(ORDERS_LOCAL_PATH)
    print('%s: %s' % (ORDERS_NAME, list(orders.shape)))

    int_columns = [name for name in orders.columns if orders[name].dtype == np.int32]
    print(len(int_columns))
    orders = orders[int_columns]
    print('%s: %s' % (ORDERS_NAME, list(orders.shape)))

    converted = {quote for quote, status in load_json('converted_quotes.json') if status}

    quotes = orders[orders['isQuote'] == 1]
    _, key_to_idx = load_enumerations()
    change_type = key_to_idx['type']['CHANGE']
    quotes = quotes[quotes['type'] == change_type]
    print('%s: %s' % ('quotes', dim(quotes)))

    end_customer_columns = {'endCustomerId', 'originalEndCustomerId'}
    quotes = quotes[[name for name in quotes.columns if name not in end_customer_columns]]
    print('%s: %s' % ('quotes', dim(quotes)))

    y = pd.DataFrame(index=quotes.index)
    labels = [quote_id in converted for quote_id in quotes['id'].values]
    y['converted'] = pd.Series(labels, index=quotes.index, dtype=np.int32)
    print('y', y.shape, y['converted'].dtype)

    # Drop the filter column and the identifier columns from the features.
    non_features = ('isQuote', 'id', 'customerId')
    X = quotes[[name for name in quotes.columns if name not in non_features]]
    print('X', X.shape)
    return X, y
def read_project_config(project_path):
    """Return the parsed ``project-config.luna2d`` of a project.

    An empty dict is returned when the project has no such file.
    """
    config_path = project_path + "/project-config.luna2d"
    if not os.path.exists(config_path):
        return {}
    return utils.load_json(config_path)
def __init__(self, input_movies, file_path, count=5):
    """Store the input movies and load the movie data from ``file_path``.

    ``count`` is kept as ``show_count``.
    """
    # Initialise the parent class before setting our own attributes.
    super(Recommendation, self).__init__()
    movies_data = load_json(file_path)
    self.movies_data = movies_data
    self.input_movies = input_movies
    self.show_count = count
def _request(self, url, data=None, method=None):
    """Issue an HTTP request to the remote server and decode its reply.

    Credentials embedded in the URL are stripped from the requested URL
    and supplied through HTTP basic authentication instead.

    Args:
      url - The URL to send the request to.
      data - The message body to send.
      method - A string for the HTTP method to send the request with.

    Returns:
      A dictionary with the server's parsed JSON response.  For response
      codes 400-499 a dictionary holding the code and the raw body.  For
      PNG replies a dictionary with status 0 and the body as value.  None
      for any other content type.
    """
    LOGGER.debug('%s %s %s' % (method, url, data))

    parts = urlparse.urlparse(url)
    handlers = [urllib2.HTTPRedirectHandler(), HttpErrorHandler()]
    target_url = url
    if parts.username:
        host = parts.hostname
        if parts.port:
            host += ":%s" % parts.port
        # Rebuild the URL without the user:password@ part.
        target_url = urlparse.urlunparse((parts.scheme, host, parts.path,
                                          parts.params, parts.query,
                                          parts.fragment))
        credentials = urllib2.HTTPPasswordMgrWithDefaultRealm()
        credentials.add_password(None, "%s://%s" % (parts.scheme, host),
                                 parts.username, parts.password)
        handlers.append(urllib2.HTTPBasicAuthHandler(credentials))

    request = Request(target_url, data=data, method=method)
    request.add_header('Accept', 'application/json')

    response = urllib2.build_opener(*handlers).open(request)
    try:
        if 399 < response.code < 500:
            return {'status': response.code, 'value': response.read()}

        body = response.read().replace('\x00', '').strip()
        content_type = response.info().getheader('Content-Type') or []
        if 'application/json' in content_type:
            data = utils.load_json(body.strip())
            assert type(data) is dict, (
                'Invalid server response body: %s' % body)
            assert 'status' in data, (
                'Invalid server response; no status: %s' % body)
            # Some of the drivers incorrectly return a response
            # with no 'value' field when they should return null.
            data.setdefault('value', None)
            return data
        if 'image/png' in content_type:
            return {'status': 0, 'value': body.strip()}
    finally:
        response.close()
def salt_ssh(self, target, cmd):
    """Run ``cmd`` against ``target`` via salt-ssh and return its result.

    The command is executed through ``self['container'].run``.  Its output
    is parsed as JSON and the entry stored under the target's name is
    returned.
    """
    # NOTE(review): the roster value is not used below; the lookup is kept
    # so a missing roster configuration still raises KeyError as before.
    roster = self['container']['config']['salt_config']['roster']
    target_id = target['config']['name']
    # Build the command in one formatting step.  Formatting twice would
    # re-interpret any "{" or "}" in the password or target name as a
    # placeholder.
    command = "salt-ssh -l quiet -i --out json --key-deploy --passwd {0} {1} {2}".format(
        target['ssh_config']['password'], target_id, cmd)
    data = self['container'].run(command)
    return load_json(data)[target_id]
def run(self):
    """Poll the command file once per second and dispatch what it contains.

    A command is read only when the command file exists and
    ``existe_archivo_bloqueo(ORDEN)`` is true; the file is removed after
    reading.  Dispatch rules:

    * contains ``side_bar``: run as a window command;
    * starts with ``code_``: insert the snippet of that name from the
      language's snippet JSON file, if the file and the snippet exist;
    * starts with ``make_``: run ``load_template`` with the remaining name;
    * anything else: run as a view command.

    This loop never returns.
    """
    while True:
        comando = None
        if os.path.exists(ARCHIVO_COMANDO) and existe_archivo_bloqueo(ORDEN):
            # The context manager closes the file even if read() fails.
            with open(ARCHIVO_COMANDO) as archivito:
                comando = archivito.read()
            os.remove(ARCHIVO_COMANDO)

        if comando:
            comando = comando.strip()
            print(comando)
            window = sublime.active_window()
            view = window.active_view()
            if "side_bar" in comando:
                window.run_command(comando)
            elif comando.startswith("code_"):
                rutaJson = (sublime.packages_path() + os.sep + "snippets"
                            + os.sep + utils.get_language() + ".json")
                if os.path.exists(rutaJson):
                    comando = comando.replace("code_", "")
                    d = utils.load_json(rutaJson)
                    if d.get(comando):
                        view.run_command(
                            'insert_snippet',
                            {"contents": utils.agregarCursores(d[comando])})
            elif comando.startswith("make_"):
                comando = comando.replace("make_", "")
                view.run_command("load_template", {"nombre": comando})
            else:
                view.run_command(comando)
        time.sleep(1)
def find_all_chars(verbose=False):
    """Return a mapping of character -> occurrence count.

    The counts are read from ``CHAR_COUNT`` when available.  Otherwise they
    are computed over every sentence of the test and train data and saved
    to ``CHAR_COUNT``.  A summary is printed; with ``verbose`` the 200 most
    frequent characters are listed with their cumulative share.
    """
    char_count = load_json(CHAR_COUNT, {})
    if not char_count:
        train, test, _ = load_data()
        tally = defaultdict(int)
        n_sentences = 0
        for frame in (test, train):
            for sentence in df_to_sentences(frame):
                n_sentences += 1
                for c in sentence:
                    assert len(c) == 1, (c, sentence)
                    tally[c] += 1
        # Store a plain dict rather than the defaultdict.
        char_count = dict(tally)
        save_json(CHAR_COUNT, char_count)
        print('S=%d' % n_sentences)

    # Most frequent first; ties broken by the character itself.
    chars = sorted(char_count, key=lambda c: (-char_count[c], c))
    N = sum(char_count.values())
    print('find_all_chars: %d %r' % (len(chars), ''.join(chars[:100])))
    print('N=%d=%.3fM' % (N, N * 1e-6))

    if verbose:
        cumulative = 0.0
        for rank, c in enumerate(chars[:200]):
            count = char_count[c]
            share = count / N
            cumulative += share
            print('%4d: %8d %.4f %.3f %4d=%2r' % (rank, count, share, cumulative, ord(c), c))
    return char_count
def test_get_resources(self):
    """``core.get_resources`` matches the tree fixtures for branch, revision and limit."""
    project = utils.EXPECTED_PROJECT
    repository = utils.EXPECTED_REPOSITORY

    by_branch = core.get_resources(project, repository, utils.EXPECTED_BRANCH)
    assert by_branch == utils.load_json('tree.json')

    by_revision = core.get_resources(project, repository, utils.EXPECTED_REV)
    assert by_revision == utils.load_json('tree_rev.json')

    limited = core.get_resources(project, repository, utils.EXPECTED_BRANCH,
                                 limit=utils.EXPECTED_LIMIT)
    assert limited == utils.load_json('tree_limit.json')
def update_libs(args, luna2d_path, config):
    """Refresh the project's ``.luna2d/libs`` directory.

    The destination is wiped and refilled with the release libraries of the
    selected platform.  Then, for each module listed under ``sdkmodules`` in
    ``config``, the files named in the module's ``sdkmodule.luna2d`` are
    copied in, prefixed with the module name.  On Android the module is also
    applied through ``update_android.apply_sdk_module``.  Modules that cannot
    be found, or that have no config file, are reported and skipped.
    """
    libs_source_dir = luna2d_path + "/lib/" + args.platform + "/release/"
    libs_dest_dir = args.project_path + "/.luna2d/libs"

    # Copy luna2d libs
    shutil.rmtree(libs_dest_dir, ignore_errors=True)
    shutil.copytree(libs_source_dir, libs_dest_dir)

    # Copy sdkmodules libs
    for module_name in config.get("sdkmodules", ()) if "sdkmodules" in config else ():
        module_path = find_sdk_module(module_name, args, luna2d_path)
        if module_path is None:
            print("SDK module \"" + module_name + "\" not found")
            continue

        # os.path.join yields a valid path whether or not module_path ends
        # with a separator; plain concatenation only worked with one.
        module_config_path = os.path.join(module_path, "sdkmodule.luna2d")
        if not os.path.exists(module_config_path):
            print("Config for SDK module \"" + module_name + "\" not found")
            continue

        module_config = utils.load_json(module_config_path)
        for module_file in module_config["files"]:
            src_path = os.path.join(module_path, module_file)
            dest_path = libs_dest_dir + "/" + module_name + "-" + module_file
            shutil.copyfile(src_path, dest_path)

        if args.platform == "android":
            update_android.apply_sdk_module(args, module_name, config, module_config)
def run(args):
    """Classify the configured input data with a naive Bayesian classifier.

    The two trained files are looked up in ``ENV.data_dir`` under the names
    stored in the ``chn`` and ``eng`` settings; the test data path comes
    from the ``input`` setting.  When both trained files exist they are
    loaded; otherwise ``train(args)`` is called.  The resulting classifier
    is handed to ``worker.start_classify`` together with the test data path.

    Raises:
        Whatever building the file paths raises (for example ``KeyError``
        for a missing setting).  The error is printed and then re-raised.
    """
    settings = parse_settings(args.setting)
    try:
        chn_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['chn'])
        eng_trained_file = os.path.join(ENV.data_dir, "%s.json" % settings['eng'])
        test_data = os.path.join(ENV.data_dir, settings['input'])
    except Exception as e:
        # Previously the error was only printed and execution continued into
        # an UnboundLocalError on the path variables; surface the real cause.
        print(e)
        raise

    if os.path.isfile(chn_trained_file) and os.path.isfile(eng_trained_file):
        chn_freq = utils.load_json(chn_trained_file)
        eng_freq = utils.load_json(eng_trained_file)
    else:
        chn_freq, eng_freq = train(args)

    classifier = naive_baysian_classifier.NaiveBaysianClassifier(chn_freq, eng_freq)
    worker.start_classify(test_data, classifier)
def cambiar_version(cambio=1, mostrar=False):
    """Step the active file to another stored version and print the diff.

    ``DIFF_JSON`` maps a file name to the version currently selected for it.
    The versions are the sorted entries of the file's version folder.  The
    selection moves by ``cambio`` positions and is saved back.  Nothing
    happens when the move would leave the list.

    Unless ``mostrar`` is true, the view's text is replaced with the newly
    selected version.  A unified diff between the new and the previous
    version is then printed, and the cursor is moved near each hunk.
    """
    diff = utils.load_json(DIFF_JSON)
    view = sublime.active_window().active_view()
    filename = view.file_name()
    folder = get_folder(filename)
    viejo = diff[filename]

    lista = sorted(os.listdir(folder))
    i = lista.index(viejo) + cambio
    # ">=" also rejects steps larger than one that overshoot the list end.
    if i < 0 or i >= len(lista):
        return
    actual = lista[i]

    diff[filename] = actual
    utils.save_json(DIFF_JSON, diff)

    if not mostrar:
        with open(folder + os.sep + actual) as nuevo:
            utils.set_text(nuevo.read())

    print("\n")
    with open(folder + os.sep + actual, 'r') as one:
        with open(folder + os.sep + viejo, 'r') as two:
            cambios = difflib.unified_diff(one.readlines(), two.readlines())
            for line in cambios:
                line = line.strip()
                if line.startswith("@@ -"):
                    # Hunk header "@@ -START,COUNT ...": jump a few lines
                    # past the start of the hunk.
                    utils.go_line(int(line[4:line.find(",")]) + 3)
                if line.startswith(("-", "+", "@@")):
                    print(line + ":")
    print("\n")
def on_query_completions(self, view, prefix, locations):
    """Offer Go completions for the word preceding a ``.``.

    Returns the completion list for that word when ``GO_MAIN_MODULE`` has a
    non-empty entry for it; otherwise None.
    """
    if utils.get_language() != "go" or utils.get_last_character() != ".":
        return
    modules = utils.load_json(GO_MAIN_MODULE)
    word = utils.get_word(-1)
    if not modules.get(word):
        return
    return utils.get_completion_list(modules[word])
def __init__(self):
    """Load the cached tokens and vectors from ``SPACY_DIR`` and the spaCy model.

    The directory is created when missing.  Missing cache files yield
    empty dicts.  The cache sizes at load time are recorded.
    """
    os.makedirs(SPACY_DIR, exist_ok=True)

    tokens_path = os.path.join(SPACY_DIR, 'text.tokens.json')
    vectors_path = os.path.join(SPACY_DIR, 'token.vector.pkl')
    self.text_tokens_path = tokens_path
    self.token_vector_path = vectors_path

    self.text_tokens = load_json(tokens_path, {})
    self.token_vector = load_pickle(vectors_path, {})

    # Sizes of both caches as loaded.
    self.text_tokens_len = len(self.text_tokens)
    self.token_vector_len = len(self.token_vector)

    self.nlp = spacy.load('en_core_web_lg')
    self.n_calls = 0
def run(self, edit):
    """Show a quick panel listing the samples of the current language.

    The samples come from ``<packages>/../samples/<language>.json``.
    Nothing is shown when that file does not exist.
    """
    lang = utils.get_language()
    raw_path = os.path.join(sublime.packages_path(), "..", "samples", lang + ".json")
    self.rutaSamples = os.path.normpath(raw_path)
    print("la ruta es : " + self.rutaSamples)

    if os.path.exists(self.rutaSamples):
        self.samples = utils.load_json(self.rutaSamples)
        self.keys = list(self.samples.keys())
        sublime.active_window().show_quick_panel(self.keys, self.load)
def get_metrics(start, end, author=None, config=None):
    """Collect GitHub activity metrics for ``author`` between two dates.

    Args:
        start, end: Period bounds.  19-character values have their last
            nine characters removed, because the search API works with
            dates, not hours.
        author: GitHub login; defaults to ``username`` from the config.
        config: Mapping with a ``username`` key; read from
            ``$HOME/.qe-metrics/github.conf`` when omitted.

    Returns:
        A dict with ``issues``, ``pull_requests`` and ``commits`` counts.
    """
    # GitHub search API uses dates, not hours
    if len(start) == 19:
        start = start[:-9]
        end = end[:-9]

    metrics = {}

    if config is None:
        config = get_config(os.environ['HOME'] + '/.qe-metrics/github.conf')
    user = config['username']
    if not author:
        author = user

    # Issues opened during the period; the search also returns pull
    # requests, which are counted separately.
    url = 'https://api.github.com/search/issues?q=author:%s+created:%s..%s' % \
          (author, start, end)
    result = load_json(url)

    # "in" replaces dict.has_key(), which does not exist on Python 3.
    prs = sum(1 for item in result['items'] if 'pull_request' in item)
    metrics['issues'] = int(result['total_count']) - prs
    metrics['pull_requests'] = prs

    # Public events of the author, filtered down to pushes in the period.
    result = load_json('https://api.github.com/users/%s/events/public' % author)

    commits = 0
    start_dt = datetime.strptime(start, '%Y-%m-%d')
    end_dt = datetime.strptime(end + ' 23:59:59', '%Y-%m-%d %H:%M:%S')
    for event in result:
        if event['type'] == "PushEvent":
            created_at = datetime.strptime(event['created_at'], '%Y-%m-%dT%H:%M:%SZ')
            if start_dt <= created_at <= end_dt:
                commits += event['payload']['size']
    metrics['commits'] = commits

    return metrics
def load_enumerations():
    """Return ``(idx_to_key, key_to_idx)`` built from ``enumerations.json``.

    For every column, ``idx_to_key[col]`` maps a position to the key listed
    at that position, and ``key_to_idx[col]`` is the reverse mapping.
    """
    enumerations = load_json('enumerations.json')
    idx_to_key = {}
    key_to_idx = {}
    for col in enumerations:
        keys = enumerations[col]
        idx_to_key[col] = dict(enumerate(keys))
        key_to_idx[col] = {key: idx for idx, key in enumerate(keys)}
    return idx_to_key, key_to_idx
def run(self, edit):
    """Write one ``code_<name>.bat`` launcher per distinct snippet name.

    Snippet names are the keys of every JSON file in the ``snippets``
    package.  Each name is lower-cased and normalised before being used in
    the file name and in the command the launcher echoes.
    """
    snippets_dir = sublime.packages_path() + os.sep + "snippets"

    names = []
    for json_path in utils.get_files({"folder": snippets_dir, "ext": "json"}):
        names.extend(utils.load_json(json_path).keys())

    for name in list(set(names)):
        command = name.lower().replace("-", "_").replace(" ", "").replace("?", "_")
        utils.file_write(RUTA_COMANDOS + "code_" + command + ".bat",
                         "echo code_" + command + " > d:/sublime3/comando.txt")
        print(command)
def __init__(self, task):
    """Prepare the checkpoint directory and load vocabularies, label size and embeddings for ``task``."""
    self.ckpt_path = './ckpt/{}/'.format(task)
    if not os.path.exists(self.ckpt_path):
        os.makedirs(self.ckpt_path)

    source_dir = os.path.join('.', 'dataset', 'data', task)

    def in_source(file_name):
        # Path of a data file inside the task's source directory.
        return os.path.join(source_dir, file_name)

    self.word_vocab, _ = load_vocab(in_source('words.vocab'))
    self.char_vocab, _ = load_vocab(in_source('chars.vocab'))
    self.vocab_size = len(self.word_vocab)
    self.char_vocab_size = len(self.char_vocab)
    label_info = load_json(in_source('label.json'))
    self.label_size = label_info["label_size"]
    self.word_emb = load_embeddings(in_source('glove.filtered.npz'))
def main(args):
    """Update a generated project: config, libraries, platform files and assets.

    The game config is merged with the project config.  Libraries are then
    refreshed.  The Windows Phone project is updated when the platform is
    ``wp``.  Assets are updated when ``skip_assets`` equals ``"false"``.
    """
    luna2d_path = utils.get_luna2d_path()

    game_config_path = args.game_path + "/config.luna2d"
    config = utils.load_json(game_config_path)
    project_config = read_project_config(args.project_path)
    build_config_path = args.project_path + "/.luna2d/build.luna2d"
    build_config = utils.load_json(build_config_path)

    print("Updating config...")
    merge_configs(config, project_config)

    print("Updating libraries...")
    update_libs(args, luna2d_path, config)

    print("Updating project..")
    is_windows_phone = args.platform == "wp"
    if is_windows_phone:
        update_wp.do_update(args, config, build_config["projectName"])

    assets_wanted = args.skip_assets == "false"
    if assets_wanted:
        update_assets(args, luna2d_path, config)

    print("Done")
def main():
    """Train the model on the task's datasets.

    Without a dev set, the dev data is appended to the training data and
    no dev set is passed to training.
    """
    source_dir = os.path.join('.', 'dataset', 'data', task)

    # create config
    config = Config(task)

    # load datasets
    trainset, devset, testset = (
        load_json(os.path.join(source_dir, file_name))
        for file_name in ('train.json', 'dev.json', 'test.json')
    )

    # build model
    model = DenseConnectBiLSTM(config, resume_training=resume_training)

    # training
    batch_size, epochs = 200, 30
    if not has_devset:
        trainset = trainset + devset
        devset = None
    model.train(trainset, devset, testset, batch_size=batch_size, epochs=epochs, shuffle=True)
def run(function_path, experiment_config_file, job_config_file, force):
    """Register a job for its experiment and execute it.

    Args:
        function_path: Reference to the object whose ``jobman_main`` is run,
            resolved after stripping what ``function_path_re`` matches.
        experiment_config_file: JSON file with ``experiment_name`` and
            ``clusters``.
        job_config_file: JSON file holding the job state; its directory
            becomes the working directory for the run.
        force: Run the job even if its stored status is not START.

    Raises:
        RuntimeError: The stored job is not in the START status and
            ``force`` is false.
    """
    job_dir = os.path.dirname(job_config_file)
    experiment_config = load_json(os.path.join(experiment_config_file))
    state = load_json(job_config_file)
    with ChangeDir(job_dir):
        experiment = experiment_scheduler.save_experiment(
            name=experiment_config["experiment_name"],
            table_name=experiment_config["experiment_name"] + "_jobs",
            clusters=experiment_config["clusters"],
            duree="", mem="", env="", gpu="")
        table_name = experiment["table"]
        channel = Channel()
        state["jobman"] = dict(status=channel.START)
        # Look the job up under both the START and the RUNNING status.  The
        # copy is shallow, so rebinding its "jobman" entry leaves `state`
        # itself in the START status.
        state_to_hash = copy.copy(state)
        jobs = job_scheduler.load_jobs(table_name, hash_of=state_to_hash)
        state_to_hash["jobman"] = dict(status=channel.RUNNING)
        jobs += job_scheduler.load_jobs(table_name, hash_of=state_to_hash)
        if len(jobs) > 0:
            logger.warning("Job already registered, loading from database")
            state = jobs[0]
        if state["jobman"]["status"] != channel.START:
            if not force:
                raise RuntimeError("Job (%d) is not available" % state["id"])
            logging.warning("Job (%d) is not available. Forcing it to run" % state["id"])
        # Mark the job as running before handing control to the job function.
        state["jobman"]["status"] = channel.RUNNING
        state = job_scheduler.save_job(table_name, state)
        resolve(function_path_re.sub('', function_path)).jobman_main(state, channel)
        # Persist the state again once the job function has returned.
        job_scheduler.save_job(experiment["table"], state)
import pytest
import pandas as pd
import numpy as np
from data import SF1Data
from utils import load_json

config = load_json('config.json')

# (tickers, quarter_count, dimension) combinations for the quarterly loader.
QUARTERLY_CASES = [
    (['AAPL', 'ZRAN', 'TSLA', 'WORK'], 10, 'ARQ'),
    (['INTC', 'ZRAN', 'XRDC', 'XOM'], 5, 'ARQ'),
    (['INTC', 'ZRAN', 'XRDC', 'XOM'], 5, 'MRY'),
    (['NVDA'], 10, 'ARQ'),
    (['ZRAN'], 10, 'ARQ'),
]


class TestSF1Data:
    """Checks for the SF1 data loader configured through ``config.json``."""

    def test_load_base_data(self):
        """Base data is a non-empty DataFrame with a fully populated ``ticker`` column."""
        loader = SF1Data(config['sf1_data_path'])
        base_df = loader.load_base_data()
        assert type(base_df) == pd.DataFrame
        assert len(base_df) > 0
        assert 'ticker' in base_df.columns
        assert base_df['ticker'].isnull().max() == False

    @pytest.mark.parametrize(
        ["tickers", "quarter_count", "dimension"],
        QUARTERLY_CASES,
    )
    def test_load_quarterly_data(self, tickers, quarter_count, dimension):
        """Quarterly data is a DataFrame with a ``ticker`` column."""
        loader = SF1Data(config['sf1_data_path'])
        quarterly_df = loader.load_quarterly_data(tickers, quarter_count, dimension)
        assert type(quarterly_df) == pd.DataFrame
        assert 'ticker' in quarterly_df.columns
def export(brand):
    """Write ``output/excel/<brand>.xlsx`` from the brand's JSON data files.

    Every ``output/data/*.json`` file whose name starts with ``<brand>_`` is
    read.  One row is written per product of every filter category of every
    item.  The product detail columns are filled only when
    ``get_product_details`` returns something.  ``/`` in the brand is
    replaced with ``#`` in the file name.
    """
    workbook = xlsxwriter.Workbook(
        'output/excel/' + brand.replace('/', '#') + '.xlsx',
        {'strings_to_urls': False})
    worksheet = workbook.add_worksheet('jsfilter')

    header_cells = (
        ('A1', 'Brand'), ('B1', 'Class'), ('C1', 'Model'), ('D1', 'Year'),
        ('E1', 'Engine Vol'), ('F1', 'Engine No'), ('G1', 'Body No'),
        ('H1', 'Filter Type'), ('I1', 'Product Name'), ('J1', 'Product Code'),
        ('K1', 'Specifications'), ('L1', 'Cross Reference'),
        ('M1', 'Applications'), ('N1', 'Image URL'), ('O1', 'Product URL'),
    )
    for cell, title in header_cells:
        worksheet.write(cell, title)

    for column_range, width in (('A:H', 15), ('I:I', 30), ('J:J', 15), ('K:O', 30)):
        worksheet.set_column(column_range, width)

    categories = ['oil', 'air', 'fuel', 'cabin', 'trans']
    row, col = 1, 0
    with os.scandir('output/data') as entries:
        for entry in entries:
            if not entry.name.endswith('.json'):
                continue
            if entry.name.split('_')[0] != brand:
                continue
            for item in load_json(entry.path):
                for category in categories:
                    for product in item[category]:
                        details = get_product_details(category, product['code'])

                        # Vehicle columns A-H.
                        vehicle_cells = (
                            item['brand'], item['class'], item['model'],
                            item['year'], item['engine_vol'],
                            item['engine_no'], item['body_no'],
                            category.upper(),
                        )
                        for offset, value in enumerate(vehicle_cells):
                            worksheet.write(row, col + offset, value)
                        worksheet.write(row, col + 9, product['code'])

                        if details is not None:
                            detail_cells = (
                                (8, details['name']),
                                (10, flatten_list(details['specifications'])),
                                (11, flatten_list(details['cross_reference'])),
                                (12, flatten_list(details['applications'])),
                                (13, details['image_url']),
                                (14, details['url']),
                            )
                            for offset, value in detail_cells:
                                worksheet.write(row, col + offset, value)

                        # Progress indicator, overwritten in place.
                        print('\r', brand, row, end='')
                        row += 1
    workbook.close()
# fileName = 'hive_tb_day_lp_30day_bfor_data_2020_210201.csv', # fileName = 'hive_tb_day_lp_30day_bfor_data_2020.csv', period = '2020-01-01:2020-12-31', # 좌측, 우측 모두 포함 # busiDiv = ['산업'], # category = ['금속', '병원'], # enteCode = ['055344', '000014'] # 하위기준이 우선순위가 높음 ) df = viewHeader(myDir=r'D:\데이터 분석\data\postGres', fileName = 'post_tb_asos_mart_hour_20200101_20201231.csv', reload = True, pickleWrite=False, nrows=2) cust_no_lst_api_df = utils.load_json(path = 'D:\데이터 분석\data', fileName = 'custLst_210216.txt') importlib.reload(utils) cust_no_lst_hive_df = utils.load_cust_no_list_hive( path = 'D:\데이터 분석\data', fileName = 'hive_tb_cust_no_list_20201201_20210217.csv', date = '20210216') cust_no_lst_hive_set = set(cust_no_lst_hive_df.cust_no) cust_no_lst_api_set = set(cust_no_lst_api_df.custNo) print(cust_no_lst_hive_set - cust_no_lst_api_set ) print(cust_no_lst_api_set - cust_no_lst_hive_set )
async def get_movies_posters(kp: KP, assets_folder: str):
    """Run the photo tasks for every movie in ``parser/movies.json`` concurrently."""
    movies = load_json('parser/movies.json')
    print("Movies started")
    pending = _get_tasks(movies, kp.get_film_photo, assets_folder=assets_folder)
    await asyncio.gather(*pending)
    print("Movies Done!")
def test_opus_mt_distill_script(self):
    """Run the Marian no-teacher distillation script end to end.

    The arguments are taken from ``distil_marian_no_teacher.sh``, with its
    shell variables replaced by small test values.  The test then checks the
    saved metrics, the Lightning checkpoint and the prediction output files.
    """
    data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro"
    # Literal substrings of the bash script and what to replace them with.
    env_vars_to_replace = {
        "--fp16_opt_level=O1": "",
        "$MAX_LEN": 128,
        "$BS": 16,
        "$GAS": 1,
        "$ENRO_DIR": data_dir,
        "$m": "sshleifer/student_marian_en_ro_6_1",
        "val_check_interval=0.25": "val_check_interval=1.0",
    }

    # Clean up bash script: keep what follows "distillation.py", join the
    # continued lines and drop the "$@" pass-through.
    bash_script = ((self.test_file_dir / "distil_marian_no_teacher.sh"
                    ).open().read().split("distillation.py")[1].strip())
    bash_script = bash_script.replace("\\\n", "").strip().replace('"$@"', "")
    bash_script = bash_script.replace("--fp16 ", " ")

    for k, v in env_vars_to_replace.items():
        bash_script = bash_script.replace(k, str(v))
    output_dir = self.get_auto_remove_tmp_dir()
    bash_script = bash_script.replace("--fp16", "")
    epochs = 6
    testargs = (["distillation.py"] + bash_script.split() + [
        f"--output_dir={output_dir}",
        "--gpus=1",
        "--learning_rate=1e-3",
        f"--num_train_epochs={epochs}",
        "--warmup_steps=10",
        "--val_check_interval=1.0",
        "--do_predict",
    ])
    # Parse the arguments as if they had been given on the command line.
    with patch.object(sys, "argv", testargs):
        parser = argparse.ArgumentParser()
        parser = pl.Trainer.add_argparse_args(parser)
        parser = SummarizationDistiller.add_model_specific_args(
            parser, os.getcwd())
        args = parser.parse_args()
        # NOTE: asserting args.gpus here breaks for multi-GPU runs.

        model = distill_main(args)

    # Check metrics
    metrics = load_json(model.metrics_save_path)
    first_step_stats = metrics["val"][0]
    last_step_stats = metrics["val"][-1]
    assert len(
        metrics["val"]) >= (args.max_epochs / args.val_check_interval
                            )  # +1 accounts for val_sanity_check

    assert last_step_stats["val_avg_gen_time"] >= 0.01

    assert first_step_stats["val_avg_bleu"] < last_step_stats[
        "val_avg_bleu"]  # fails when the model learned nothing
    assert 1.0 >= last_step_stats[
        "val_avg_gen_time"]  # fails when generate hangs; maybe a bad config was saved
    assert isinstance(last_step_stats[f"val_avg_{model.val_metric}"],
                      float)

    # check lightning ckpt can be loaded and has a reasonable statedict
    contents = os.listdir(output_dir)
    ckpt_path = [x for x in contents if x.endswith(".ckpt")][0]
    full_path = os.path.join(args.output_dir, ckpt_path)
    ckpt = torch.load(full_path, map_location="cpu")
    expected_key = "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"
    assert expected_key in ckpt["state_dict"]
    assert ckpt["state_dict"][
        "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"].dtype == torch.float32

    # TODO: turn on args.do_predict when PL bug fixed.
    if args.do_predict:
        contents = {os.path.basename(p) for p in contents}
        assert "align_wnums_test_generations.txt" in contents
        assert "test_results.txt" in contents
        assert len(metrics["test"]) == 1
parser.add_argument('--action', type=str, default=None, choices=['create_dict', 'plot', 'acc_by_type_plot', 'magnum_opus'], help='create_qtype_dict or plot') parser.add_argument('--lanecheck_path', type=str, default=None, help='Where to get the lanecheck dictionary from') parser.add_argument('--model', type=str, default=None, help='Model name') args = parser.parse_args() if not args.action: sys.exit() # If you want to create a dictinary if args.action == 'create_dict': train_dset = load_json(os.path.expanduser("~/kable_management/data/tvqa/tvqa_train_processed.json")) val_dset = load_json(os.path.expanduser("~/kable_management/data/tvqa/tvqa_val_processed.json")) test_dset = load_json(os.path.expanduser("~/kable_management/data/tvqa/tvqa_test_public_processed.json")) total_dset = [] total_dset += train_dset total_dset += val_dset total_dset += test_dset create_q_type_dict(train_dset, "train") create_q_type_dict(val_dset, "val") create_q_type_dict(test_dset, "test") create_q_type_dict(total_dset, 'total') # Plot the dictionary of question types if args.action == 'plot': train_qtype_dict = load_pickle(os.path.expanduser("~/kable_management/data/tvqa/q_type/train_q_type_dict.pickle"))
logging.info("data upload succeeded") else: logging.error("failed to upload data") def upload_data(couch, data, db_name): db = get_db(couch, db_name) bulk_upload_data(db_name, data) def populate_couch(data, info): try: couch = couchdb.Server(os.environ["COUCH_CONNECTION_URL"]) delete_db(couch, "airports") delete_db(couch, "flights") logging.info("starting data upload") upload_data(couch, data, "flights") upload_data(couch, info, "airports") logging.info("data upload complete") except Exception as e: logging.error(e) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) flights = utils.load_json("flight_data.json") airports = utils.load_json("airports.json") populate_couch(flights, airports)
def extract_sentences(max_processed=-1):
    """Export the summarised paragraphs, with their source URL, as JSON lines.

    ``summary_raw.json`` maps a summary path to the raw file it came from,
    and ``file_url.json`` maps a raw file to its URL; together they give a
    URL per summary path.  Every paragraph of every summary file is counted.
    Paragraphs of at least 30 characters whose summary has a known URL are
    written to ``blog.paragraphs.jsonl`` as
    ``{"text": ..., "meta": {"url": ...}}`` records.

    ``max_processed`` is not used by this function.
    """
    path_raw = load_json('summary_raw.json')
    path_raw = {fix(k): fix(v) for k, v in path_raw.items()}
    file_url_path = join(root, 'file_url.json')
    raw_url = load_json(file_url_path)
    raw_url = {fix(k): v for k, v in raw_url.items()}

    # Diagnostic dump of the first few entries of both mappings.
    print('+' * 80)
    print('path_raw', len(path_raw))
    for i, k in enumerate(sorted(path_raw)[:5]):
        print('%d: %s %s %s %s' % (i, k, exists(k), path_raw[k], exists(path_raw[k])))
    print('#' * 80)
    print('raw_url', len(raw_url))
    for i, k in enumerate(sorted(raw_url)[:5]):
        print('%d: %s %s' % (i, k, exists(k)))

    # Summary path -> URL, for summaries whose raw file has a known URL.
    path_url = {path: raw_url[raw] for path, raw in path_raw.items() if raw in raw_url}

    files = glob(join(summaries_dir, '*.json'))
    print('%4d files' % len(files))
    print('%4d path_url' % len(path_url))

    para_count = defaultdict(int)
    sent_count = defaultdict(int)
    para_url = {}
    sent_url = {}
    for path in files:
        path = fix(abspath(path))
        summary = load_json(path)
        for para in summary['text:paras']:
            para_count[para] += 1
            # Short paragraphs are counted but never given a URL.
            if len(para) < 30:
                continue
            if para not in para_url:
                if path not in path_url:
                    continue
                assert path in path_url, path
                para_url[para] = path_url.get(path, "UNKNOWN")

    print('%d paragraphs %d unique' % (sum(para_count.values()), len(para_count)))

    def sent_key(sent):
        # Longest text first.  sent_count is never filled in this function,
        # so the middle component is 0 for every text.
        return -len(sent), sent_count[sent], sent

    paras = sorted(para_count, key=sent_key)
    # Only paragraphs that received a URL are exported.
    paras = [{'text': text, 'meta': {'url': para_url[text]}} for text in paras if text in para_url]

    # Example records in a similar JSON-lines layout:
    # {"text":"...","meta":{"source":"GitHub","url":"https://github.com/rdbc-io/rdbc/issues/86"}}
    # {"text":"Uber\u2019s Lesson: Silicon Valley\u2019s Start-Up Machine Needs Fixing","meta":{"source":"The New York Times"}}
    save_jsonl('blog.paragraphs.jsonl', paras)
if book: time = second_to_hour(sum(duration)) file_count = len(duration) total_count += file_count if print_detail: print(" [*] Duration of {}: {} (file #: {})". \ format(book, time, file_count)) print(" [*] Total Duration : {} (file #: {})". \ format(second_to_hour(duration_all), total_count)) print() return duration_all if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--audio-pattern', default=None) # datasets/krbook/audio/*.wav parser.add_argument('--data-path', default=None) # datasets/jtbc/alignment.json config, unparsed = parser.parse_known_args() if config.audio_pattern is not None: duration = get_durations(get_paths_by_pattern(config.data_dir)) elif config.data_path is not None: paths = load_json(config.data_path, encoding="utf8").keys() duration = get_durations(paths)
# model_config_list.append(model_config) # else: # print("predict all model") # model_config = { # "path": model_dir / f"all_model.pth", # "model_name": config["model"]["name"], # "n_class": len(utils.BIRD_CODE), # "in_chans": config["model"]["in_chans"], # } # model_config_list.append(model_config) if args.th: print(f"override threshold with {args.th}") threshold = args.th else: threshold = utils.load_json(model_dir / "threshold.json") # test_df = pd.read_csv(utils.DATA_DIR / "test.csv") test_audio_dir = utils.DATA_DIR / "test_audio" if test_audio_dir.exists(): test_df = pd.read_csv(utils.DATA_DIR / "test.csv") else: check_dir = Path("/kaggle/input/birdcall-check/") test_audio_dir = check_dir / "test_audio" test_df = pd.read_csv(check_dir / "test.csv") weights_path = Path(args.model_dir) / "all" / "checkpoints" / "best.pth" # weights_path = Path(args.model_dir) / "fold0" / "checkpoints" / "best.pth" prediction_df = prediction( test_df=test_df, test_audio=test_audio_dir,
def fetch_data():
    """
    Consume RSVP messages and write them to Cassandra.

    Iterates over the module-level consumer (nothing happens when it is
    falsy), decodes each message value with load_json, pulls the venue,
    member, event and group values out of the resulting dict with
    get_dict_val and calls write_to_cassandra with them. After each write the
    group values are printed as one space-separated line.
    """
    if not consumer:
        return

    for message in consumer:
        message = load_json(message.value)  # Convert to json

        venue = get_dict_val(message, 'venue')
        if venue:
            venue_name = get_dict_val(venue, 'venue_name')
            venue_lon = get_dict_val(venue, 'lon')
            venue_lat = get_dict_val(venue, 'lat')
            venue_id = get_dict_val(venue, 'venue_id')
        else:
            venue_name, venue_lon, venue_lat, venue_id = None, None, None, None

        visibility = get_dict_val(message, 'visibility')
        response = get_dict_val(message, 'response')
        guests = get_dict_val(message, 'guests')

        # Member who RSVP'd
        member = get_dict_val(message, 'member')
        if member:
            member_id = get_dict_val(member, 'member_id')
            member_name = get_dict_val(member, 'member_name')
        else:
            member_id, member_name = '', ''

        rsvp_id = get_dict_val(message, 'rsvp_id')

        # since epoch
        mtime = get_dict_val(message, 'mtime')
        if mtime:
            rsvp_last_modified_time = datetime_from_epoch(mtime)
        else:
            rsvp_last_modified_time = None

        # Event for the RSVP
        event = get_dict_val(message, 'event')
        if event:
            event_name = get_dict_val(event, 'event_name')
            # Named event_epoch (not "time") so the local cannot shadow a
            # module called time.
            event_epoch = get_dict_val(event, 'time')
            if event_epoch:
                event_time = datetime_from_epoch(event_epoch)
            else:
                event_time = None
            event_url = get_dict_val(event, 'event_url')
        else:
            event_name, event_time, event_url = '', '', ''

        # Group hosting the event
        group = get_dict_val(message, 'group')
        if group:
            group_topics = get_dict_val(group, 'group_topics')
            if group_topics:
                topic_names = (
                    get_dict_val(each_group_topic, 'topic_name')
                    for each_group_topic in group_topics
                )
                # str.join raises TypeError on None, so a topic without a
                # name is skipped instead of aborting the consumer loop.
                # NOTE(review): assumes get_dict_val yields None for a
                # missing key - confirm against its definition.
                group_topic_names = ','.join(
                    name for name in topic_names if name is not None)
            else:
                group_topic_names = ''
            group_city = get_dict_val(group, 'group_city')
            group_country = get_dict_val(group, 'group_country')
            group_id = get_dict_val(group, 'group_id')
            group_name = get_dict_val(group, 'group_name')
            group_lon = get_dict_val(group, 'group_lon')
            group_state = get_dict_val(group, 'group_state')
            group_lat = get_dict_val(group, 'group_lat')
        else:
            group_topic_names, group_city, group_country, group_id, \
                group_name, group_lon, group_state, group_lat = \
                '', '', '', '', '', '', '', ''

        # Write data to Cassandra database
        write_to_cassandra(
            venue_name=venue_name,
            venue_lon=venue_lon,
            venue_lat=venue_lat,
            venue_id=venue_id,
            visibility=visibility,
            response=response,
            guests=guests,
            member_id=member_id,
            member_name=member_name,
            rsvp_id=rsvp_id,
            rsvp_last_modified_time=rsvp_last_modified_time,
            event_name=event_name,
            event_time=event_time,
            event_url=event_url,
            group_topic_names=group_topic_names,
            group_country=group_country,
            group_state=group_state,
            group_city=group_city,
            group_name=group_name,
            group_lon=group_lon,
            group_lat=group_lat,
            group_id=group_id,
        )

        result = u' '.join(
            (str(group_topic_names), str(group_city), str(group_country),
             str(group_id), str(group_name), str(group_lon),
             str(group_state), str(group_lat))).encode('utf-8').strip()
        print(result)
def run_bgpstream(
    prefixes_file=None,
    kafka_host=None,
    kafka_port=None,
    kafka_topic="openbmp.bmp_raw",
    start=0,
    end=0,
):
    """
    Retrieve all records related to a list of prefixes
    https://bgpstream.caida.org/docs/api/pybgpstream/_pybgpstream.html

    Every announcement ("A") or withdrawal ("W") element whose prefix is
    equal to, or more specific than, one of the configured prefixes is
    validated, normalized with normalize_msg_path and published to the
    "bgp-update" exchange with routing key "update". This function loops
    forever and does not return.

    :param prefixes_file: <str> input prefix json
    :param kafka_host: <str> kafka host
    :param kafka_port: <int> kafka_port
    :param kafka_topic: <str> kafka topic
    :param start: <int> start timestamp in UNIX epochs
    :param end: <int> end timestamp in UNIX epochs (if 0 --> "live mode")

    :return: -
    """
    prefixes = load_json(prefixes_file)
    assert prefixes is not None

    # Parse the configured prefixes once instead of once per BGP element.
    our_prefixes = [IPNetwork(prefix) for prefix in prefixes]

    # create a new bgpstream instance and a reusable bgprecord instance
    stream = _pybgpstream.BGPStream()

    # set kafka data interface
    stream.set_data_interface("kafka")

    # set host connection details
    stream.set_data_interface_option("kafka", "brokers",
                                     "{}:{}".format(kafka_host, kafka_port))

    # set topic
    stream.set_data_interface_option("kafka", "topic", kafka_topic)

    # filter prefixes
    for prefix in prefixes:
        stream.add_filter("prefix", prefix)

    # filter record type
    stream.add_filter("record-type", "updates")

    # filter based on timing (if end=0 --> live mode)
    stream.add_interval_filter(start, end)

    # set live mode
    stream.set_live_mode()

    # start the stream
    stream.start()

    with Connection(RABBITMQ_URI) as connection:
        exchange = Exchange("bgp-update",
                            channel=connection,
                            type="direct",
                            durable=False)
        exchange.declare()
        producer = Producer(connection)
        validator = mformat_validator()
        while True:
            # get next record
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit can still stop the process.
            try:
                rec = stream.get_next_record()
            except Exception:
                continue
            if (rec.status != "valid") or (rec.type != "update"):
                continue
            # get next element
            try:
                elem = rec.get_next_elem()
            except Exception:
                continue

            while elem:
                if elem.type in {"A", "W"}:
                    redis.set(
                        "bgpstreamkafka_seen_bgp_update",
                        "1",
                        ex=int(
                            os.getenv(
                                "MON_TIMEOUT_LAST_BGP_UPDATE",
                                DEFAULT_MON_TIMEOUT_LAST_BGP_UPDATE,
                            )),
                    )
                    this_prefix = str(elem.fields["prefix"])
                    service = "bgpstreamkafka|{}".format(str(rec.collector))
                    type_ = elem.type
                    if type_ == "A":
                        as_path = elem.fields["as-path"].split(" ")
                        communities = [{
                            "asn": int(comm.split(":")[0]),
                            "value": int(comm.split(":")[1]),
                        } for comm in elem.fields["communities"]]
                    else:
                        as_path = []
                        communities = []
                    timestamp = float(rec.time)
                    peer_asn = elem.peer_asn

                    # The element's prefix does not change while we scan
                    # the configured prefixes, so split and parse it once.
                    base_ip, mask_length = this_prefix.split("/")
                    seen_ip = IPAddress(base_ip)
                    seen_length = int(mask_length)

                    for our_prefix in our_prefixes:
                        if (seen_ip in our_prefix
                                and seen_length >= our_prefix.prefixlen):
                            msg = {
                                "type": type_,
                                "timestamp": timestamp,
                                "path": as_path,
                                "service": service,
                                "communities": communities,
                                "prefix": this_prefix,
                                "peer_asn": peer_asn,
                            }
                            if validator.validate(msg):
                                for norm_msg in normalize_msg_path(msg):
                                    key_generator(norm_msg)
                                    log.debug(norm_msg)
                                    producer.publish(
                                        norm_msg,
                                        exchange=exchange,
                                        routing_key="update",
                                        serializer="ujson",
                                    )
                            else:
                                log.warning(
                                    "Invalid format message: {}".format(msg))
                            # the first matching configured prefix is enough
                            break
                try:
                    elem = rec.get_next_elem()
                except Exception:
                    # Give up on this record. Re-entering the loop with the
                    # old element would publish it again, possibly forever.
                    break
def parse_ripe_ris(connection, prefixes_file, hosts):
    """
    Stream RIPE RIS live messages and publish the relevant BGP updates.

    The prefixes loaded from prefixes_file are inserted into one PyTricia
    tree per IP version. Each UPDATE message read from the RIS live stream
    is normalized with normalize_ripe_ris against those trees, validated,
    normalized with normalize_msg_path and published to the "bgp-update"
    exchange with routing key "update". When the stream ends it is reopened
    immediately; when opening or reading it raises, the error is logged and
    the next attempt happens after 60 seconds. This function does not return.

    :param connection: connection used for the exchange and the producer
    :param prefixes_file: <str> input prefix json
    :param hosts: collection of RIS hosts to accept; a falsy value accepts
        messages from every host
    """
    exchange = Exchange("bgp-update",
                        channel=connection,
                        type="direct",
                        durable=False)
    exchange.declare()

    prefixes = load_json(prefixes_file)
    assert prefixes is not None
    prefix_tree = {"v4": pytricia.PyTricia(32), "v6": pytricia.PyTricia(128)}
    for prefix in prefixes:
        ip_version = get_ip_version(prefix)
        prefix_tree[ip_version].insert(prefix, "")

    ris_suffix = os.getenv("RIS_ID", "my_as")
    # The URL is the same for every reconnect, so build it once.
    ris_url = (
        "https://ris-live.ripe.net/v1/stream/?format=json&client=artemis-{}"
        .format(ris_suffix))

    validator = mformat_validator()
    with Producer(connection) as producer:
        while True:
            try:
                # The with-block closes the streaming response before the
                # next retry; otherwise every reconnect leaks a connection.
                with requests.get(ris_url, stream=True, timeout=10) as events:
                    # http://docs.python-requests.org/en/latest/user/advanced/#streaming-requests
                    iterator = events.iter_lines()
                    next(iterator)
                    for data in iterator:
                        try:
                            parsed = json.loads(data)
                            msg = parsed["data"]
                            if ("type" in parsed
                                    and parsed["type"] == "ris_error"):
                                log.error(msg)
                            # also check if ris host is in the configuration
                            elif ("type" in msg and msg["type"] == "UPDATE"
                                  and (not hosts or msg["host"] in hosts)):
                                norm_ris_msgs = normalize_ripe_ris(
                                    msg, prefix_tree)
                                for norm_ris_msg in norm_ris_msgs:
                                    redis.set(
                                        "ris_seen_bgp_update",
                                        "1",
                                        ex=int(
                                            os.getenv(
                                                "MON_TIMEOUT_LAST_BGP_UPDATE",
                                                DEFAULT_MON_TIMEOUT_LAST_BGP_UPDATE,
                                            )),
                                    )
                                    if validator.validate(norm_ris_msg):
                                        norm_path_msgs = normalize_msg_path(
                                            norm_ris_msg)
                                        for norm_path_msg in norm_path_msgs:
                                            key_generator(norm_path_msg)
                                            log.debug(norm_path_msg)
                                            producer.publish(
                                                norm_path_msg,
                                                exchange=exchange,
                                                routing_key="update",
                                                serializer="ujson",
                                            )
                                    else:
                                        log.warning(
                                            "Invalid format message: {}".format(
                                                msg))
                        except Exception:
                            # one bad line must not end the whole stream
                            log.exception("exception message {}".format(data))
                log.warning(
                    "Iterator ran out of data; the connection will be retried")
            except Exception:
                log.exception("server closed connection")
                time.sleep(60)
def test_branches(self):
    """Compare the branches endpoint body with the branches.json fixture."""
    response = self.app.get(get_path(['branches']))
    payload = json.loads(response.data)
    # Overwrite the host with the expected value before comparing.
    payload['host'] = utils.EXPECTED_HOST
    expected = utils.load_json('branches.json')
    assert payload == expected
def deserialize_dynamodb_NewImages(new_images: List[Any]) -> List[Any]:
    """Return each item of new_images passed through deserialize_dynamodb_NewImage."""
    return [deserialize_dynamodb_NewImage(image) for image in new_images]


def lambda_handler(event, context):
    """Send the records carried by event to Firehose, one put per record.

    The entries under event["Records"] go through filter_events,
    extract_NewImage and deserialize_dynamodb_NewImages; each resulting
    record is JSON-encoded, sent with Firehose.put_item and printed.
    context is not used.
    """
    # log
    print(event)

    # create Firehose client
    firehose = Firehose()

    # process event
    filtered = filter_events(event["Records"])
    records_dynamodb = deserialize_dynamodb_NewImages(
        extract_NewImage(filtered))

    # transmit one record at a time
    for record in records_dynamodb:
        firehose.put_item({"Data": json.dumps(record)})
        print(record)


if __name__ == "__main__":
    # test
    lambda_handler(utils.load_json("./event.json"), {})
def test_commit(self):
    """Compare the commit endpoint body for EXPECTED_REV with commit.json."""
    commit_path = get_path(['commit', utils.EXPECTED_REV])
    response = self.app.get(commit_path)
    received = json.loads(response.data)
    assert received == utils.load_json('commit.json')
def _request(self, url, data=None, method=None):
    """Send an HTTP request to the remote server.

    Args:
      url - The URL to send the request to. When it carries a username,
          the credentials are removed from the requested URL and supplied
          through an HTTP basic-auth handler instead.
      data - The message body to send.
      method - A string for the HTTP method to send the request with.

    Returns:
      For a 4xx response, a dictionary with the status code under
      'status' and the raw body under 'value'. For an 'application/json'
      response, the parsed JSON dictionary, with 'value' set to None when
      the server left it out. For an 'image/png' response, a dictionary
      with status 0 and the body as 'value'. None for any other content
      type.
    """
    # Pass the values as arguments so the message is only formatted when
    # debug logging is enabled.
    LOGGER.debug('%s %s %s', method, url, data)

    parsed_url = urlparse.urlparse(url)
    password_manager = None
    if parsed_url.username:
        netloc = parsed_url.hostname
        if parsed_url.port:
            netloc += ":%s" % parsed_url.port
        cleaned_url = urlparse.urlunparse(
            (parsed_url.scheme, netloc, parsed_url.path, parsed_url.params,
             parsed_url.query, parsed_url.fragment))
        password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(
            None, "%s://%s" % (parsed_url.scheme, netloc),
            parsed_url.username, parsed_url.password)
        request = Request(cleaned_url, data=data, method=method)
    else:
        request = Request(url, data=data, method=method)

    request.add_header('Accept', 'application/json')

    # The basic-auth handler is only added when the URL carried credentials.
    handlers = [urllib2.HTTPRedirectHandler(), HttpErrorHandler()]
    if password_manager:
        handlers.append(urllib2.HTTPBasicAuthHandler(password_manager))
    opener = urllib2.build_opener(*handlers)

    response = opener.open(request)
    try:
        if 399 < response.code < 500:
            return {'status': response.code, 'value': response.read()}
        body = response.read().replace('\x00', '').strip()
        content_type = response.info().getheader('Content-Type') or ''
        if 'application/json' in content_type:
            payload = utils.load_json(body)
            assert type(payload) is dict, (
                'Invalid server response body: %s' % body)
            assert 'status' in payload, (
                'Invalid server response; no status: %s' % body)
            # Some of the drivers incorrectly return a response
            # with no 'value' field when they should return null.
            if 'value' not in payload:
                payload['value'] = None
            return payload
        elif 'image/png' in content_type:
            return {'status': 0, 'value': body}
    finally:
        response.close()
def test_history(self):
    """Compare the history endpoint body with the history.json fixture."""
    response = self.app.get(get_path(['history']))
    entries = json.loads(response.data)
    # The activities of every entry are emptied before the comparison.
    for entry in entries:
        entry['activities'] = []
    expected = utils.load_json('history.json')
    assert entries == expected
from utils import load_json
from pprint import pprint

# batch0_uc_idxs = load_json('../exp/hope_cifar10_imb1_s0.4_r1.0_m50_Mar23_201207/batch0_uc_idxs.json')
epoch_total_uc_idxs = load_json(
    '../exp/hope_cifar10_imb1_s0.4_r1.0_m100_Mar27_102601/epoch_total_uc_idxs.json'
)

# For each pair of consecutive epochs print both list lengths and how many
# indices the two lists have in common. Keys are epoch numbers as strings,
# and the first epoch looked up is 30.
print('epoch\tlen(idxs)\tcommon')
first_epoch = 30
stop_epoch = first_epoch + len(epoch_total_uc_idxs) - 1
for epoch in range(first_epoch, stop_epoch):
    following = epoch + 1
    cur_ucs = epoch_total_uc_idxs[str(epoch)]
    next_ucs = epoch_total_uc_idxs[str(following)]
    shared = set(cur_ucs).intersection(next_ucs)
    print(
        f'{epoch}-{following}:\t{len(cur_ucs)}-{len(next_ucs)}\t{len(shared)}'
    )
def test_train_mbart_cc25_enro_script(self):
    """Run finetune.py with the arguments of train_mbart_cc25_enro.sh.

    The arguments are taken from the text that follows "finetune.py" in
    the shell script, with the placeholders listed in env_vars_to_replace
    substituted. After training, the saved validation metrics and the
    lightning checkpoint written to the output directory are checked.
    """
    data_dir = "examples/seq2seq/test_data/wmt_en_ro"
    env_vars_to_replace = {
        "--fp16_opt_level=O1": "",
        "$MAX_LEN": 128,
        "$BS": 4,
        "$GAS": 1,
        "$ENRO_DIR": data_dir,
        "facebook/mbart-large-cc25": MODEL_NAME,
        # Download is 120MB in previous test.
        "val_check_interval=0.25": "val_check_interval=1.0",
    }

    # Clean up bash script
    # read_text() closes the file; open().read() left the handle open.
    script_text = Path("examples/seq2seq/train_mbart_cc25_enro.sh").read_text()
    bash_script = script_text.split("finetune.py")[1].strip()
    bash_script = bash_script.replace("\\\n", "").strip().replace('"$@"', "")
    for k, v in env_vars_to_replace.items():
        bash_script = bash_script.replace(k, str(v))
    output_dir = self.get_auto_remove_tmp_dir()

    bash_script = bash_script.replace("--fp16 ", "")
    testargs = (["finetune.py"] + bash_script.split() + [
        f"--output_dir={output_dir}",
        "--gpus=1",
        "--learning_rate=3e-1",
        "--warmup_steps=0",
        "--val_check_interval=1.0",
        "--tokenizer_name=facebook/mbart-large-en-ro",
    ])
    with patch.object(sys, "argv", testargs):
        parser = argparse.ArgumentParser()
        parser = pl.Trainer.add_argparse_args(parser)
        parser = SummarizationModule.add_model_specific_args(
            parser, os.getcwd())
        args = parser.parse_args()
        args.do_predict = False
        # assert args.gpus == gpus THIS BREAKS for multigpu
        model = main(args)

    # Check metrics
    metrics = load_json(model.metrics_save_path)
    first_step_stats = metrics["val"][0]
    last_step_stats = metrics["val"][-1]
    # +1 accounts for val_sanity_check
    assert (len(metrics["val"]) ==
            (args.max_epochs / args.val_check_interval) + 1)

    assert last_step_stats["val_avg_gen_time"] >= 0.01

    # model learned nothing
    assert first_step_stats["val_avg_bleu"] < last_step_stats["val_avg_bleu"]
    # model hanging on generate. Maybe bad config was saved.
    assert 1.0 >= last_step_stats["val_avg_gen_time"]
    assert isinstance(last_step_stats[f"val_avg_{model.val_metric}"], float)

    # check lightning ckpt can be loaded and has a reasonable statedict
    contents = os.listdir(output_dir)
    ckpt_path = [x for x in contents if x.endswith(".ckpt")][0]
    full_path = os.path.join(args.output_dir, ckpt_path)
    ckpt = torch.load(full_path, map_location="cpu")
    expected_key = "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"
    assert expected_key in ckpt["state_dict"]
    assert ckpt["state_dict"][expected_key].dtype == torch.float32

    # TODO: turn on args.do_predict when PL bug fixed.
    if args.do_predict:
        contents = {os.path.basename(p) for p in contents}
        assert "test_generations.txt" in contents
        assert "test_results.txt" in contents
        # assert len(metrics["val"]) == desired_n_evals
        assert len(metrics["test"]) == 1
import os
import logging
import logging.config

import utils

# Directories: logs/ and experiments/ sit directly under BASE_DIR.
BASE_DIR = os.getcwd()  # project root
LOGS_DIR, EXPERIMENTS_DIR = (
    os.path.join(BASE_DIR, name) for name in ('logs', 'experiments')
)

# Create dirs
utils.create_dirs(LOGS_DIR)
utils.create_dirs(EXPERIMENTS_DIR)

# Loggers: the logging configuration is the dict stored in logging.json
# under BASE_DIR.
log_config = utils.load_json(
    filepath=os.path.join(BASE_DIR, 'logging.json'),
)
logging.config.dictConfig(log_config)
logger = logging.getLogger('logger')
def __init__(self, bot):
    """Store bot, start with no words and load the ideas from IDEA_SAVES."""
    self.bot, self.words = bot, None
    # Ideas are read from the file named by IDEA_SAVES at construction time.
    self.ideas = load_json(IDEA_SAVES)
def __init__(self, args, self_adv=False, cv_oc=None):
    """Read the data, preprocess the light curves and build the loaders.

    Args:
        args: settings mapping. This method reads args["d"] (device passed
            to the datasets) and args["bs"] (batch size); the object is
            also handed to process_self_adversarial.
        self_adv: when true, the train and validation sets are passed
            through process_self_adversarial.
        cv_oc: list whose first item is the family treated as outlier
            class. Every class of that family is removed from the train
            and validation sets. None or an empty list removes nothing.

    Raises:
        AssertionError: if cv_oc is given and is not a list.
    """
    #seed_everything(args.seed)
    # None replaces the former mutable [] default; callers see no change.
    if cv_oc is None:
        cv_oc = []

    self.args = args
    self.eps = 1e-10
    self.read_data()
    self.lab2idx = load_json("../datasets/ztf/lab2idx.json")
    self.family = load_json("../datasets/ztf/family.json")
    assert isinstance(cv_oc, list), "cv_oc not a list"

    # (class name, family) pairs of the outlier family; empty without cv_oc
    outlier_classes = []
    if cv_oc:
        outlier_classes = [(key, flab) for key, flab in self.family.items()
                           if flab == cv_oc[0]]

    # remove outliers from train and val data only if there is some oc
    for key, flab in outlier_classes:
        # Exception rather than a bare except, so Ctrl-C is not swallowed.
        try:
            print(key, flab, self.lab2idx[key])
            self.x_train, self.y_train = remove_data_from_selected_class(
                self.x_train, self.y_train, self.lab2idx[key])
            self.x_val, self.y_val = remove_data_from_selected_class(
                self.x_val, self.y_val, self.lab2idx[key])
        except Exception:
            print(key, flab, "not found")

    # add transformations
    if self_adv:
        self.x_train, self.y_train = process_self_adversarial(
            self.x_train, self.y_train, args)
        self.x_val, self.y_val = process_self_adversarial(
            self.x_val, self.y_val, args)

    # magnitude normalization
    self.x_train, self.mean_train, self.std_train = normalize_light_curves(
        self.x_train, minmax=False)
    self.x_val, self.mean_val, self.std_val = normalize_light_curves(
        self.x_val, minmax=False)
    self.x_test, self.mean_test, self.std_test = normalize_light_curves(
        self.x_test, minmax=False)

    # time normalization
    self.x_train = time_norm(self.x_train, log=True)
    self.x_test = time_norm(self.x_test, log=True)
    self.x_val = time_norm(self.x_val, log=True)

    # fraction of test samples that belong to the outlier family
    self.average_precision = 0
    for key, flab in outlier_classes:
        try:
            print(key, flab, self.lab2idx[key],
                  (self.y_test == self.lab2idx[key]).sum())
            self.average_precision += (
                self.y_test == self.lab2idx[key]).sum()
        except Exception:
            print(key, flab, "not found")
    print(self.average_precision)
    print(len(self.y_test))
    self.average_precision /= len(self.y_test)
    if cv_oc:
        print("{}, avg pre {}".format(cv_oc[0], self.average_precision))

    self.seq_len_train = calculate_seq_len(self.x_train)
    self.seq_len_val = calculate_seq_len(self.x_val)
    self.seq_len_test = calculate_seq_len(self.x_test)

    # temporal class shift: map the remaining train labels to 0..k-1
    self.temp_labels_dict = {
        lab: idx for idx, lab in enumerate(np.unique(self.y_train))
    }
    self.y_train = np.array(
        [self.temp_labels_dict[lab] for lab in self.y_train])
    self.y_val = np.array(
        [self.temp_labels_dict[lab] for lab in self.y_val])

    self.n_inlier_classes = len(np.unique(self.y_train))
    self.ndim = self.x_train.shape[2]

    self.train_dataset = MyDataset(self.x_train,
                                   self.y_train,
                                   self.mean_train,
                                   self.std_train,
                                   self.seq_len_train,
                                   device=args["d"])
    self.val_dataset = MyDataset(self.x_val,
                                 self.y_val,
                                 self.mean_val,
                                 self.std_val,
                                 self.seq_len_val,
                                 device=args["d"])
    self.test_dataset = MyDataset(self.x_test,
                                  self.y_test,
                                  self.mean_test,
                                  self.std_test,
                                  self.seq_len_test,
                                  device=args["d"])

    # balancing: each sample is weighted by the inverse of its class count
    labs, counts = np.unique(self.y_train, return_counts=True)
    # mask = labs != -99
    # weights = 1 / counts[mask]
    # weights /= 2 * weights.sum()
    # weights = np.insert(weights, 0, 0.5)
    weights = 1 / counts
    weights /= weights.sum()
    sample_weight = np.zeros(len(self.y_train))
    for i, lab in enumerate(labs):
        mask = self.y_train == lab
        sample_weight[mask] = weights[i]
    sampler = torch.utils.data.WeightedRandomSampler(
        sample_weight, len(sample_weight))

    self.train_dataloader = DataLoader(self.train_dataset,
                                       batch_size=self.args["bs"],
                                       sampler=sampler,
                                       drop_last=True)
    self.val_dataloader = DataLoader(self.val_dataset,
                                     batch_size=self.args["bs"],
                                     shuffle=True,
                                     drop_last=True)
    self.test_dataloader = DataLoader(self.test_dataset,
                                      batch_size=self.args["bs"],
                                      shuffle=False)
def test_commits(self):
    """Compare the commits endpoint body for a resource with commits_path.json."""
    segments = ['commits', utils.EXPECTED_BRANCH, utils.EXPECTED_RESOURCE]
    response = self.app.get(get_path(segments))
    received = json.loads(response.data)
    assert received == utils.load_json('commits_path.json')
def test_yolov3_classification():
    """Construct YOLO_V3 from the unit-test config; passes if nothing raises."""
    config = utils.load_json('./unit_test/test_config2.json')
    model = YOLO_V3(config)
def test_train_mbart_cc25_enro_script(self):
    """Run finetune.py with the arguments of train_mbart_cc25_enro.sh.

    The arguments are taken from the text that follows "finetune.py" in
    the shell script, with the placeholders listed in env_vars_to_replace
    substituted and extra command-line flags appended. After training, the
    saved metrics (generation time and BLEU on validation and test) and
    the lightning checkpoint in the output directory are checked.
    """
    env_vars_to_replace = {
        "$MAX_LEN": 64,
        "$BS": 64,
        "$GAS": 1,
        "$ENRO_DIR": self.data_dir,
        "facebook/mbart-large-cc25": MARIAN_MODEL,
        # "val_check_interval=0.25": "val_check_interval=1.0",
        "--learning_rate=3e-5": "--learning_rate 3e-4",
        "--num_train_epochs 6": "--num_train_epochs 1",
    }

    # Clean up bash script
    # read_text() closes the file; open().read() left the handle open.
    script_text = (self.test_file_dir / "train_mbart_cc25_enro.sh").read_text()
    bash_script = script_text.split("finetune.py")[1].strip()
    bash_script = bash_script.replace("\\\n", "").strip().replace('"$@"', "")
    for k, v in env_vars_to_replace.items():
        bash_script = bash_script.replace(k, str(v))
    output_dir = self.get_auto_remove_tmp_dir()

    # bash_script = bash_script.replace("--fp16 ", "")
    args = f"""
        --output_dir {output_dir}
        --tokenizer_name Helsinki-NLP/opus-mt-en-ro
        --sortish_sampler
        --do_predict
        --gpus 1
        --freeze_encoder
        --n_train 40000
        --n_val 500
        --n_test 500
        --fp16_opt_level O1
        --num_sanity_val_steps 0
        --eval_beams 2
    """.split()
    # XXX: args.gpus > 1 : handle multi_gpu in the future

    testargs = ["finetune.py"] + bash_script.split() + args
    with patch.object(sys, "argv", testargs):
        parser = argparse.ArgumentParser()
        parser = pl.Trainer.add_argparse_args(parser)
        parser = SummarizationModule.add_model_specific_args(
            parser, os.getcwd())
        args = parser.parse_args()
        model = main(args)

    # Check metrics
    metrics = load_json(model.metrics_save_path)
    first_step_stats = metrics["val"][0]
    last_step_stats = metrics["val"][-1]
    self.assertEqual(len(metrics["val"]),
                     (args.max_epochs / args.val_check_interval))
    assert isinstance(last_step_stats[f"val_avg_{model.val_metric}"], float)

    self.assertGreater(last_step_stats["val_avg_gen_time"], 0.01)
    # model hanging on generate. Maybe bad config was saved. (XXX: old comment/assert?)
    self.assertLessEqual(last_step_stats["val_avg_gen_time"], 1.0)

    # test learning requirements:

    # 1. BLEU improves over the course of training by more than 2 pts
    self.assertGreater(
        last_step_stats["val_avg_bleu"] - first_step_stats["val_avg_bleu"],
        2)

    # 2. BLEU finishes above 17
    self.assertGreater(last_step_stats["val_avg_bleu"], 17)

    # 3. test BLEU and val BLEU within ~1.1 pt.
    self.assertLess(
        abs(metrics["val"][-1]["val_avg_bleu"] -
            metrics["test"][-1]["test_avg_bleu"]), 1.1)

    # check lightning ckpt can be loaded and has a reasonable statedict
    contents = os.listdir(output_dir)
    ckpt_path = [x for x in contents if x.endswith(".ckpt")][0]
    full_path = os.path.join(args.output_dir, ckpt_path)
    ckpt = torch.load(full_path, map_location="cpu")
    expected_key = "model.model.decoder.layers.0.encoder_attn_layer_norm.weight"
    assert expected_key in ckpt["state_dict"]
    assert ckpt["state_dict"][expected_key].dtype == torch.float32

    # TODO: turn on args.do_predict when PL bug fixed.
    if args.do_predict:
        contents = {os.path.basename(p) for p in contents}
        assert "align_wnums_test_generations.txt" in contents
        assert "test_results.txt" in contents
        # assert len(metrics["val"]) == desired_n_evals
        assert len(metrics["test"]) == 1
async def get_persons_photos(kp: KP, assets_folder: str):
    """Run kp.get_person_photo tasks for the persons in parser/persons.json.

    The awaitables come from _get_tasks, which receives the loaded persons,
    kp.get_person_photo and assets_folder; they are awaited together with
    asyncio.gather. A message is printed before and after.
    """
    persons = load_json('parser/persons.json')
    print("Persons started")
    photo_tasks = _get_tasks(persons,
                             kp.get_person_photo,
                             assets_folder=assets_folder)
    await asyncio.gather(*photo_tasks)
    print("Persons Done")
def test_tags(self):
    """Compare the tags endpoint body with the tags.json fixture."""
    response = self.app.get(get_path(['tags']))
    payload = json.loads(response.data)
    # Overwrite the host with the expected value before comparing.
    payload['host'] = utils.EXPECTED_HOST
    expected = utils.load_json('tags.json')
    assert payload == expected
def main():
    """Train and evaluate a PointNet on ModelNet as set up in ./config.json.

    Reads the "data", "model" and "train" sections of the configuration,
    builds the train/eval/test loaders, trains the network, evaluates it
    on the test loader and stores the model, the best scores, the history
    and the configuration under RESULTS/<task name>.
    """
    # Load the configuration
    config = utils.load_json("./config.json")
    data_conf, model_conf, train_conf = (config["data"], config["model"],
                                         config["train"])
    device = torch.device(train_conf["device"])
    task_name = train_conf["name"] if train_conf["name"] is not None\
        else utils.task_name_generate()

    # Load the datasets
    train_trans = [
        SampleTransfer(data_conf["npoints"], data_conf["sample_method"]),
        CircleNormTransfer(),
        RandomRotationTransfer(),
        RandomJitterTransfer(data_conf["jitter_std"],
                             data_conf["jitter_clip"])
    ]
    # Evaluation and test data get no random rotation or jitter.
    eval_trans = [
        SampleTransfer(data_conf["npoints"], data_conf["sample_method"]),
        CircleNormTransfer(),
    ]
    train_dat = FileDataset.ModelNet(data_conf["dir"], phase="train")
    # The test set reuses the label encoder of the training set.
    test_dat = FileDataset.ModelNet(
        data_conf["dir"],
        phase="test",
        label_encoder=train_dat.kwargs["label_encoder"])
    train_dat, eval_dat = train_dat.split(0.1, True, config["seed"], True)
    train_dat.set_transfers(*train_trans)
    eval_dat.set_transfers(*eval_trans)
    test_dat.set_transfers(*eval_trans)
    loaders = {
        "train":
        data.DataLoader(train_dat,
                        train_conf["batch_size"],
                        True,
                        num_workers=train_conf["njobs"]),
        "eval":
        data.DataLoader(eval_dat,
                        train_conf["batch_size"],
                        False,
                        num_workers=train_conf["njobs"]),
        "test":
        data.DataLoader(test_dat,
                        train_conf["batch_size"],
                        False,
                        num_workers=train_conf["njobs"])
    }

    # Build the model
    net = PointNet(train_dat.channels,
                   train_dat.nlabels,
                   **model_conf["pointnet"],
                   stn3_kwargs=model_conf["stn3"],
                   stnk_kwargs=model_conf["stnk"]).to(device)
    criterion = CEwithReg(model_conf["reg_w"])
    optimizer = optim.Adam(net.parameters(), lr=train_conf["lr"])

    # Train
    net, hist, best = train(net, criterion, optimizer, loaders,
                            train_conf["epoch"], device, task_name)
    test_loss, test_acc = evaluate(net, criterion, loaders["test"], device)
    best["test_loss"] = test_loss
    best["test_acc"] = test_acc

    # Save the results
    task_dir = os.path.join("RESULTS", task_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(task_dir, exist_ok=True)
    torch.save(net, os.path.join(task_dir, "model.pth"))
    utils.dump_json(best, os.path.join(task_dir, "best.json"))
    utils.dump_json(hist, os.path.join(task_dir, "hist.json"))
    utils.dump_json(config, os.path.join(task_dir, "config.json"))
movie_json['age_rating'], 'year': int(movie_json['year']), 'budget': 5_000_000 } if movie_genres: fields['genres'] = movie_genres return fields if __name__ == '__main__': from utils import load_json, save_json try: movie_list_json = load_json('movies.json') person_list_json = load_json('persons.json') except FileNotFoundError: print('Run src/parser/parser.py to get raw json data') raise movies_model_list = [ { 'model': 'movies.movietype', 'pk': 1, 'fields': { 'title': 'Сериал' } }, { 'model': 'movies.movietype',