def generate_datasets(dataset, split, order):
    basedir = '../Data/%s' % dataset
    caption_ref = read_json(osp.join(basedir, 'caption_ref.json'))
    vocab = read_pkl(osp.join(basedir, 'dictionary.pkl'))
    salient_mapping = read_pkl('../Data/GloVe/vocab_pre_glove.pkl')
    file_mapping = read_pkl(osp.join(basedir, 'id2path_mapping.pkl'))
    detected_words_file = [
        osp.join(basedir, 'visual_concept_%s.json' % set_name)
        for set_name in split
    ]

    # Generate caption mapping
    caption_mapping = {}
    for set_name in split:
        if 'official' not in set_name:
            inputs = caption_ref['%s_cap' % set_name]
            for input in inputs:
                if input['id'] not in caption_mapping:
                    caption_mapping[input['id']] = [input['text']]
                else:
                    caption_mapping[input['id']].append(input['text'])

    # Training Data
    for index, name in enumerate(split):
        detected_words = read_json(detected_words_file[index])
        file_name = osp.join(basedir, 'feats_%s.h5' % name)

        # Generate image related features
        if not osp.exists(file_name):
            f = h5py.File(file_name, "w")
            semantic_flatten = f.create_dataset(
                "semantic_feats", (len(detected_words), order * 300),
                dtype="float32")
        else:
            f = h5py.File(file_name, "r+")
            semantic_flatten = f["/semantic_feats"]

        # Generate captions and imageid
        caption_flatten = []
        start_time = time.time()

        # Generate semantic features and captions files
        for i, item in enumerate(detected_words):
            words, imgid = item['text'], item['id']
            semantic_flatten[i, ...] = numpy.hstack(
                [salient_mapping[vocab[w]] for w in words])
            if len(caption_mapping) > 0:
                for j in range(5):
                    caption_flatten.append(
                        [caption_mapping[imgid][j], i, imgid])
            else:
                caption_flatten.append(["UNKNOWN SENTENCES", i, imgid])
            if i % 5000 == 0 and i > 0:
                print "used time: ", time.time() - start_time
                start_time = time.time()
                print "finished %d images" % i

        # Write into captions
        with open(osp.join(basedir, 'captions_%s.json' % name), 'w') as f:
            json.dump(caption_flatten, f)
def check_requests_and_responses(self):
    num_sites_w_reqs = 0
    num_sites_w_more_responses = 0
    num_sites_w_no_responses = 0
    requests = read_json(
        join(self.root_analysis_dir,
             "%s%s" % (self.crawl_name, NUM_REQUESTS_JSON)))
    responses = read_json(
        join(self.root_analysis_dir,
             "%s%s" % (self.crawl_name, NUM_RESPONSES_JSON)))
    for domain, num_requests in requests.iteritems():
        num_sites_w_reqs += 1
        num_responses = responses.get(domain, 0)
        if num_responses > num_requests:
            num_sites_w_more_responses += 1
        elif not num_responses:
            num_sites_w_no_responses += 1
            # to debug the domain
            # print self.crawl_name, domain, num_requests, num_responses
    # See, https://github.com/citp/openwpm-data-release/issues/1#issuecomment-415886730  # noqa
    # if num_sites_w_more_responses:
    #     print (num_sites_w_more_responses,
    #            "have more response than requests. Total:",
    #            num_sites_w_reqs)
    self.metrics.rate_visits_without_responses = (
        num_sites_w_no_responses / num_sites_w_reqs)
    # print "Num. sites with zero responses", num_sites_w_no_responses
    self.check_missing_requests(requests, responses)
def calc_vsm_perform(similarity_func=calc_inner_product):
    if similarity_func.__name__ not in [
            calc_cosine.__name__, calc_inner_product.__name__,
            calc_jaccard.__name__
    ]:
        print('Invalid similarity function...')
        return
    print('Loading preprocessed training data...')
    if file_exists(preprocess_path):
        res_lst = read_json(preprocess_path)  # load the preprocessed training set
    else:
        res_lst = read_json(train_path)  # load the raw training set
        for question in res_lst:
            question['question'] = seg_line(question['question'])
        write_json(preprocess_path, res_lst)
    print('Computing similarities...')
    res = {}
    for item in res_lst:
        q_words, pid = {}, item['pid']
        for word in item['question']:
            q_words[word] = q_words.get(word, 0) + 1
        # query term weight: idf * (1 + log10(tf))
        query_dic = {
            word: idf.get(word, 0) * (1 + log(tf, 10))
            for word, tf in q_words.items()
        }
        pred_pid = similarity_func(query_dic)[0][0]
        res[item['qid']] = int(pred_pid) == pid
        print('Progress: %.2f%%' % (len(res) / len(res_lst) * 100))
    # accuracy: fraction of questions whose top-ranked passage is the gold one
    return len(list(filter(lambda val: res[val], res))) / len(res)
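# Illustration only (not from the original sources): a minimal, self-contained
# sketch of the query weighting used in calc_vsm_perform above, assuming a toy
# idf table. Each query term is weighted by idf(word) * (1 + log10(tf)).
from math import log


def query_weights(tokens, idf):
    # count term frequencies in the query
    tf = {}
    for w in tokens:
        tf[w] = tf.get(w, 0) + 1
    # weight each term; unseen words get idf 0
    return {w: idf.get(w, 0) * (1 + log(c, 10)) for w, c in tf.items()}


# Example (hypothetical values):
#     query_weights(["bank", "loan", "loan"], {"bank": 2.0, "loan": 0.5})
#     -> {"bank": 2.0, "loan": 0.5 * (1 + log10(2)) ~= 0.65}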
def project_times(self, dir: str):
    """
    Times with both solves for the project.
    """
    durable_status_path = os.path.join(dir, 'package', 'experiment.json')
    transient_status_path = os.path.join(dir, 'package', 'error.json')
    if os.path.exists(durable_status_path):
        p_result = read_json(durable_status_path)
    elif os.path.exists(transient_status_path):
        p_result = read_json(transient_status_path)
        # HACK(arjun): This bit of info should have been written into
        # experiment.json.
        with open(os.path.join(dir, 'package', 'experiment.out'), 'r') as f:
            output_lines = f.readlines()
            if 'npm ERR! Failed to solve constraints :(\n' in output_lines:
                p_result['status'] = 'unsat'
    else:
        print(f'No status for {dir}')
        p_result = {'status': 'unavailable'}
    status = p_result['reason'] if 'reason' in p_result else p_result['status']
    time = p_result['time'] if 'time' in p_result else None
    return (time, self.num_deps(dir), status)
def load_and_preprocess_data(args):
    logger.info("Loading training data...")
    train = read_json('../../data/squad/train')
    logger.info("Done. Read %d sentences", len(train))
    logger.info("Loading dev data...")
    dev = read_json('../../data/squad/val')
    logger.info("Done. Read %d sentences", len(dev))
    # print('step1:', train[0])

    # concat questions and context
    train_concat = []
    for question, context, ans in train:
        if question != []:
            ans_long = ["NOT"] * len(question + context)
            ans_long[len(question) + ans[0]:len(question) + ans[1]] = \
                ["ANS"] * (ans[1] - ans[0] + 1)
            train_concat.append((question + context, ans_long))
    # print(train_concat[0])

    dev_concat = []
    for question, context, ans in dev:
        if question != []:
            ans_long = ["NOT"] * len(question + context)
            ans_long[len(question) + ans[0]:len(question) + ans[1]] = \
                ["ANS"] * (ans[1] - ans[0] + 1)
            dev_concat.append((question + context, ans_long))

    helper = ModelHelper.build(train_concat)

    # now process all the input data.
    train_data = helper.vectorize(train_concat)
    dev_data = helper.vectorize(dev_concat)

    return helper, train_data, dev_data, train, dev
def find_threads(course, forum_folder, forum_id):
    """
    Find all threads in current forum.
    Note: forum 0 has every thread!
    """
    # download the 1st page of given forum
    query = 'sort=firstposted&page=1'
    url = '{}/api/forum/forums/{}/threads?{}'
    url = url.format(course.get_url(), forum_id, query)
    path = forum_folder + '/temp.json'
    util.download(url, path, course.get_cookie_file())

    # download a huge page with all threads
    forum = util.read_json(path)
    num_threads = forum['total_threads']
    url += '&page_size={}'.format(num_threads)
    util.download(url, path, course.get_cookie_file())

    # add each thread's id to forum info
    threads = util.read_json(path)['threads']
    util.remove(path)

    path = forum_folder + '/info.json'
    forum = util.read_json(path)
    forum_threads = []
    for thread in reversed(threads):
        forum_threads.append({'id': thread['id']})
    forum['num_threads'] = num_threads
    forum['threads'] = forum_threads
    util.write_json(path, forum)
def read_data():
    with open("data/test.json") as f:  # gold-standard answers
        standard = read_json(f)
    with open("result/test_pred.json") as f:
        result = read_json(f)
    return standard, result
def main():
    league = util.get_league_name()
    prj = adj.get_projections(c.PROJECTIONS_PATH)
    scoring_dict = util.read_json(league, c.SCORING)
    roster_dict = util.read_json(league, c.ROSTER)
    score_keys, score_vals = util.separate_kv(scoring_dict)
    pos_dataframe_dict = {}
    flex_points = []
    flex_starters = 10 + sum(roster_dict[f] * roster_dict[c.LEAGUE_SIZE]
                             for f in c.FLEX_POSITIONS)

    for position, pos_df in prj.items():
        pos_df = pos_df.drop(c.POINTS, axis=1)

        # Clean up player names
        names, teams, pos = adj.separate_names_teams_pos(pos_df[c.PLAYER])
        pos_df[c.PLAYER] = names
        pos_df.insert(2, c.TEAM, teams)
        pos_df.insert(3, c.POS, pos)

        # Calculate league points and remove original points
        new_pts = adj.adj_scoring(pos_df[score_keys], np.array(score_vals))
        pos_df.insert(4, c.POINTS, new_pts)
        pos_df[c.RANK] = pos_df[c.POINTS].rank(method="max", ascending=False)
        pos_df = pos_df.set_index(c.RANK)
        pos_df.insert(4, "PPG", nrm.calc_weekly(new_pts))
        pos_df = adj.sort_by_pts(pos_df)

        # Add plus and norm stats
        pos_starters = roster_dict[position] * roster_dict[c.LEAGUE_SIZE]
        pos_plus = nrm.calc_plus(pos_df[c.POINTS], pos_starters)
        pos_df.insert(5, "POS+", pos_plus)

        if position in c.FLEX_POSITIONS:
            top_values = pos_df[c.POINTS].iloc[:flex_starters].values.tolist()
            flex_points.extend(top_values)

        pos_df = pos_df.round(3)
        pos_dataframe_dict[position] = pos_df

    flex_points.sort(reverse=True)
    flex_points = flex_points[:flex_starters]

    for pos in c.FLEX_POSITIONS:
        pos_df = pos_dataframe_dict[pos]
        flex_plus = pos_df[c.POINTS].divide(pd.Series(flex_points).mean())
        pos_df.insert(6, "FLEX+", flex_plus.round(3))
        print(pos_df)

    today_date = datetime.datetime.today().strftime('%Y-%m-%d')
    excel_file = "{}/updated_projections_{}.xlsx".format(league, today_date)
    writer = pd.ExcelWriter(excel_file, engine="xlsxwriter")
    workbook = writer.book
    for pos, pos_df in pos_dataframe_dict.items():
        worksheet = workbook.add_worksheet(pos)
        writer.sheets[pos] = worksheet
        pos_df.to_excel(writer, sheet_name=pos)
def adv_onlyOnePacker(modelpath, confspath, featurespath):
    global clf, benign_feature_names, features_df
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    confspath = os.path.abspath(os.path.join(cur_dir, confspath))
    res = util.read_json(confspath)
    featurespath = os.path.abspath(os.path.join(cur_dir, featurespath))
    features = util.read_json(featurespath)
    features = [[w, f]
                for w, f in zip(features['weights'], features['features'])]
    features_sorted = sorted(features, reverse=True)
    feature_names_sorted = [f for _, f in features_sorted]
    feature_weights_sorted = [w for w, _ in features_sorted]
    clf = joblib.load(modelpath)
    assert res and clf
    confs = json.loads(res['0.5']['1.0']['confidence'])
    # feature_names = json.loads(json.loads(res['1.0']['1.0']['features']))
    malconfs = {
        id: val
        for id, val in confs.items()
        if val['label'] == 1 and val['predict'] == 1
    }
    lowconfs = {id: v for id, v in malconfs.items() if v['conf'] <= 0.6}

    global df, dfb, dfm
    df = util.load_wildlab_df()
    features_df = [f for f in df.columns if f in feature_names_sorted]
    # df = df[df.packer_name == packer]
    dfb = df[df.benign]
    dfm = df[df.malicious]
    benign_features = get_benign_features(feature_names_sorted,
                                          feature_weights_sorted, dfb, dfm)
    benign_feature_weights = [w for w, _, _, _ in benign_features]
    benign_feature_names = [f for _, f, _, _ in benign_features]

    data = []
    for sample_id, value in malconfs.items():
        cur_conf = confs[sample_id]['conf']
        data.append([sample_id, cur_conf])
    print("generating adv. samples for {} samples".format(len(data)))
    with multiprocessing.Pool() as p:
        res = p.map(attack, data)
    res = {
        sample_id: {
            'log': r,
            'initConf': cur_conf,
            'finalConf': final_conf,
            'minChanges': min_changes,
            'maxChanges': max_changes
        }
        for r, cur_conf, final_conf, sample_id, min_changes, max_changes in res
    }
    resdir = '../../../results/paper/experiments/exp-adversarial'
    if not os.path.exists(resdir):
        os.makedirs(resdir)
    with open('{}/malconfs-adv.json'.format(resdir), 'w') as f:
        json.dump(res, f)
def crawl_ni(use_local=False):
    headers = {"X-PowerBI-ResourceKey": "df16636e-99fe-4801-a5a1-20466a39f7bf"}

    request_json = read_json("data/raw/ni/request-cumulative-tests.json")
    if use_local:
        file = "data/raw/ni/response-cumulative-tests.json"
    else:
        file = "https://wabi-north-europe-api.analysis.windows.net/public/reports/querydata?synchronous=true"
    json_data = read_json_post(file, headers, request_json)
    tests = json_data["results"][0]["result"]["data"]["dsr"]["DS"][0]["PH"][0]["DM0"]
    tests = {datetime.datetime.fromtimestamp(elt["C"][0] / 1000).strftime('%Y-%m-%d'): elt["C"][1:]
             for elt in tests}
    df = pd.DataFrame.from_dict(tests, orient='index', columns=["Tests", "ConfirmedCases"])
    df["Date"] = df.index
    df = df.fillna(method="ffill")  # fill missing values from previous
    save_indicators_df_to_sqlite(df, "Northern Ireland", "Tests")
    save_indicators_df_to_sqlite(df, "Northern Ireland", "ConfirmedCases")

    request_json = read_json("data/raw/ni/request-cumulative-deaths.json")
    if use_local:
        file = "data/raw/ni/response-cumulative-deaths.json"
    else:
        file = "https://wabi-north-europe-api.analysis.windows.net/public/reports/querydata?synchronous=true"
    json_data = read_json_post(file, headers, request_json)
    deaths = json_data["results"][0]["result"]["data"]["dsr"]["DS"][0]["PH"][0]["DM0"]
    deaths_dict = {}
    for idx, elt in enumerate(deaths):
        date = datetime.datetime.fromtimestamp(elt["C"][0] / 1000).strftime('%Y-%m-%d')
        if len(elt["C"]) == 1 and elt.get("R", None) == 2:  # R means repeat?
            # use previous
            value = [deaths[idx - 1]["C"][1]]
        else:
            value = [elt["C"][1]]
        deaths_dict[date] = value
    df = pd.DataFrame.from_dict(deaths_dict, orient='index', columns=["Deaths"])
    df["Date"] = df.index
    save_indicators_df_to_sqlite(df, "Northern Ireland", "Deaths")

    request_json = read_json("data/raw/ni/request-area-cases.json")
    if use_local:
        file = "data/raw/ni/response-area-cases.json"
    else:
        file = "https://wabi-north-europe-api.analysis.windows.net/public/reports/querydata?synchronous=true"
    json_data = read_json_post(file, headers, request_json)
    area_cases = json_data["results"][0]["result"]["data"]["dsr"]["DS"][0]["PH"][1]["DM1"]
    area_cases = {elt["C"][0]: [elt["C"][2]] for elt in area_cases}
    df = pd.DataFrame.from_dict(area_cases, orient='index', columns=["TotalCases"])
    df["Area"] = df.index
    df["AreaCode"] = df["Area"].apply(lambda lgd: lookup_local_government_district_code(lgd))
    df["Country"] = "Northern Ireland"
    df["Date"] = json_data["results"][0]["result"]["data"]["timestamp"].split("T")[0]
    df = df[["Date", "Country", "AreaCode", "Area", "TotalCases"]]
    save_cases_df_to_sqlite(df, "Northern Ireland", delete_old=False)
def __init__(self, path="", comments_filename="comments.json",
             messages_filename="messages.json", comments=True, messages=True):
    self.comments = comments
    self.messages = messages
    self.comments_json_data = util.read_json(path + comments_filename)
    self.messages_json_data = util.read_json(path + messages_filename)
def train(args):
    wd = args.working_directory
    bp = util.read_json(os.path.join(wd, 'bp.json'), wd)
    if args.labels_path is None:
        raise ValueError('Please provide a valid labels path in --labels_path')
    if args.feats_path is None:
        if not hasattr(bp, 'train_feats_'):
            raise ValueError('Please provide a valid features path in --feats_path')
        feats_path = bp.train_feats_
    else:
        feats_path = args.feats_path
    if args.clf is None:
        clf = LinearSVC('l1', dual=False, class_weight='balanced')
    else:
        clf = util.json_to_clf(args.clf)
    X_train = pd.read_csv(feats_path,
                          converters={'genome_id': str}).set_index('genome_id')
    y_train = util.read_labels(args.labels_path, X_train)
    clf.fit(X_train, y_train)
    if args.output is None:
        clf_path = os.path.join(wd, 'trained_clf.json')
    else:
        clf_path = args.output
    util.clf_to_json(clf, clf_path)
    bp.trained_clf_ = os.path.abspath(clf_path)
    bp.to_json(os.path.join(wd, 'bp.json'))
    print 'Trained classifier is stored at {}'.format(clf_path)
async def shop(self, ctx, page: int = 1):
    items = util.read_json('items.json')
    items_shop = items['shop']
    embed = discord.Embed(
        title="Apollo's Shop",
        description=f"Welcome to Apollo's Shop!\nYou can purchase any of these items by typing `{ctx.prefix}purchase <itemcode>`",
        color=discord.Color.green())
    pagesize = 7
    start = (page - 1) * pagesize
    end = (page * pagesize)
    total_page = math.ceil(len(items_shop) / pagesize)
    for i in items_shop[start:end]:
        embed.add_field(
            name=f"{i['item_description']}",
            value=f"Price: {i['item_price']:,} - `itemcode: {i['item_code']}`",
            inline=False)
    if total_page > 1:
        embed.set_footer(
            text=f"Page {page}/{total_page}. Type {ctx.prefix}shop <number> to show another page")
    embed.add_field(
        name="Notes",
        value=f"If you want to turn in your items, Please exchange with this format\n`{ctx.prefix}exchange <item code> <quantity> <dodo code>`\n**Please be sure to open your island before turning in a token!**",
        inline=False)
    await ctx.send(embed=embed)
def download_thread(course, threads_folder, thread_id, page=1, post_id=None):
    """
    Download a thread.
    """
    # Download 1st page
    url = '{}/api/forum/threads/{}'.format(course.get_url(), thread_id)
    if post_id:
        url = '{}?post_id={}&position=after'.format(url, post_id)
    path = '{}/{}/{}.json'.format(threads_folder, thread_id, page)
    util.download(url, path, course.get_cookie_file())

    thread = util.read_json(path)
    download_images(course, threads_folder, thread)
    util.write_json(path, thread)

    # Download rest pages
    page = thread['start_page']
    num_page = thread['num_pages']
    if page < num_page:
        page += 1
        print 'thread page {}/{}'.format(page, num_page)
        post_id = get_next_post_id(thread['posts'])
        if post_id:
            download_thread(course, threads_folder, thread_id, page, post_id)
def undistort_image(frame, filename):
    """
    Apply checkerboard calibration data to undistort lens images.

    Returns:
        The undistorted image

    Parameters:
        frame - image to be undistorted
        filename - name of the calibration data file
    """
    calib_file, calib_data = util.read_json(filename)
    mat = util.get_numpy_matrix(calib_data, "camera_matrix")
    dist = util.get_numpy_matrix(calib_data, "dist_coeffs")
    new_mtx = util.get_numpy_matrix(calib_data, "new_camera_matrix")
    x = calib_data["roi"]["x"]
    y = calib_data["roi"]["y"]
    calib_file.close()

    # Undistort
    dst = cv2.undistort(frame, mat, dist, None, new_mtx)

    # Crop
    height, width = frame.shape[:2]
    dst = dst[y:y + height, x:x + width]

    return dst
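# Illustration only (not from the original sources): a hedged usage sketch for
# undistort_image above. The file names below are hypothetical, and the
# calibration JSON is assumed to follow the layout expected by util.read_json
# and util.get_numpy_matrix in this project.
#
#     frame = cv2.imread("raw_frame.png")
#     undistorted = undistort_image(frame, "calibration.json")
#     cv2.imwrite("undistorted_frame.png", undistorted)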
def apply_transform(detections, filename):
    """
    Apply matrix transformation from calibration file data to detection points.

    Returns:
        Transformed detections

    Parameters:
        detections - list of detections of the tags
        filename - name of the calibration data file
    """
    # TODO apply the transform from calibration to the frame
    calib_file, calib_data = util.read_json(filename)
    transform_matrix = util.get_numpy_matrix(calib_data, "transform_matrix")
    camera_matrix = util.get_numpy_matrix(calib_data, "camera_matrix")
    dist_coeffs = util.get_numpy_matrix(calib_data, "dist_coeffs")
    x_offset = calib_data["offsets"]["x"]
    y_offset = calib_data["offsets"]["y"]

    # Copy each detection into a mutable namespace so the transformed pose can
    # be written back to its center attribute.
    detections = [
        types.SimpleNamespace(center=d.center, corners=d.corners.copy())
        for d in detections
    ]
    for i in range(len(detections)):
        x, y, z, theta = util.compute_tag_undistorted_pose(
            camera_matrix, dist_coeffs, transform_matrix, detections[i],
            TAG_SIZE)
        x = MULT_FACTOR * (x + x_offset)
        y = MULT_FACTOR * (y + y_offset)
        detections[i].center = (x, y, z, theta)
    calib_file.close()
    return detections
def predict(num=1):  # num is the number of answer sentences to extract
    system('.\svm_rank_windows\svm_rank_classify.exe %s %s %s' %
           (test_feature_path, model_path, test_predict_path))
    with open(test_feature_path, 'r', encoding='utf-8') as f1, \
            open(test_predict_path, 'r', encoding='utf-8') as f2:
        labels = {}
        for line1, line2 in zip(f1, f2):
            if len(line1) == 1:
                break
            qid = int(line1.split()[1].split(':')[1])
            if qid not in labels:
                labels[qid] = []
            labels[qid].append((float(line2.strip()), len(labels[qid])))
    seg_passages, res_lst = load_seg_passages(), read_json(test_path)
    for item in res_lst:  # iterate over every query in the file
        qid, pid, q_words = item['qid'], item['pid'], item['question']
        rank_lst, seg_passage = sorted(
            labels[qid], key=lambda val: val[0],
            reverse=True), seg_passages[str(pid)]
        item['answer_sentence'] = [
            seg_passage[rank[1]] for rank in rank_lst[:num]
        ]  # extract the top-ranked answer sentences
    write_json(test_ans_path, res_lst)
def load_data(path, options, load_train=False, load_val=False, load_test=False,
              load_official_val=False, load_official_test=False):
    print "Loading data"
    data = []
    worddict = read_pkl(osp.join(path, 'dictionary.pkl'))
    for opt in ('train', 'val', 'test', 'official_val', 'official_test'):
        if eval('load_%s' % opt):
            f = h5py.File(osp.join(path, 'Feats_%s.h5' % opt), 'r')
            f_caps = read_json(osp.join(path, 'captions_%s.json' % opt))
            if options['semantic_dim'] == 2048:
                if 'regional_feats' in f.keys():
                    f_att = f['/regional_feats']
                else:
                    raise ValueError(
                        "The dataset doesn't contain regional features")
            elif options['semantic_dim'] == 300:
                f_att = f['/semantic_feats']
            else:
                raise ValueError("Unknown semantic dimension")
            if options['use_cnninit']:
                if options['cnn_type'] == 'vgg':
                    f_cnn = f['/fc7_feats']
                else:
                    f_cnn = f['/pool5_feats']
            else:
                f_cnn = numpy.zeros_like(f["/fc7_feats"][:], dtype='float32')
            exec "%s = (f_caps, f_att, f_cnn)" % opt
            data.append(opt)
    exec 'result = (%s, worddict)' % (', '.join(data))
    return result
def build_uplift_requirements(repo_dir):
    if os.path.exists(requirements_file) and util.ask_yn(
            "Found existing requirements. Should they be used?"):
        bug_info = util.read_json(requirements_file)
    else:
        bug_info = {}
        enabled_branches = c.read_value('repository.enabled_branches')
        all_queries = c.read_value('queries')
        queries = []
        for branch in enabled_branches:
            queries.extend(all_queries[branch])
        bugs = [x for x in find_bugs(queries) if not is_skipable(x)]
        print "Fetching bug data"
        for bug_id in bugs:
            if is_skipable(bug_id):
                continue
            bug = bzapi.fetch_complete_bug(bug_id)
            print "+",
            needed_on = branch_logic.needed_on_branches(bug)
            if len(needed_on) == 0:
                continue
            b = bug_info[bug_id] = {}
            b['needed_on'] = needed_on
            b['already_fixed_on'] = branch_logic.fixed_on_branches(bug)
            b['summary'] = bug['summary']
        print "\nFinished fetching bug data"
        util.write_json(requirements_file, bug_info)
    return bug_info
def __init__(self, unittest_flag=False):
    if unittest_flag:
        # value not used, when we're testing will mock out call to read_json
        # below with actual translation table to use for test
        file_ = 'dummy_filename'
    else:
        config = util_mdtf.ConfigManager()
        file_ = os.path.join(config.paths.CODE_ROOT, 'src',
                             'cmip6-cmor-tables', 'Tables', 'CMIP6_CV.json')
    self._contents = util.read_json(file_)
    self._contents = self._contents['CV']
    for k in ['product', 'version_metadata', 'required_global_attributes',
              'further_info_url', 'Conventions', 'license']:
        del self._contents[k]
    # munge table_ids
    self._contents['table_id'] = dict.fromkeys(self._contents['table_id'])
    for tbl in self._contents['table_id']:
        self._contents['table_id'][tbl] = parse_mip_table_id(tbl)
    self.cv = dict()
    self._lookups = dict()
def plot_tree(res_path):
    scores_path = '{}/exp.db.json'.format(res_path)
    res = util.read_json(scores_path)
    for ratio in sorted(res.keys()):
        for ratio2 in sorted(res[ratio].keys()):
            features = json.loads(res[ratio][ratio2]['features'])
            features = ast.literal_eval(features)
            model_path = '{}/model-{}-{}.joblib'.format(res_path, ratio, ratio2)
            model = load_model(model_path)
            # Extract single tree
            idx = 0
            for estimator in model.estimators_[:10]:
                idx += 1
                out_file = '{}/trees/tree-{}-{}-tree-{}.dot'.format(
                    res_path, ratio, ratio2, idx)
                out_file_png = '{}/trees/tree-{}-{}-tree-{}.png'.format(
                    res_path, ratio, ratio2, idx)
                util.make_dir_for_file(out_file)
                # Export as dot file
                export_graphviz(estimator, out_file=out_file,
                                class_names=['benign', 'malicious'],
                                rounded=True, filled=True, proportion=True,
                                feature_names=features)
                # Convert to png using system command (requires Graphviz)
                from subprocess import call
                call(['dot', '-Tpng', out_file, '-o', out_file_png,
                      '-Gdpi=600'])
def setUp(self):
    if self.args.repeatable and not self.repeatable:
        self.skipTest('%s is not repeatable' % self.__class__.__name__)

    if self.restart_browser or not self.b.driver:
        if self.b.driver:
            print('Restarting browser...')
            self.b.quit()
        self.b.start()
        self.b.set_window_size(1024, 768)

    if self.run_in_pixiv:
        conf = util.read_json(
            os.path.join(self.rootdir, 'src', 'data', 'config.json'))
        if not self.url.startswith('http://www.pixiv.net/'):
            self.open('/')
        if not self.is_logged_in():
            self.login()
        js = []
        for section in conf:
            for item in section['items']:
                js.append('pixplus.conf.%s.%s=%s' % (
                    section['name'], item['key'], json.dumps(item['value'])))
        self.js(';'.join(js))
def _generate_transfers(self) -> Dict[str, List[TransferDefinition]]:
    """
    Generate TransferDefinitions based on transfer-config.json, containing those ports
    that have a base training for transferring to another port
    :return: Dict of key = target_port_name, val = List of TransferDefinition
    """
    config = read_json(self.config_path)
    transfer_defs = {}
    ports = list(config["ports"])
    permutations = list(itertools.permutations(ports, r=2))

    # for pair in _permute(config["ports"]):
    for pair in permutations:
        base_port, target_port = self.pm.find_port(pair[0]), self.pm.find_port(pair[1])
        if base_port is None:
            raise ValueError(
                f"No port found: Unable to transfer from base-port with name '{pair[0]}'")
        if target_port is None:
            raise ValueError(
                f"No port found: Unable to transfer to target-port with name '{pair[1]}'")

        trainings = self.pm.load_trainings(base_port, self.output_dir,
                                           self.routes_dir, training_type="base")
        # print(f"loaded trainings. base port {base_port.name}:\n{trainings.keys()}")
        if len(trainings.keys()) < 1:
            print(f"No base-training found for port '{base_port.name}'. Skipping")
            continue

        training = list(trainings.values())[-1][0]
        # print(f"training ({len(trainings.values())}): {training}")
        # print(f"Pair {base_port.name} ({len(trainings)} base-trains) -> {target_port.name}. "
        #       f"Using latest at '{training.start_time}'")
        verify_output_dir(self.output_dir, target_port.name)
        td = TransferDefinition(
            base_port_name=base_port.name,
            base_model_path=training.model_path,
            target_port_name=target_port.name,
            target_routes_dir=os.path.join(self.routes_dir, target_port.name),
            target_model_dir=os.path.join(self.output_dir, "model", target_port.name),
            target_output_data_dir=os.path.join(self.output_dir, "data", target_port.name),
            target_plot_dir=os.path.join(self.output_dir, "plot", target_port.name),
            target_log_dir=os.path.join(self.output_dir, "log", target_port.name))
        name = target_port.name
        if name in transfer_defs:
            transfer_defs[target_port.name].append(td)
        else:
            transfer_defs[target_port.name] = [td]
    return transfer_defs
def main():
    try:
        global conf, uid, key, ip, port, sock, status_file
        conf = read_json(os.path.join(os.path.dirname(__file__), config_path))
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        socket.setdefaulttimeout(3)
        sock.settimeout(3)
        status_file = open(status_path, "w")
        # sock.setblocking(False)
        ip = conf['remote']
        port = conf['remote_port']
        if isfile(key_path):
            key = read_text(key_path)
        else:
            prompt_for_key()
        if isfile(uid_path):
            uid = read_text(uid_path)
        else:
            uid = gen_id()
            write_text(uid_path, uid)
        check_alive()
        # schedule.every(conf['interval']).seconds.do(check_alive)
        schedule.every(5).seconds.do(check_alive)
        print("client is running")
        while True:
            schedule.run_pending()
    except KeyboardInterrupt:
        print('manual exit')
        post_request("GBYE")
        sock.close()
def set_cli_defaults(code_root, cli_config, install_config):
    """Write install-time configuration options to the cli.jsonc file used
    to set run-time default values.
    """
    def _set_cli_default(template, name, default):
        template[name] = default

    in_path = os.path.join(code_root, cli_config['config_in'])
    out_path = os.path.join(code_root, cli_config['config_out'])
    print("Writing default settings to {}".format(out_path))
    try:
        cli_template = util.read_json(in_path)
    except Exception as exc:
        fatal_exception_handler(exc, "ERROR: Couldn't read {}.".format(in_path))
    for key in cli_config['default_keys']:
        try:
            _set_cli_default(cli_template, key, install_config[key])
        except Exception as exc:
            fatal_exception_handler(exc, "ERROR: {} not set".format(key))
    if os.path.exists(out_path):
        print("{} exists; overwriting".format(out_path))
        os.remove(out_path)
    try:
        util.write_json(cli_template, out_path, sort_keys=False)
    except Exception as exc:
        fatal_exception_handler(exc, "ERROR: Couldn't write {}.".format(out_path))
def recipe_id_to_ing():
    '''
    Create a dataframe with recipe id and ingredients
    '''
    ori = {}
    dish = util.read_json(name_json)
    clean = pd.read_csv('cleaned_ingredients.csv',
                        names=['id', 'origin', 'n', 'q', 'cleaned'], sep='|')
    clean = clean[clean['cleaned'].notna()]
    keys = dish.keys()
    id_tracker = 1
    for k in keys:
        for course in dish[k]:
            ings = course.get("ingredients", [])
            for ing in ings:
                ing = ing.replace('�', '')
                result = clean[clean['origin'] == ing]
                if len(result) > 0:
                    ori[result.iloc[0]['cleaned']] = id_tracker
                    id_tracker += 1
    df = pd.DataFrame.from_dict(ori, orient='index').reset_index()
    df.columns = ["origin", "recipe_id"]
    return df
def process_parties(parties):
    """Process contributors for a plan and separate them into authors and
    contributors
    """
    roles = read_json("data/CI_RoleCode.json")
    creators = []
    contributors = []
    for p in parties:
        party = process_party(p, roles)
        if p['role'] in ['author']:
            creators.append(party)
        else:
            contributors.append(party)

    # make sure there's at least 1 author otherwise use owner/`rightsholder`
    if len(creators) == 0:
        owners_ind = []
        for i, p in enumerate(contributors):
            if p['role']['id'] == 'rightsholder':
                # add it to creators
                creators.append(p)
                owners_ind.append(i)
        # now remove owners from contributors list (pop from the end so the
        # remaining indices stay valid)
        for i in reversed(owners_ind):
            contributors.pop(i)
    return creators, contributors
def __init__(self, code_root, cli_rel_path):
    cli_config = util.read_json(os.path.join(code_root, cli_rel_path))
    self.case_list = cli_config.pop('case_list', [])
    self.pod_list = cli_config.pop('pod_list', [])
    super(FrameworkCLIHandler, self).__init__(code_root, cli_config,
                                              partial_defaults=None)
def calculate_oldness(package_name, package_lock_path):
    packages = read_json(package_lock_path)['packages']

    # Returns 3 things:
    # - 'error' upon an error
    # - 'ignored' if a node should be ignored on purpose (we found a link but
    #   we choose not to follow links)
    # - A float
    def oldness_of_node(node_name, follow_links=True):
        # node_name is an unstripped key in packages.
        metadata = packages[node_name]
        if 'link' in metadata and metadata['link']:
            if follow_links:
                return oldness_of_node(metadata['resolved'],
                                       follow_links=follow_links)
            else:
                return 'ignored'
        elif 'version' not in metadata:
            print(f'Error (ignored): no version for {node_name} in {package_lock_path}',
                  file=sys.stderr)
            return 'error'
        else:
            version = metadata['version']
            stripped_name = strip_node_modules_from_name(node_name)
            try:
                oldness = subprocess.check_output(
                    ['../version-oldness/version-oldness.sh', stripped_name, version],
                    stderr=DUMP_ERRORS
                ).decode('utf-8', errors='ignore').strip()
                return float(oldness)
            except KeyboardInterrupt as e:
                raise e
            except BaseException as e:
                print(f'Error (ignored): version-oldness {node_name} {version}',
                      file=sys.stderr)
                return 'error'

    aggregate_oldness = 0
    num_packages = 0
    for name in packages.keys():
        # NOTE(arjun, donald): This does not credit NPM appropriately for
        # flattening the tree and makes it look better than it is. For now,
        # follow_links=False.
        oldness_maybe = oldness_of_node(name, follow_links=False)
        if oldness_maybe == 'error':
            pass  # we already logged the error
        elif oldness_maybe == 'ignored':
            print(f'Warning (ignored): choose to ignore {name} in {package_lock_path}',
                  file=sys.stderr)
        else:
            aggregate_oldness += oldness_maybe
            num_packages += 1
    if num_packages == 0:
        return f'{package_name},0'
    return f'{package_name},{aggregate_oldness / num_packages}'
def add_other_2_area(path):
    data = read_json(path)
    for i, d in enumerate(data):
        if d['label'] == '':
            data[i]['path'] = []
            for child in d['children']:
                data[i]['path'].extend(data[child]['path'])
    json.dump(data, open(path, 'w'))
def download_threads(course, forums_folder, threads_folder):
    """
    Download every thread (in forum 0).
    """
    forum = util.read_json(forums_folder + '/info.json')
    for idx, thread in enumerate(forum['threads']):
        print 'thread {}/{}'.format(idx + 1, forum['num_threads'])
        download_thread(course, threads_folder, thread['id'])
def check_single_child(path):
    flag = False
    data = read_json(path)
    for d in data:
        if len(d['path']) == 1 and d['parent'] == -1 and d['label'] == '':
            flag = True
    if flag:
        return path
def read_num_entries(self):
    num_entries = read_json(
        join(self.root_analysis_dir,
             "%s%s" % (self.crawl_name, NUM_ENTRIES_JSON)))
    self.metrics.num_requests = num_entries[HTTP_REQUESTS_TABLE]
    self.metrics.num_responses = num_entries[HTTP_RESPONSES_TABLE]
    self.metrics.num_javascript = num_entries[JAVASCRIPT_TABLE]
def _download_old_quizzes(course, item, path):
    """ Download old version in-video quizzes. """
    url = '{}/admin/quiz/quiz_load?quiz_id={}'
    url = url.format(course.get_url(), item['quiz']['parent_id'])
    util.download(url, path, course.get_cookie_file())
    util.write_json(path, util.read_json(path))
def add_shower_curtain(path_list):
    for p in path_list:
        data = read_json(p)
        for i, d in enumerate(data):
            if 'shower' in d['label']:
                data[i]['label'] = 'shower curtain'
        with open(p, 'w') as fp:
            json.dump(data, fp)
def get_data(api):
    response = 'temp.json'
    url = 'https://mediacosmos.rice.edu/api/' + api
    cmd = 'curl "{}" -o "{}" --cookie {}'.format(url, response, COOKIE)
    os.system(cmd)
    data = util.read_json(response)
    util.remove(response)
    return data
def is_skipable(bug_id):
    # This is a bad idea. The program should really use integer bug ids everywhere
    _bi = int(bug_id)
    skip_bugs = util.read_json(skip_bugs_file)
    if not skip_bugs:
        skip_bugs = []
    for skip_bug in skip_bugs:
        if _bi == skip_bug:
            return True
    return False
def run(library_id):
    # Joe's library_id:
    # 457006af-2dd9-4f5a-a310-8fcef3d02eeb
    upload_access = get_upload_access(library_id)
    videos = util.read_json('videos.json')
    num_videos = len(videos)
    for idx, (path, title) in enumerate(videos.values()):
        print '{}/{}'.format(idx + 1, num_videos), path
        upload(path, title, upload_access)
def get_nodes():
    nodes_data = util.read_json(NODE_DATA, INIT_DATA_DIR)
    nodes_list = nodes_data[u"elements"]
    nodes_dict = dict()
    for item in nodes_list:
        if item[u"type"] == u"node":
            tmp_id = item[u"id"]
            tmp_lat = item[u"lat"]
            tmp_lon = item[u"lon"]
            nodes_dict[tmp_id] = {u"lat": tmp_lat, u"lon": tmp_lon}
    return nodes_dict
def construct_cache():
    words = set()
    dictionary = read_json(config.WORD_TRANSLATION_JSON_CORRECTED)
    for word in dictionary:
        for trans in dictionary[word]:
            if ';' in trans[0]:
                words |= set(map(lambda s: s.strip(), trans[0].split(';')))
            else:
                words.add(trans[0].strip())
    phrase = '\n'.join(words)
    print phrase
    print ngram(phrase)
def extract_feats(args):
    wd = args.working_directory
    bp = util.read_json(os.path.join(wd, 'bp.json'), wd)
    X = bp.extract_features(args.feats_type, args.clusters_dir)
    if args.output is None:
        feats_path = os.path.join(wd, '{}_feats.csv'.format(args.feats_type))
    else:
        feats_path = args.output
    X.to_csv(feats_path)
    setattr(bp, '{}_feats_'.format(args.feats_type), feats_path)
    print '{} feats stored in {}'.format(
        'Prediction' if args.feats_type == 'pred' else 'Training', feats_path)
    bp.to_json(os.path.join(wd, 'bp.json'))
def npm_publish(root, module, args):
    module_dir = os.path.join(root, module["folder"])
    package_json = os.path.join(module_dir, "package.json")
    if os.path.exists(package_json):
        package = read_json(package_json)
        # read_json returns a dict, so look the private flag up by key
        if not package.get("private"):
            print("Publishing sub-module: %s" % module["folder"])
            cmd = ["npm", "publish"]
            if args['force'] == True:
                cmd.append("--force")
            p = subprocess.Popen(cmd, cwd=module_dir)
            p.communicate()
def download_info(self):
    url = self.url
    temp = self.info_folder + '/temp.html'
    util.download(url, temp, self.cookie_file)
    page_html = util.read_file(temp)
    util.remove(temp)

    info_files = ['user.json', 'course.json', 'sidebar.json']
    matches = re.findall(r'JSON\.parse\("(.*?)"\);', page_html)
    for match, info_file in zip(matches, info_files)[1:]:
        info = util.unicode_unescape(match).replace('\\\\', '')
        path = '{}/{}'.format(self.info_folder, info_file)
        util.write_json(path, util.read_json(info, True))
def load_credentials(credentials_file="~/.bzapi_credentials"):
    """ I know how to load a BzAPI credentials file (json dict).  I should
    probably be taught how to encrypt and decrypt this info"""
    cf = os.path.expanduser(credentials_file)
    if not os.path.exists(cf) or os.path.isdir(cf):
        raise InvalidBZAPICredentials("credentials file is not found: %s" % cf)
    try:
        data = util.read_json(cf)
    except IOError as ioe:
        raise InvalidBZAPICredentials("could not read credentials file: %s" % ioe)
    if data.has_key('username') and data.has_key('password'):
        return {'username': data['username'], 'password': data['password']}
    raise InvalidBZAPICredentials("credentials file did not have a username and password")
def npm_symlinks(root, module):
    module_dir = os.path.join(root, module["folder"])
    module_config_file = os.path.join(module_dir, "package.json")
    if os.path.exists(module_config_file):
        module_config = read_json(module_config_file)
        deps = []
        if "dependencies" in module_config:
            deps += [dep for dep in module_config["dependencies"]
                     if dep.startswith("substance")]
        if "devDependencies" in module_config:
            deps += [dep for dep in module_config["devDependencies"]
                     if dep.startswith("substance")]
        for dep in deps:
            create_symlink(root, os.path.join("node_modules", dep),
                           os.path.join(module_dir, "node_modules", dep))
def _download_new_quizzes(course, item, path):
    """ Download new version in-video quizzes. """
    # Step 1, download a HTML that has quiz ID.
    url = '{}/lecture/view?quiz_v2_admin=1&lecture_id={}'
    url = url.format(course.get_url(), item['parent_id'])
    util.download(url, path, course.get_cookie_file())

    pattern = r'v2-classId="(.*?)".*?v2-id="(.*?)".*?v2-lecture-id="(.*?)"'
    find = re.search(pattern, util.read_file(path), re.DOTALL)
    class_id, v2_id, lecture_id = find.group(1, 2, 3)

    # if no quiz in this video, delete the file
    if not v2_id:
        util.remove(path)
        return

    # Step 2, download a JSON that has question ID.
    class_url = 'https://class.coursera.org/api/assess/v1/inVideo/class/' + class_id
    url = '{}/lecture/{}/{}'.format(class_url, lecture_id, v2_id)
    util.download(url, path, course.get_cookie_file())

    # Step 3, download each question.
    quiz = util.read_json(path)
    questions = quiz['assessment']['definition']['questions']
    for question_id, question in questions.items():
        url = '{}/questions/{}'.format(class_url, question_id)
        util.download(url, path, course.get_cookie_file())
        question_json = util.read_json(path)

        # add question content to quiz
        question['metadata'] = question_json['metadata']
        question['data'] = question_json['question']

    # write the whole quiz to file
    util.write_json(path, quiz)
def download(course, item):
    """
    Download a wiki page.

    :param course: A Course object.
    :param item: {
        "uid": "coursepageEYJIs_YAEeKNdCIACugoiw",
        "section_id": "27",
        "order": "1",
        "item_type": "coursepage",
        "__type": "coursepage",
        "item_id": "EYJIs_YAEeKNdCIACugoiw",
        "id": "EYJIs_YAEeKNdCIACugoiw",
        "metadata": {
            "openTime": 1373785724930,
            "locked": true,
            "creator": 726142,
            "title": "Home",
            "modified": 1405321775510,
            "canonicalName": "home",
            "created": 1374849092873,
            "visible": true,
            "version": 11
        }
    }
    :return: None.
    """
    # path = '{}/wiki/info/{}.json'
    # path = path.format(course.get_folder(), item['metadata']['canonicalName'])
    #
    # util.make_folder(path, True)
    # util.write_json(path, item)

    url = '{}/admin/api/pages/{}?fields=content'
    url = url.format(course.get_url(), item['item_id'])

    path = '{}/wiki/{}.html'
    path = path.format(course.get_folder(), item['metadata']['canonicalName'])
    util.download(url, path, course.get_cookie_file())

    wiki = util.read_json(path)
    content = wiki['content']
    if content:
        content = util.remove_coursera_bad_formats(content)
    else:
        content = ''
    util.write_file(path, content)
def create_apache_docs():
    config = util.read_json("apache/config.json")
    result = []
    for item in config:
        path = APACHE_DOC_PATH_TEMPLATE.format(item)
        t = time.gmtime(os.path.getmtime(path))
        r = {
            "doc": item,
            "stylesheet": config[item][0],
            "title": config[item][1],
            "descr": config[item][2],
            "keywords": config[item][3],
            "modified": time.strftime("%Y-%m-%d %H:%M:%S", t)
        }
        result.append(r)
    return result
def train_w2vector(features, words, sentence_dict_path, word2vec_path):
    file_path_train = util.data_prefix + r'resume_clean.json'
    json_all = read_json(file_path_train)
    sentences = []
    level_zero = [u'size', u'salary']
    level_two = [u'type', u'department', u'industry', u'position_name']
    level_one = [u'major', u'gender']
    dic_all = {}
    count2 = 0
    all = 0
    for i in json_all:
        id_one = i[u'id']
        workExperienceList = i[u'workExperienceList']
        all += 1
        sentence = []
        count = 0
        for w in workExperienceList:
            if w == None:
                continue
            if len(w) > 2:
                count2 += 1
            count += 1
            if count == 1:
                continue
            for s in level_zero:
                try:
                    if w[s] == None:
                        sentence.append(s + u'1')
                    else:
                        sentence.append(s + str(w[s]))
                except Exception, e:
                    sentence.append(s + u'1')
            for t in level_two:
                try:
                    if w[t] == None:
                        # u'其他' means "other" (fallback category)
                        sentence.append(u'其他')
                    else:
                        sentence.append(w[t])
                except Exception, e:
                    sentence.append(u'其他')
            try:
                sentence.append(work_time(w['start_date'], w['end_date']))
            except Exception, e:
                sentence.append(u'm4')
def download(self, item_type=None):
    type_filter = item_type
    if item_type in ('subtitle', 'original', 'compressed'):
        type_filter = 'lecture'

    download_queue = []
    for section in util.read_json(self.section_file):
        for item in section['items']:
            if type_filter is None or type_filter == item['item_type']:
                download_queue.append(item)

    num_download = len(download_queue)
    for idx, item in enumerate(download_queue):
        print "%d/%d" % (idx + 1, num_download)
        if type_filter is None:
            item_type = item['item_type']
        DOWNLOADER[item_type](self, item)
def shrinkwrap(self, args=None):
    print("Creating npm-shrinkwrap.json...")
    npm_shrinkwrap(self.root_dir)
    shrinkwrap_file = os.path.join(self.root_dir, "npm-shrinkwrap.json")
    shrinkwrap_conf = read_json(shrinkwrap_file)
    project_config = self.get_project_config()
    deps = shrinkwrap_conf['dependencies']
    devDeps = shrinkwrap_conf['devDependencies'] if 'devDependencies' in shrinkwrap_conf else {}
    for m, __, conf in iterate_modules(self.root_dir, project_config):
        name = conf['name']
        repo = "git+%s#%s" % (m['repository'], m['branch'])
        if name in deps:
            entry = deps[name]
            entry['from'] = repo
        if name in devDeps:
            entry = devDeps[name]
            entry['from'] = repo
    write_json(shrinkwrap_file, shrinkwrap_conf)
def download_subtitles(course, item):
    """
    Download all subtitles of this video.
    """
    if item['source_video']:
        url = '{}/admin/api/lectures/{}/subtitles'
        url = url.format(course.get_url(), item['item_id'])
        path = course.get_folder() + '/video/subtitles/temp.json'
        util.download(url, path, course.get_cookie_file())

        subtitles = util.read_json(path)
        util.remove(path)

        for subtitle in subtitles:
            url = subtitle['srt_url']
            if url:
                path = '{}/video/subtitles/{}.{}.srt'
                path = path.format(course.get_folder(), item['item_id'],
                                   subtitle['language'])
                util.download(url, path, course.get_cookie_file())
def find_forums(course, forum_folder, forum_id=0):
    """
    Recursively find all sub-forums in current forum.
    Return a list of sub-forums and current forum.
    """
    print 'crawling forum', forum_id

    url = '{}/api/forum/forums/{}'.format(course.get_url(), forum_id)
    path = forum_folder + '/info.json'
    util.download(url, path, course.get_cookie_file())

    forum = util.read_json(path)
    util.write_json(path, forum)

    forums = [(forum_folder, forum_id)]
    for sub_forum in forum['subforums']:
        sub_folder = '{}/{}'.format(forum_folder, sub_forum['id'])
        # recursion
        forums += find_forums(course, sub_folder, sub_forum['id'])
    return forums
def get_ways_save_id2name():
    ways_data = util.read_json(WAY_DATA, INIT_DATA_DIR)
    ways_list = ways_data[u"elements"]
    ways_dict = dict()
    ways_id2name = dict()
    for item in ways_list:
        if (item[u"type"] == u"way") and (u"tags" in item) and (u"highway" in item[u"tags"]):
            tmp_id = item[u"id"]
            nodes_list = list()
            for node_id in item[u"nodes"]:
                if Nodes_dict.has_key(node_id):
                    tmp_node = Nodes_dict[node_id]
                    tmp_lat = tmp_node[u"lat"]
                    tmp_lon = tmp_node[u"lon"]
                    nodes_list.append({u"lat": tmp_lat, u"lon": tmp_lon})
            if len(nodes_list) > 1:
                ways_dict[tmp_id] = {u"highway": item[u"tags"][u"highway"],
                                     u"nodes": nodes_list}
                tmp_name = u""
                if u"name" in item[u"tags"]:
                    tmp_name = item[u"tags"][u"name"]
                ways_id2name[tmp_id] = tmp_name
    util.write_json(WAY_ID2NAME, INTER_DATA_DIR, ways_id2name)
    return ways_dict
def predict(args):
    wd = args.working_directory
    bp = util.read_json(os.path.join(wd, 'bp.json'), wd)
    if args.feats_path is None:
        if not hasattr(bp, 'pred_feats_'):
            raise ValueError('Please provide a valid features path in --feats_path')
        feats_path = bp.pred_feats_
    else:
        feats_path = args.feats_path
    if args.clf is None:
        if not hasattr(bp, 'trained_clf_'):
            raise ValueError('Please provide a valid trained classifier path in --clf or run "train"')
        clf_path = bp.trained_clf_
    else:
        clf_path = args.clf
    clf = util.json_to_clf(clf_path, LinearSVC)
    X_pred = pd.read_csv(feats_path,
                         converters={'genome_id': str}).set_index('genome_id')
    y_pred = pd.Series(clf.predict(X_pred), X_pred.index)
    if args.output is None:
        pred_csv_path = os.path.join(wd, 'predictions.csv')
    else:
        pred_csv_path = args.output
    y_pred.to_csv(pred_csv_path)
    print 'Predictions stored at {}'.format(pred_csv_path)
def read_info():
    grids = util.read_json(GRIDS_DICT, INTER_DATA_DIR)
    way_name = util.read_json(WAY_ID2NAME, INTER_DATA_DIR)
    map_info = util.read_json(MAP_INFO, INTER_DATA_DIR)
    speed_limit = util.read_json(SPEED_LIMIT, INIT_DATA_DIR)
    return grids, way_name, map_info, speed_limit
import cv2
import util
import numpy as np
import os
from functools import partial
from os.path import abspath

COLS = 640
ROWS = 480
PATH = os.path.dirname(os.path.realpath(__file__))

# enable access to the json file from any directory in SDP
absPathToJson = PATH + '/config/undistort.json'
pitches = util.read_json(absPathToJson)


def step(frame, pitch=0):
    frame = undistort(frame, pitch)
    frame = perspective(frame, pitch)
    frame = translate(frame, pitch)
    frame = warp(frame, pitch)
    return frame


def pitch_to_numpy(pitch=0):
    ret = {}
    for key, value in pitch.iteritems():
        ret[key] = np.asarray(value)
    return ret
def download_section_file(self):
    url = '{}/admin/api/sections?course_id={}&full=1&drafts=1'
    url = url.format(self.url, self.id)
    path = self.section_file
    util.download(url, path, self.cookie_file)
    util.write_json(path, util.read_json(path))
def download_stories(self):
    url = self.url + '/data/api/reports/end_of_course_stories.json'
    path = self.info_folder + '/end_of_course_stories.json'
    util.download(url, path, self.cookie_file)
    util.write_json(path, util.read_json(path))