def _complete_path(self, item): c = re.compile(r"(%s)_(\d+)\.(\w+)" % ("|".join(self.SUPPORTED_METHODS))) url_path = UrlPath(get_url_path(item[0][len(self.api_dir):])) for current_file in sorted(item[2]): m = c.match(current_file) if m is None: continue method, status_code, format = m.groups() if method in url_path.resources: url_path.resources[method].files.append( ResourceFormatFile( status_code, format, read_file("%s/%s" % (item[0], current_file)))) else: resource = self._create_resource( item[0], method, url_path.path) resource.files.append( ResourceFormatFile( status_code, format, read_file("%s/%s" % (item[0], current_file)))) # add resource to url_path url_path.resources[method] = resource return url_path
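# Almost every snippet in this collection delegates raw I/O to a read_file
# helper (usually util.read_file). The helper itself is not shown anywhere in
# the file, so the sketch below is only an assumption about its typical shape,
# including the optional 'b' flag that a few callers pass for binary reads.
def read_file_sketch(path, mode='r'):
    """Return the full contents of path; pass 'b' for binary mode."""
    with open(path, 'rb' if mode == 'b' else 'r') as handle:
        return handle.read()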
def save_user_sim_matrix(topics):
    for i in range(60):
        topic_path = 'user_similarity/user_topic_sim/' + str(topics) + '_topics/' + str(i) + ".pickle"
        tag_path = 'user_similarity/user_tag_sim/' + str(i) + ".pickle"
        topic_i = util.read_file(topic_path)
        tag_i = util.read_file(tag_path)
        for t in topic_i:
            topic_i[t] = topic_i[t] * tag_i[t]
        path = 'user_similarity/user_sim/' + str(topics) + '_hybrid/' + str(i) + ".pickle"
        util.write_file(topic_i, path)
def importData(): # read data and split training data into training and validation sets data_train, labels_train = read_file('training') #assert len(data_train[0]) == len(labels_train[0]) #assert len(data_train[200]) == len(labels_train[200]) data_test, labels_test = read_file('test') #assert len(data_test[0]) == len(data_test[0]) #assert len(data_test[200]) == len(data_test[200]) return data_train, data_test, labels_train, labels_test
def _find_files(url, folder, cookie): """ Recursively find all files in current page. :param url: A URL to given page. :param folder: A destination folder for this page. :param cookie: A cookie file used for downloading. :return: A list of files (URL, path) in current page. """ files = [] path = '{}/temp.html'.format(folder) util.download(url, path, cookie) page = util.read_file(path) util.remove(path) # recursively find all files in sub-folders pattern = r'<tr><td colspan="4"><a href="(.*?)">(.*?)</a>' for find in re.finditer(pattern, page, re.DOTALL): url = find.group(1) sub_folder = '{}/{}'.format(folder, find.group(2)) files += _find_files(url, sub_folder, cookie) # find all files in this page pattern = r'<tr><td>(.*?)</td>.*?Embed.*?<a href="(.*?)\?.*?">Download</a>' for find in re.finditer(pattern, page, re.DOTALL): url = find.group(2) file_name = find.group(1) path = u'{}/{}'.format(folder, file_name) files.append((url, path)) return files
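# Hedged usage sketch for _find_files above: page_url, folder and cookie are
# placeholders, and util.download is assumed to fetch each discovered
# (url, path) pair the same way the listing page itself was fetched.
def download_page_files(page_url, folder, cookie):
    for file_url, file_path in _find_files(page_url, folder, cookie):
        util.download(file_url, file_path, cookie)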
def run_test(self, name, path, num_cores, args=[]): """ Run a single test. """ try: # Compile the program (exit, output) = call([COMPILE, path+'/'+name+'.sire'] + ['-t', 'mpi', '-n', '{}'.format(num_cores)] + args) self.assertTrue(exit) # Simulate execution (exit, output) = call([SIMULATE, '-np', '{}'.format(num_cores), 'a.out'] + SIM_FLAGS) self.assertTrue(exit) # Check the output against the .output file self.assertEqual(output.strip(), read_file(path+'/'+name+'.output').strip()) except Exception as e: sys.stderr.write('Error: {}\n'.format(e)) raise except: sys.stderr.write("Unexpected error: {}\n".format(sys.exc_info()[0])) raise
def main():
    g = u.read_file("../data/NewYorkOneWeek.txt")
    results = iterative_removing_algorithm(g, 5)
    print 'done'
    for r in results:
        nx.draw_networkx(r)
        plt.show()
def get_user_sim_matrix(topics):
    matrix = {}
    for i in range(60):
        path = 'user_similarity/user_sim/' + str(topics) + '_hybrid/' + str(i) + ".pickle"
        matrix.update(util.read_file(path))
        print i
    return matrix
def get_tag_sim_matrix():
    matrix = {}
    for i in range(60):
        path = 'user_similarity/user_tag_sim/genres/' + str(i) + ".pickle"
        matrix.update(util.read_file(path))
        print i
    return matrix
def regenerate(in_dir): os.umask(022) global settings settings = util.parse_headers(util.read_file( os.path.join(in_dir, 'settings')).strip()) global jinja jinja = jinja2.Environment( loader=jinja2.FileSystemLoader(os.path.join(in_dir, 'templates')), autoescape=True) posts = load_posts(os.path.join(in_dir, 'posts')) util.write_if_changed('index.html', generate_frontpage(posts)) if len(posts) > FRONTPAGE_POSTS: util.write_if_changed('archive.html', generate_archive(posts)) util.write_if_changed('atom.xml', generate_feed(posts)) for post in posts: dir = os.path.split(post.path)[0] try: os.makedirs(dir) except OSError: pass root = '../' * post.path.count('/') util.write_if_changed(post.path, generate_post_page(root, post).encode('utf-8'))
def hill_climb(init_tour):
    cur_iterator = 0
    max_iterator = 100
    min_distance = 100000
    best_tour = init_tour
    while cur_iterator < max_iterator:
        flag = False
        for newtour in swap_cities(init_tour):
            if cur_iterator > max_iterator:
                break
            cur_iterator += 1
            distance = util.get_path_distance(newtour, util.read_file())
            if distance < min_distance:
                best_tour = newtour
                min_distance = distance
                flag = True
                break
        if not flag:
            break
    #print best_tour
    #print min_distance
    #print cur_iterator
    return best_tour, min_distance
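# Hedged usage sketch for hill_climb above. It assumes util.read_file() with no
# arguments returns the city data (as the distance call inside the loop
# suggests) and that an initial tour is just the city indices in file order;
# both are assumptions, not part of the original snippet.
if __name__ == '__main__':
    cities = util.read_file()
    tour, distance = hill_climb(list(range(len(cities))))
    print(tour, distance)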
def download(course, item): """ Download announcement JSON. :param course: A Course object. :param item: { "close_time": 2147483647, "user_id": 1069689, "open_time": 1411654451, "title": "Coursera", "deleted": 0, "email_announcements": "email_sent", "section_id": "14", "order": "6", "item_type": "announcement", "__type": "announcement", "published": 1, "item_id": "39", "message": "Hello, everyone.", "uid": "announcement39", "id": 39, "icon": "" } :return: None. """ path = '{}/announcement/{}.json' path = path.format(course.get_folder(), item['item_id']) util.make_folder(path, True) util.write_json(path, item) content = util.read_file(path) content = util.remove_coursera_bad_formats(content) util.write_file(path, content)
def render(self):
    path = self.model["filename"]
    filename = os.path.basename(path)
    self.headers["Content-Disposition"] = "inline; filename=%s" % filename
    return util.read_file(path)
def __init__(self, filename, data=None): self.filename = filename self.shortname = os.path.basename(filename) self.certs = None if not data: img3 = read_file(filename) else: img3 = data self.img3 = img3 self.ecidoffset = 0 if img3[0:4] != '3gmI': print "Magic 3gmI not found in " + filename return fullSize = dword(img3, 4) sizeNoPack = dword(img3, 8) sigCheckArea = dword(img3, 12) self.sha1 = hashlib.sha1(img3) self.fileHash = hashlib.sha1(img3[12:20+sigCheckArea]) i = 20 sections = {} while i < fullSize: tag = img3[i:i+4][::-1] #reverse fourcc tag total_length = dword(img3, i+4) data_length = dword(img3, i+8) if tag == "DATA": self.datalen = data_length data = img3[i+12:i+total_length] else: data = img3[i+12:i+12+data_length] if tag in Img3.INT_FIELDS: data = struct.unpack("<L", data)[0] elif tag == "VERS": data = data[4:] elif tag == "TYPE": data = data[::-1] elif tag == "ECID": self.ecidoffset = i #print "%s offset=%x len=%x" % (tag,i, data_length) if tag != "KBAG" or dword(data,0) == 1: sections[tag] = data i += total_length self.sections = sections self.leaf_cert = None self.sig = None self.key = "" self.iv = "" self.extractCertificates()
def download():
    url = read_file(FILE_GDOC).strip()
    print("loading %s" % url)
    sheet = load_url(url)
    phenos = read_csv(StringIO(sheet))
    sel = phenos[phenos["malformation?"] == "y"]["phenotype"]
    sel.to_csv(FILE_SELECTION, index=False)
    print("got %d selected phenotypes out of %d, stored in %s"
          % (len(sel), len(phenos), FILE_SELECTION))
def main():
    args = process_command_line_arguments()
    util.mkdir(PATH['TIMBRADAS'])
    xml = util.read_file(args.factura)
    # Send the XML to the PAC for stamping (timbrado)
    timbra_xml(xml)
    return
def test_dev(self, infile, outfile): sent_iter = read_file(infile) handle = open(outfile, 'w', encoding='utf-8') for sent in sent_iter: predict_sentence = self.predict(convert_to_no_EC(sent)) for item in predict_sentence: handle.write('{0}\t{1}\n'.format(item[0], item[1])) handle.write('\n') handle.close()
def predict_and_save(self, infile, outfile): sent_iter = read_file(infile) handle = open(outfile, 'w', encoding='utf-8') for sent in sent_iter: predict_sentence = self.predict(sent) for item in predict_sentence: handle.write('{0}\t{1}\t{2}\n'.format(item[0], item[1], item[2])) handle.write('\n') handle.close()
def _get_meta(self, name, link):
    # load meta from disk or download it (slow for each series, that's why we cache it)
    local = self.tmp_dir
    if not os.path.exists(local):
        os.makedirs(local)
    m = md5.new()
    m.update(name)
    image = os.path.join(local, m.hexdigest() + '_img.url')
    plot = os.path.join(local, m.hexdigest() + '_plot.txt')
    if not os.path.exists(image):
        data = util.request(link)
        data = util.substr(data, '<div id=\"archive-posts\"', '</div>')
        m = re.search('<a(.+?)href=\"(?P<url>[^\"]+)', data, re.IGNORECASE | re.DOTALL)
        if m is not None:
            data = util.request(m.group('url'))
            self._get_image(data, image)
            self._get_plot(data, plot)
    return util.read_file(image).strip(), util.read_file(plot)
def download_stats(self): url = self.url + '/data/stats' path = self.info_folder + '/stats.html' util.download(url, path, self.cookie_file) content = util.read_file(path) pattern = r'<h1.*?</table>' content = re.search(pattern, content, re.DOTALL).group(0) util.write_file(path, content)
def load_wordlist(filename):
    """ This function should return a list or set of words from the given filename. """
    words = set()
    for word in util.read_file(filename):
        words.add(word)
    return words
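# Hedged usage sketch for load_wordlist above; 'wordlist.txt' is a placeholder
# path, and util.read_file is assumed to yield one word per element when
# iterated.
words = load_wordlist('wordlist.txt')
print('%d unique words loaded' % len(words))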
def get_history(self, is_master=False):
    pn = self.path_master_history if is_master else self.path_head_history
    content = util.read_file(pn).strip()
    if content:
        history = content.split("\n")
        history.reverse()
    else:
        history = []
    return history
def load(self):
    data = read_file(self.filename)
    if data:
        try:
            return json.loads(data)
        except ValueError:
            pass
    return {}
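# Hedged companion sketch for the JSON-backed load() above: persisting a dict
# back to self.filename with the standard json module. The original snippet
# only shows the read side, so this writer is an assumption meant to live on
# the same class.
def save(self, data):
    with open(self.filename, 'w') as handle:
        json.dump(data, handle)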
def main(processes=4): global tag_map tag_map = util.read_file('user_profile/preference_tag/user_tags_50.pickle') user_list = db_info.user_list user_list_list = util.split_item(user_list) node = len(user_list_list)/60 for i in range(60): exe_list = user_list_list[i*node:(i+1)*node] results = multiprocess(processes,exe_list) path = 'user_similarity/user_tag_sim/genres/' + str(i) + '.pickle' util.write_file(results,path)
def train(self, trn_file, iter_num = 15): all_feat_vec, all_labels = [], [] sent_iter = read_file(trn_file) for sent in sent_iter: sent_feat_vec, labels = self.get_sent_feat_vec(sent) all_feat_vec.append(sent_feat_vec) all_labels.append(labels) print('--Training Perceptron for Parser--') self.train_t(all_feat_vec, all_labels, iter_num)
def svm_train(sc, top_path, stopwords_dict=None):
    # Stopword-dictionary hook: if a new dictionary is supplied, place it in this directory
    curpath = os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
    if stopwords_dict is None:
        stopwords = set(read_file(os.path.join(curpath, u"stopwords.txt")).split())
    else:
        stopwords = set(read_file(os.path.join(curpath, u"stopwords_dict.txt")).split())
    # Tokenize every text in the two class folders, drop stopwords, and count term
    # frequencies, giving {'pos': [counter, ...], 'neg': [counter, ...]}
    sub_folder = os.listdir(top_path)
    if len(sub_folder) != 2:
        raise OSError("need exactly two folders")
    top_folder_dict = {}
    for name in sub_folder:
        top_folder_dict[name] = pre_process(os.path.join(top_path, name), stopwords)
    # Pick the words that best separate the two classes as the feature set
    topk = 500
    features = feature_selection(top_folder_dict[sub_folder[1]], top_folder_dict[sub_folder[0]], topk)
    # Compute the IDF over both classes
    IDF = idf(top_folder_dict[sub_folder[1]], top_folder_dict[sub_folder[0]], features)
    # Vector representation of every text in each class under this binary split [(), () ...]
    vector1 = {'1.0': feature_vector(tf(top_folder_dict[sub_folder[1]], features), IDF)}
    vector0 = {'0.0': feature_vector(tf(top_folder_dict[sub_folder[0]], features), IDF)}
    # Convert to the input format Spark needs: [LabeledPoint(0.0, []), ...]
    labpoint1 = [LabeledPoint(1.0, vec) for vec in vector1['1.0']]
    labpoint0 = [LabeledPoint(0.0, vec) for vec in vector0['0.0']]
    train_data = labpoint1 + labpoint0
    classifier = SVMWithSGD.train(sc.parallelize(train_data))
    path = os.path.join(curpath, 'svm_' + sub_folder[1] + '_' + sub_folder[0] + '.pkl')
    if os.path.isfile(path):
        os.remove(path)
    with open(path, 'wb') as output:
        pickle.dump((features, IDF, classifier), output)
def download_email_blacklist(self): url = self.url + '/data/export/pii' path = self.info_folder + '/temp.html' util.download(url, path, self.cookie_file) content = util.read_file(path) pattern = r'href="(https://coursera-reports.*?)"' url = re.search(pattern, content).group(1) util.remove(path) path = self.info_folder + '/email_blacklist.csv' util.download(url, path, self.cookie_file)
def load_responses(self):
    c = re.compile(r"%s_(\d{3})\.(\w+)" % self.method)
    if not os.path.exists(self.resource_dir):
        return
    for item in os.listdir(self.resource_dir):
        m = c.match(item)
        if m:
            status_code, format = m.groups()
            body = read_file(
                os.path.join(self.resource_dir,
                             "%s_%s.%s" % (self.method, status_code, format)))
            headers = read_file(
                os.path.join(self.resource_dir,
                             "%s_H_%s.%s" % (self.method, status_code, format)))
            self.add_response(status_code, format, body, headers)
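# Hedged sketch of the on-disk layout load_responses above expects: for a GET
# method it would pick up a body file like GET_200.json plus the matching
# headers file GET_H_200.json. The concrete file names and contents here are
# illustrative assumptions, not taken from the original project.
import os

def write_example_response(resource_dir):
    os.makedirs(resource_dir, exist_ok=True)
    with open(os.path.join(resource_dir, 'GET_200.json'), 'w') as body:
        body.write('{"status": "ok"}')
    with open(os.path.join(resource_dir, 'GET_H_200.json'), 'w') as headers:
        headers.write('Content-Type: application/json')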
def download_info(self): url = self.url temp = self.info_folder + '/temp.html' util.download(url, temp, self.cookie_file) page_html = util.read_file(temp) util.remove(temp) info_files = ['user.json', 'course.json', 'sidebar.json'] matches = re.findall(r'JSON\.parse\("(.*?)"\);', page_html) for match, info_file in zip(matches, info_files)[1:]: info = util.unicode_unescape(match).replace('\\\\', '') path = '{}/{}'.format(self.info_folder, info_file) util.write_json(path, util.read_json(info, True))
def __override_imputed_by_plink(imputed_file, plink_prefix, options, process_line_factory=process_line_factory): '''Main program for overriding imputing genotype file by a PLINK data file. Accepts an options struct.''' # Read plink data g = im.io_genotype.read('plink', 'genotype', prefix=plink_prefix, lazy_load=True) # Read WGS sample indices from the pedigree. Must match the genotype files' ordering. typed_sample_index = np.array(map(im.io_pedigree.read(options.pedigree_file, options.genotype_id_file)._node_of.get, g.sample_id)) # Read imputed line pairs imputed_lines = util.read_file(imputed_file, num_header_lines=options.num_header_lines, delimiter=options.delimiter) # Override lines by data from g (depending on the scenario specified by options) process_line = process_line_factory(options) return process_lines(process_line, imputed_lines, g, options.num_metadata_cols, typed_sample_index, max_flipped_hom=options.max_flipped_hom, max_mismatch_partial=options.max_mismatch_partial, override_tag=options.override_tag, warnings=options.warnings, key=options.key)
def main(): args = parse_args() # We read all these upfront to avoid race conditions where VAL # sees a different domain/problem/plan than what we compute the # hash ID for. domain_text = util.read_file(args.domain) problem_text = util.read_file(args.problem) plan_text = util.read_file(args.plan) try: quality = validator.validate(domain_text, problem_text, plan_text) except validator.Error as e: print e else: result = db.Result( domain_text=domain_text, problem_text=problem_text, plan_text=plan_text, plan_comment="<no comment for this plan>", plan_quality=quality) result.update_db()
def get_inria_holiday_background_paths(): inria_holidays_root = f'{paths.DATA_ROOT}/inria_holidays' filenames = util.read_file( f'{inria_holidays_root}/non_person_images.txt').splitlines() return sorted(f'{inria_holidays_root}/jpg_small/{filename}' for filename in filenames)
def _get_content(self, content_path): return util.read_file(content_path)
def load_file(self): self.BZMAN_settings = read_file(self.ctx.get_settings_file) if self.BZMAN_settings['path'] == "": inform_user( self, "There is no database location specified.\n\n" + "If you haven't created a database, create a new database using 'New'\n\n" + "If you have recently moved the file to a new location, use the 'Set New Path' option in File menu to set the new file location" ) else: try: # x = [os.path.abspath(f) for f in os.listdir(self.BZMAN_settings['path']) if os.path.isfile(f)] x = os.listdir(self.BZMAN_settings['path']) temp_list = [] filename = None for i in range(len(x)): if x[i].find("_BZMAN_DATABASE.json") != -1: temp_list.append(x[i]) if len(temp_list) > 1: inform_user( self, "There are more than one database files. Please select the one you want to open" ) filename = QFileDialog.getOpenFileName( self, "Open", self.BZMAN_settings['path'], filter="Database Files (*.json)")[0] elif len(temp_list) == 0: ans = ask_user( self, "No Company Database found! \n\n" + "If you haven't created a company database yet, you can do it using 'New' option. Click 'Ok' to create a new database.\n\n" + "If you have already created a Database but moved it to a differenct location, please open it manually by clicking 'Cancel'." ) if ans == QMessageBox.Ok: self.new_file() elif ans == QMessageBox.Cancel: filename = QFileDialog.getOpenFileName( self, "Open", self.BZMAN_settings['path'], filter="Database Files (*.json)")[0] if filename: ans = ask_user( self, "Do you want to save this file location for future use? This will overwrite your existing path where the file wasn't found." ) if ans == QMessageBox.Ok: new_path, new_filename = os.path.split( filename) new_company_name = " ".join( os.path.split( os.path.splitext(filename)[0]) [1].split('_')[:-2]) self.BZMAN_settings['path'] = new_path self.BZMAN_settings[ 'database_name'] = new_filename self.BZMAN_settings[ 'company'] = new_company_name write_file(self.BZMAN_settings, self.ctx.get_settings_file) else: #4194304 pass else: filename = os.path.join(self.BZMAN_settings['path'] + '/' + temp_list[0]) if filename: self._open_main_window(filename) except OSError: #TODO maybe set the file path to ""; if file can not be found inform_user( self, "The database folder has been moved or deleted! If moved, use 'Open' or 'Set New Path'" )
def __init__(self, path): self.parse(util.read_file(path))
def test():
    b = util.read_file('C:/test/a.txt', 'b')
    #b = b'\x00\x01\x02\x03'
    s = util.bytes2bin(b)
    print('bin="' + s + '"')
def train_test_videos(path_train_violence, path_test_violence, path_train_raw_nonviolence, path_train_new_nonviolence, path_test_raw_nonviolence, path_test_new_nonviolence, proportion_norm_videos, min_num_frames): """ load train-test split from original dataset """ train_names = [] train_labels = [] test_names = [] test_labels = [] train_bbox_files = [] test_bbox_files = [] train_names_violence = util.read_file(path_train_violence) train_names_new_nonviolence = util.read_file(path_train_new_nonviolence) train_names_raw_nonviolence = util.read_file(path_train_raw_nonviolence) test_names_violence = util.read_file(path_test_violence) test_names_new_nonviolence = util.read_file(path_test_new_nonviolence) test_names_raw_nonviolence = util.read_file(path_test_raw_nonviolence) ##Remove normal videos of short duration train_names_new_nonviolence = remove_short_videos( constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, train_names_new_nonviolence, min_num_frames) train_names_raw_nonviolence = remove_short_videos( constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, train_names_raw_nonviolence, min_num_frames) test_names_new_nonviolence = remove_short_videos( constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, test_names_new_nonviolence, min_num_frames) test_names_raw_nonviolence = remove_short_videos( constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, test_names_raw_nonviolence, min_num_frames) new_split = False ### Train # print('Train names: ', len(train_names_violence)) for tr_name in train_names_violence: train_names.append( os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_VIOLENCE, tr_name)) train_labels.append(1) video_name = re.findall(r'[\w\.-]+-', tr_name)[0][:-1] train_bbox_files.append( os.path.join(constants.PATH_VIOLENCECRIME2LOCAL_BBOX_ANNOTATIONS, video_name + '.txt')) ##ramdom normal samples negative_samples = [] if not os.path.exists(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT): print('Creating Random Normal Train examples file...') num_new_samples = int(len(train_names) * proportion_norm_videos) train_names_new_nonviolence = random.choices( train_names_new_nonviolence, k=num_new_samples) train_names_raw_nonviolence = random.choices( train_names_raw_nonviolence, k=len(train_names) - num_new_samples) if len(train_names_new_nonviolence) == 0: print('Using only raw non violence videos...') for neagtive_name in train_names_new_nonviolence: negative_samples.append( os.path.join( constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, neagtive_name)) # train_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, neagtive_name)) # train_bbox_files.append(None) for neagtive_name in train_names_raw_nonviolence: negative_samples.append( os.path.join( constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, neagtive_name)) util.save_file(negative_samples, constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT) # train_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, neagtive_name)) # train_bbox_files.append(None) new_split = True else: negative_samples = util.read_file( constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT) for sample in negative_samples: train_names.append(sample) train_bbox_files.append(None) negative_labels = [0 for i in range(len(negative_samples))] train_labels.extend(negative_labels) NumFrames_train = [ len(glob.glob1(train_names[i], "*.jpg")) for i in range(len(train_names)) ] ### Test for ts_name in test_names_violence: test_names.append( os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_VIOLENCE, 
                         ts_name))
        test_labels.append(1)
        video_name = re.findall(r'[\w\.-]+-', ts_name)[0][:-1]
        test_bbox_files.append(
            os.path.join(constants.PATH_VIOLENCECRIME2LOCAL_BBOX_ANNOTATIONS,
                         video_name + '.txt'))

    negative_samples = []
    if not os.path.exists(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT):
        print('Creating Random Normal Test examples file...')
        num_samples = int(len(test_names) * proportion_norm_videos)
        test_names_new_nonviolence = random.choices(test_names_new_nonviolence,
                                                    k=num_samples)
        test_names_raw_nonviolence = random.choices(test_names_raw_nonviolence,
                                                    k=len(test_names) - num_samples)
        for negative_name in test_names_new_nonviolence:
            negative_samples.append(
                os.path.join(
                    constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE,
                    negative_name))
        for negative_name in test_names_raw_nonviolence:
            negative_samples.append(
                os.path.join(
                    constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE,
                    negative_name))
        util.save_file(negative_samples,
                       constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT)
        new_split = True
    else:
        negative_samples = util.read_file(
            constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT)
    for sample in negative_samples:
        test_names.append(sample)
        test_bbox_files.append(None)
    negative_labels = [0 for i in range(len(negative_samples))]
    test_labels.extend(negative_labels)
    NumFrames_test = [
        len(glob.glob1(test_names[i], "*.jpg")) for i in range(len(test_names))
    ]
    print('Train Split: ', len(train_names), len(train_labels),
          len(NumFrames_train), len(train_bbox_files), ', Test Split: ',
          len(test_names), len(test_labels), len(NumFrames_test),
          len(test_bbox_files))
    data = {
        'train_names': train_names,
        'train_labels': train_labels,
        'NumFrames_train': NumFrames_train,
        'train_bbox_files': train_bbox_files,
        'test_names': test_names,
        'test_labels': test_labels,
        'NumFrames_test': NumFrames_test,
        'test_bbox_files': test_bbox_files
    }
    return data, new_split
import util
import json
import genp
import os
import shutil

parameter_filename = "sdrc_params.txt"

# load param file into variables
[
    sdrc_filename, sdrc_rows, start_date, end_date, keywords, output_file,
    grant_filename
] = [
    s.split("=")[1].strip()
    for s in util.read_file(parameter_filename).split("\n") if s != ''
]

grant_output = genp.cwd() + "/" + "parsed_" + grant_filename

print("\n\n=================================\nSTANFORD DIABETES RESEARCH CENTER\n=================================")
print("Created for Kiran Kocherlakota, contact: [email protected] , (901) 652-5382, Sun Oct 14 13:43:38 PDT 2018")
print("\nLoading param file...")
print(f'grant_file = {grant_filename}\n')
print("\nREMEMBER that each sheet in the grant file must START with the column names (i.e. have no header).\n")
import util
import operator

all_lines = util.read_file("inputs/day6.txt").strip()

def count_answers(operator):
    result = 0
    groups = all_lines.split("\n\n")
    for group in groups:
        person_answers = group.split("\n")
        answers = set(person_answers[0])
        for answer in person_answers[1:]:
            answers = operator(answers, set(answer))
        result += len(answers)
    return result

def part1():
    print(count_answers(operator.or_))

def part2():
    print(count_answers(operator.and_))
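# Hedged usage sketch: the puzzle snippet above defines part1/part2 but no
# entry point, so this simply runs both parts.
if __name__ == "__main__":
    part1()
    part2()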
def sha1sum(file_path): return get_checksums(read_file(file_path))[1]
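# Hedged, self-contained alternative to sha1sum above using hashlib directly;
# get_checksums and its return layout are not shown in the original snippet,
# so this does not claim to reproduce its exact output ordering.
import hashlib

def sha1sum_direct(file_path):
    with open(file_path, 'rb') as handle:
        return hashlib.sha1(handle.read()).hexdigest()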
# Load the BERT model
tvars = tf.trainable_variables()
(assignment, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
    tvars, init_checkpoint)
tf.train.init_from_checkpoint(init_checkpoint, assignment)
# Get the last and second-to-last layers.
# Switched to reading the queries from a file
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    token = tokenization.CharTokenizer(vocab_file=bert_vocab_file)
    querys = read_file("data/input")
    out_f = open("data/bert_output", 'w', encoding='utf-8')
    # query = '帮我查一下航班信息'
    # query = '查一下航班信息'
    # query = '附近有什么好玩的'
    for query in querys:
        split_tokens = token.tokenize(query)
        word_ids = token.convert_tokens_to_ids(split_tokens)
        word_ids.insert(0, token.convert_tokens_to_ids(["[CLS]"])[0])
        word_ids.append(token.convert_tokens_to_ids(["[SEP]"])[0])
        # Word segmentation
        index_arr = [0]
        seg_list = jieba.cut(query, cut_all=False)
        i = 0
        for each in seg_list:
def load_description(self): self.description = read_file( os.path.join(self.methods_dir, "%s_doc.md" % self.name))
def load(self): self.edit = True self.method_response = read_file( os.path.join(self.methods_dir, self.name))
def load_description(self): self.description = read_file( os.path.join( self.resource_dir, "%s_doc.md" % self.method))
def main():
    BTCUSD = read_file('BTCUSD_GEMINI.csv')
    ret = Backtest(BTCUSD, SmaCross, ExchangeAPI, 10000.0, 0.00).run()
    print(ret)
@author: Administrator """ from util import read_file from rule import RuleDetector from featureSelector import FeatureSelector from termWeight import TermWeight from sklearn.naive_bayes import BernoulliNB from sklearn.svm import NuSVC from sklearn.ensemble.forest import RandomForestClassifier import os from preprocesser import Preprocesser from sklearn.cross_validation import train_test_split neg_data = read_file('../data/final_neg_stem.txt') pos_data = read_file('../data/final_pos_stem.txt') rd = RuleDetector() neg_origin, neg_stem = rd.get_features(neg_data) pos_origin, pos_stem = rd.get_features(pos_data) neg_test_size = int(len(neg_data) * 0.2) pos_test_size = int(len(pos_data) * 0.2) r_train_neg_origin = neg_origin[neg_test_size:] r_test_neg_origin = neg_origin[:neg_test_size] r_train_neg_stem = neg_stem[neg_test_size:] r_test_neg_stem = neg_stem[:neg_test_size] def get_data(data, index): result = [] for line in data:
import unittest import re from util import read_file, solution_timer INPUT = read_file('input.txt') def get_passports(batch_file): data = batch_file.split('\n\n') passports = [] for value in data: passports.append(get_passport_dict(value)) return passports def get_passport_dict(passport): values = {} for i in passport.split(' '): for j in i.split('\n'): key, value = j.split(':') values[key] = value return values def validate_passport(passport: dict): keys = passport.keys() if len(keys) == 8: return True elif len(keys) == 7 and 'cid' not in keys:
# Notes the start time of script so it can compare at end start_time = time.time() print("Starting snippet generator with input folder '{0}' and output folder '{1}'".format(input_folder, output_folder)) # Where the contents of each file will be stored file_contents = {} # Input files that don't exist files_not_existing = [] # Loop over each file and store its content in the dictionary for file_name in config.NATIVE_FILE_NAMES: input_file = os.path.join(input_folder, file_name) if os.path.isfile(input_file): file_contents[file_name] = read_file(input_file) else: files_not_existing.append(file_name) # If any of the files do not exist, tell the user and exit the script if len(files_not_existing) >= 1: print("The following input files do not exist:") for file in files_not_existing: print("\t{0}".format(file)) print("Ensure that all the input files are in the input folder.") sys.exit() # Regular expression matching for Lua function definitions function_regex = re.compile(r'''
def setUpClass(cls): cls.test_input = read_file('test_input.txt')
def obj_list_from_pbg(pbg): get_objects_list = [] if pbg[-4:] == '.pbd': return content = util.read_file(pbg) lines = content.readlines() is_pbl = False pbl = '' is_obj = False for line in lines: if not line.strip(): continue if '@begin Libraries' in line: is_pbl = True continue if is_pbl: splits = line.strip().split(' ') try: ini = 1 if splits[0].rfind( '\\\\') == -1 else splits[0].rfind('\\\\') + 2 fim = -1 pbl = splits[0].strip()[ini:fim] # logger.info('\tPBL: {}'.format(pbl)) except IndexError as err: pass # logger.error('\tsplits: {}'.format(splits)) is_pbl = False continue if '@begin Objects' in line: is_obj = True continue if is_obj: if '@end;' in line.strip(): continue if line.strip() == '': continue line = line.strip() if line[0] != ' ': line = ' ' + line splits = line.split(' ') ini = 1 if splits[-1].rfind( '\\\\') == -1 else splits[-1].rfind('\\\\') + 2 fim = -2 pbl_obj = splits[-1].strip()[ini:fim] if pbl_obj == pbl: try: obj_path = pbg[:pbg.rfind('\\' )] if pbg.rfind('\\') > 0 else '' obj_name = util.path_obj_from_line(splits[1]) obj_name = obj_name if obj_name.rfind( '\\') == -1 else obj_name[obj_name.rfind('\\') + 1:] full_obj_path = '{}{}'.format( obj_path + '\\' if obj_path != '' else obj_path, obj_name) get_objects_list.append(full_obj_path) except IndexError: raise except FileNotFoundError: raise return get_objects_list
def get_api_key():
    global API_KEY
    if API_KEY is None:
        API_KEY = util.read_file("key.txt")
        return API_KEY
    return API_KEY
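# Hedged usage note for get_api_key above: if key.txt ends with a trailing
# newline, the cached key will include it, so callers that need the bare token
# may want a stripped variant like this one (an assumption, not part of the
# original module).
def get_api_key_stripped():
    return get_api_key().strip()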
def day1part1():
    entity_list = read_file("1", "list")
    for entity in entity_list:
        sub_entity = 2020 - int(entity)
        if str(sub_entity) in entity_list:
            return int(entity) * sub_entity
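# Hedged variant of day1part1 above that checks membership against a set
# instead of rescanning the list for every entry; read_file("1", "list") is
# assumed to return the same list of strings the original uses.
def day1part1_with_set():
    entities = set(read_file("1", "list"))
    for entity in entities:
        sub_entity = 2020 - int(entity)
        if str(sub_entity) in entities:
            return int(entity) * sub_entity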
def file_test5(path): b = util.read_file(path, 'b') s = '\n\n' s += util.hexdump(b, 32, 2, header=False, address=False, ascii=False) s += '\n' return s
def read_file(path): source = util.read_file(path) stream = Stream(source, 0) return decode(stream)
def __check_cache(self):
    if os.path.exists(self.__cache_file):
        self.__data = util.unpack_json(util.read_file(self.__cache_file))
        return True
    return False
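# Hedged companion sketch for __check_cache above, meant to live on the same
# class: it writes self.__data back to the cache file with the standard json
# module, since only the read-side helpers (util.read_file / util.unpack_json)
# appear in the original snippet.
import json

def __write_cache(self):
    with open(self.__cache_file, 'w') as handle:
        json.dump(self.__data, handle)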
def main(args): try: # Setup the configuration variables and definitions config.init() # Setup parser, parse arguments and initialise globals argp = setup_argparse() a = argp.parse_args(args) setup_globals(a) # Create a (valid) target system device oject (before anything else) device = set_device(target_system, num_cores) # Read the input from stdin or from a file vhdr(v, 'Front end') input_file = util.read_file(infile) if infile else sys.stdin.read() # Setup the error object errorlog = ErrorLog() if build_only: # Generate code vhdr(v, 'Generating code for {}'.format(device)) generate(ast, sig, child, device, outfile, False, True, compile_only, display_memory, show_calls, v) # Parse the input file and produce an AST ast = produce_ast(input_file, errorlog) # Perform semantic analysis on the AST sym = SymbolTable(errorlog) sig = SignatureTable(errorlog) sem = semantic_analysis(sym, sig, ast, device, errorlog) # Perform AST transformations for channels and reps if not disable_transformations: transform_ast(sem, sym, sig, ast, errorlog, device, v) # Perform child analysis child = child_analysis(sig, ast) # Generate code vhdr(v, 'Generating code for {}'.format(device)) generate(ast, sig, child, device, outfile, translate_only, compile_only, display_memory, show_calls, save_temps, v) # Handle (expected) system exits except SystemExit: return 0 # Handle any specific compilation errors quietly except QuietError as e: return 1 # Handle any specific compilation errors except Error as e: #sys.stderr.write('Error: {}\n'.format(e)) return 1 # Parser attribute error #except AttributeError: # sys.stderr.write('Attribute error') # return 1 # Handle a keyboard interrupt (ctrl+c) except KeyboardInterrupt: sys.stderr.write('Interrupted') return 1 # Anything else we weren't expecting except: sys.stderr.write("Unexpected error: {}\n".format(sys.exc_info()[0])) raise return 1 return 0
def __init__(self, ctx, *args, **kwargs): super().__init__() self.ctx = ctx # remove trial checks # self.check_trial_validity() self.setWindowTitle("Welcome to BZMAN!") version_no = self.ctx.build_settings['version'] text = "<center>" \ "<br><br><h1></h1>" \ "⁣" \ "<img src=%r>" \ "</center>" \ "<p>BZMAN</p>"\ % self.ctx.get_logo#<br/>" \ # "Copyright © JSS Inc.</p>" # text = QPixmap().setPixmap(self.ctx.get_logo) # text += "\nBZMAN\n" text += "Version " + version_no label = QLabel(text) # label.setPixmap(QPixmap(self.ctx.get_logo)) # label.setText("\nVersion "+version_no) # label.setMaximumSize(self.ctx.available_geo().width()/2,self.ctx.available_geo().height()/1.5) label.setAlignment(Qt.AlignCenter) # label.setFixedSize(self.ctx.available_geo().width()/5,self.ctx.available_geo().height()/5) hbox = QHBoxLayout() btn_new = QPushButton(" New ") btn_new.setMaximumSize(self.ctx.available_geo().width() / 9.12, self.ctx.available_geo().height() / 17.44) # btn_new.setStyleSheet("QPushButton {border-radius: 20px;}") btn_new.clicked.connect(self.new_file) hbox.addWidget(btn_new) btn_open = QPushButton(" Open ") btn_open.setMaximumSize(self.ctx.available_geo().width() / 9.12, self.ctx.available_geo().height() / 17.44) # btn_open.setStyleSheet("QPushButton {border-radius: 20px;}") btn_open.clicked.connect(self.load_file) hbox.addWidget(btn_open) hbox_widget = QWidget() hbox_widget.setLayout(hbox) container = QWidget() container_layout = QVBoxLayout() container_layout.addWidget(label) container_layout.addItem(QSpacerItem(10, 10, QSizePolicy.Expanding)) container_layout.addWidget(hbox_widget) container.setLayout(container_layout) self.setCentralWidget(container) # toolbar = QToolBar("My Toolbar") # toolbar.setIconSize(QSize(30,30)) # self.addToolBar(toolbar) new_action = QAction("New", self) new_action.setStatusTip("Create a new file") new_action.triggered.connect(self.new_file) new_action.setShortcut(QKeySequence.New) # toolbar.addAction(new_action) # toolbar.addSeparator() load_action = QAction("Open", self) load_action.setStatusTip("Open an existing file") load_action.triggered.connect(self.load_file) load_action.setShortcut(QKeySequence.Open) # toolbar.addAction(load_action) load_demo = QAction("Open Demo", self) load_demo.setStatusTip("Open demo file") load_demo.triggered.connect(self.load_demo) set_new_file_path = QAction("Set New Path", self) set_new_file_path.setStatusTip("Sets a new path for an existing file") set_new_file_path.triggered.connect(self.set_new_file_location) dark_theme = QAction("Dark", self) dark_theme.triggered.connect(self.change_theme) breeze_dark = QAction("Breeze Dark", self) breeze_dark.triggered.connect(self.change_dark_breeze_theme) breeze_light = QAction("Light", self) breeze_light.triggered.connect(self.change_light_theme) blue = QAction("Blue", self) blue.triggered.connect(self.change_blue_theme) font = QAction("Fonts", self) font.triggered.connect(self.font_choice) self.setStatusBar(QStatusBar(self)) menu = self.menuBar() menu_font_size = menu.font() menu_font_size.setPointSize(10) menu.setFont(menu_font_size) menu.setNativeMenuBar(False) # for mac file_menu = menu.addMenu("&File") file_menu.setFont(menu_font_size) file_menu.addAction(new_action) file_menu.addAction(load_action) file_menu.addAction(load_demo) file_menu.addAction(set_new_file_path) options = menu.addMenu("&Customize") options.setFont(menu_font_size) theme = options.addMenu("Theme") theme.setFont(menu_font_size) theme.addAction(dark_theme) theme.addAction(breeze_dark) theme.addAction(breeze_light) 
        theme.addAction(blue)
        options.addAction(font)

        # self.setGeometry(800,100,2000*self.devicePixelRatio(),1000*self.devicePixelRatio())
        self.setWindowState(Qt.WindowMaximized)
        self.centerOnScreen()

        self.BZMAN_settings = read_file(self.ctx.get_settings_file)
        if self.BZMAN_settings['path'] != "":
            self.load_file()
        else:
            self.show()
            inform_user(
                self, "Welcome to BZMAN! \n\n" +
                "Click on the 'New' button to begin.")
def read_from_backing_file(offset, length):
    p = _file(BACKING_FILE)
    return read_file(p, offset, length)
def kmeans_cluster(params): import numpy as np from numpy.linalg import cholesky import matplotlib.pyplot as plt import seaborn as sns import pandas as pd import tensorflow as tf from random import choice, shuffle from numpy import array import pdb import pickle def KMeansCluster(vectors, noofclusters): """ K-Means Clustering using TensorFlow. `vertors`应该是一个n*k的二维的NumPy的数组,其中n代表着K维向量的数目 'noofclusters' 代表了待分的集群的数目,是一个整型值 """ noofclusters = int(noofclusters) assert noofclusters < len(vectors) #找出每个向量的维度 dim = len(vectors[0]) #辅助随机地从可得的向量中选取中心点 vector_indices = list(range(len(vectors))) shuffle(vector_indices) #计算图 #我们创建了一个默认的计算流的图用于整个算法中,这样就保证了当函数被多次调用 #时,默认的图并不会被从上一次调用时留下的未使用的OPS或者Variables挤满 graph = tf.Graph() with graph.as_default(): #计算的会话 sess = tf.Session() ##构建基本的计算的元素 ##首先我们需要保证每个中心点都会存在一个Variable矩阵 ##从现有的点集合中抽取出一部分作为默认的中心点 centroids = [ tf.Variable((vectors[vector_indices[i]])) for i in range(noofclusters) ] ##创建一个placeholder用于存放各个中心点可能的分类的情况 centroid_value = tf.placeholder("float32", [dim]) cent_assigns = [] for centroid in centroids: cent_assigns.append(tf.assign(centroid, centroid_value)) ##对于每个独立向量的分属的类别设置为默认值0 assignments = [tf.Variable(0) for i in range(len(vectors))] ##这些节点在后续的操作中会被分配到合适的值 assignment_value = tf.placeholder("int32") cluster_assigns = [] for assignment in assignments: cluster_assigns.append(tf.assign(assignment, assignment_value)) ##下面创建用于计算平均值的操作节点 #输入的placeholder mean_input = tf.placeholder("float32", [None, dim]) #节点/OP接受输入,并且计算0维度的平均值,譬如输入的向量列表 mean_op = tf.reduce_mean(mean_input, 0) ##用于计算欧几里得距离的节点 v1 = tf.placeholder("float32", [dim]) v2 = tf.placeholder("float32", [dim]) euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2))) ##这个OP会决定应该将向量归属到哪个节点 ##基于向量到中心点的欧几里得距离 #Placeholder for input centroid_distances = tf.placeholder("float32", [noofclusters]) cluster_assignment = tf.argmin(centroid_distances, 0) ##初始化所有的状态值 ##这会帮助初始化图中定义的所有Variables。Variable-initializer应该定 ##义在所有的Variables被构造之后,这样所有的Variables才会被纳入初始化 init_op = tf.global_variables_initializer() #初始化所有的变量 sess.run(init_op) ##集群遍历 #接下来在K-Means聚类迭代中使用最大期望算法。为了简单起见,只让它执行固 #定的次数,而不设置一个终止条件 noofiterations = 20 for iteration_n in range(noofiterations): ##期望步骤 ##基于上次迭代后算出的中心点的未知 ##the _expected_ centroid assignments. 
#首先遍历所有的向量 for vector_n in range(len(vectors)): vect = vectors[vector_n] #计算给定向量与分配的中心节点之间的欧几里得距离 distances = [ sess.run(euclid_dist, feed_dict={ v1: vect, v2: sess.run(centroid) }) for centroid in centroids ] #下面可以使用集群分配操作,将上述的距离当做输入 assignment = sess.run( cluster_assignment, feed_dict={centroid_distances: distances}) #接下来为每个向量分配合适的值 sess.run(cluster_assigns[vector_n], feed_dict={assignment_value: assignment}) ##最大化的步骤 #基于上述的期望步骤,计算每个新的中心点的距离从而使集群内的平方和最小 for cluster_n in range(noofclusters): #收集所有分配给该集群的向量 assigned_vects = [ vectors[i] for i in range(len(vectors)) if sess.run(assignments[i]) == cluster_n ] #计算新的集群中心点 new_location = sess.run( mean_op, feed_dict={mean_input: array(assigned_vects)}) #为每个向量分配合适的中心点 sess.run(cent_assigns[cluster_n], feed_dict={centroid_value: new_location}) #返回中心节点和分组 centroids = sess.run(centroids) assignments = sess.run(assignments) return centroids, assignments print("LOAD CORPUS START") content = read_file(params.train_file) train_feature = pickle.load(open(params.feature_file, 'rb')) indices = list(range(len(train_feature))) import random SEED = 42 random.seed(SEED) random.shuffle(indices) if params.sample_number >= len(train_feature): print("sample_number:" + str(len(train_feature))) indices = indices[:params.sample_number] content = np.array(content)[indices].tolist() train_feature = np.array(train_feature)[indices].tolist() print("KMEANS GPU STARTING...") center, result = KMeansCluster(train_feature, params.cluster_number) with open(params.train_file + params.method + str(params.sample_number), 'w') as file_w: for idx, line in enumerate(content): file_w.write(str(result[idx]) + '\t' + line + '\n') #可能需要降维,聚类可视化 #res={"x":[],"y":[],"kmeans_res":[]} #for i in range(len(result)): # res["x"].append(train_feature[i][0]) # res["y"].append(train_feature[i][1]) # res["kmeans_res"].append(result[i]) # pd_res=pd.DataFrame(res) # sns.lmplot("x","y",data=pd_res,fit_reg=False,size=5,hue="kmeans_res") # plt.show() pass
if len(current_state.path) == NUM_NODES: return current_state next_node = Node(0, 0, 0, sys.maxsize) next_state = (next_node, 0, [], []) for node in nodes: if not current_state.nodes_in_path[node.index]: if next_node.h > node.h: path = deepcopy(current_state.path) path.append(node) nodes_in_path = deepcopy(current_state.nodes_in_path) nodes_in_path[node.index] = True next_node = node next_state = State(node, 0, path, nodes_in_path) open_states.append(next_state) open_states.pop(0) return None nodes = read_file() calc_heuristics(nodes) final_state = pathfinding() print("FINAL_PATH:") for n in final_state.path: print(n.index)
def __init__(self, database_filename, ctx, *args, **kwargs): super().__init__() self.database_filename = database_filename self.ctx = ctx self.widgets = [] self.widget_names = [] self.pie_view = QWidget() self.pie_view_layout = QGridLayout() self.controls = QWidget() # Controls container widget. self.controlsLayout = QGridLayout() # Controls container layout. self.load_widgets() self.controls.setLayout(self.controlsLayout) self.pie_view.setLayout(self.pie_view_layout) self.load_scroll_area() # Search bar. self.searchbar = QLineEdit() self.searchbar.setPlaceholderText("Search") self.searchbar.textChanged.connect(self.update_display) # Adding Completer. self.completer = QCompleter(self.widget_names) self.completer.setCaseSensitivity(Qt.CaseInsensitive) self.searchbar.setCompleter(self.completer) # Adding Reload button self.reload_btn = QPushButton("Reload") self.reload_btn.setShortcut(QKeySequence.Refresh) self.reload_btn.setFixedSize(self.ctx.available_geo().width() / 11, self.ctx.available_geo().height() / 26.83) pix = str(self.ctx.available_geo().width() / 136) + "px" # for 20px border-radius self.reload_btn.setStyleSheet( "QPushButton {background-color:#acdbdf; color:black; border-radius: " + pix + ";}") self.reload_btn.clicked.connect(self.reload_func) # Adding "Add" Button ----> added as a quick action # self.add_new_btn = QPushButton("Create New Customer") # self.add_new_btn.setFixedSize(450,65) # self.add_new_btn.setStyleSheet("QPushButton {background-color: #927fbf;color:black; border-radius: 10px;}")#7045af # self.add_new_btn.clicked.connect(self.add_new_entry) # Add Checkbox to toggle delete self.delete_checkbox = QCheckBox("Enable Delete") self.delete_checkbox.setStyleSheet( "QCheckbox {color: red}") # #ff6363; #Not able to set this color! 
self.delete_checkbox.setChecked(False) self.delete_checkbox.stateChanged.connect(self.check_delete_state) # Adding Quick add ToolButton add_new_customer = QAction("New Customer", self) new_invoice_action = QAction("New Invoice", self) record_payment_action = QAction("New Payment", self) view_active_invoices = QAction("View Active Invoices", self) quick_summ_action = QAction("Quick Company Summary", self) self.quick_add = QToolButton() self.quick_add.setIcon(QIcon(QPixmap(self.ctx.get_plus_sign))) self.quick_add.addAction(add_new_customer) self.quick_add.addAction(new_invoice_action) self.quick_add.addAction(record_payment_action) self.quick_add.addAction(view_active_invoices) self.quick_add.addAction(quick_summ_action) self.quick_add.setPopupMode(QToolButton.InstantPopup) self.quick_add.setIconSize( QSize(self.ctx.available_geo().width() / 36.5, self.ctx.available_geo().width() / 36.5)) self.quick_add.setStyleSheet( 'QToolButton{border: 0px solid;} QToolButton::menu-indicator { image: none;}' ) add_new_customer.triggered.connect(self.add_new_entry) new_invoice_action.triggered.connect(self.new_invoice) record_payment_action.triggered.connect(self.new_payment) view_active_invoices.triggered.connect(self.view_active_invoices) quick_summ_action.triggered.connect(self.quick_summary) btn_container = QWidget() hbox1 = QHBoxLayout() hbox1.addWidget(self.quick_add) hbox1.addItem(QSpacerItem(35, 35, QSizePolicy.Fixed)) hbox1.addWidget(self.searchbar) # hbox1.addWidget(self.add_new_btn) hbox1.addItem(QSpacerItem(35, 35, QSizePolicy.Fixed)) hbox1.addWidget(self.reload_btn) hbox1.addItem(QSpacerItem(35, 35, QSizePolicy.Fixed)) hbox1.addWidget(self.delete_checkbox) hbox1.setAlignment(Qt.AlignLeft) btn_container.setLayout(hbox1) pie_w_control = QWidget() pie_w_control_layout = QHBoxLayout() pie_w_control_layout.addWidget(self.pie_view) pie_w_control_layout.addWidget(self.scroll) pie_w_control.setLayout(pie_w_control_layout) # Add the items to VBoxLayout (applied to container widget) # which encompasses the whole window. container = QWidget() containerLayout = QVBoxLayout() containerLayout.addWidget(btn_container) # containerLayout.addWidget(self.searchbar) # containerLayout.addWidget(self.overall_view_widget) # containerLayout.addWidget(self.scroll) containerLayout.addWidget(pie_w_control) container.setLayout(containerLayout) self.setCentralWidget(container) # self.setGeometry(800, 100, 1500*self.devicePixelRatio(), 1500*self.devicePixelRatio()) self.setWindowState(Qt.WindowMaximized) company_name = read_file(self.ctx.get_settings_file)['company'] self.setWindowTitle('BZMAN : ' + str(company_name) + " Database") self.centerOnScreen()