def files_rank_cluster(local_featurePoi_path, output_path=None, cluster_type="multi", column=None):
    """Run ``rank_prediction`` over every feature file using a worker pool.

    :param local_featurePoi_path: directory handed to ``utils.get_files`` to
        list the input files.
    :param output_path: when truthy, replaces the module-level
        ``local_featurePoiRank_path``.
    :param cluster_type: not used by this function; kept for callers.
    :param column: not used by this function; kept for callers.
    """
    global local_featurePoiRank_path
    if output_path:
        local_featurePoiRank_path = output_path
    logger.info(local_featurePoiRank_path)
    begin_time = time.time()
    logger.info("all fileList handle process")

    # load model
    model_file_list, _model_dirs = utils.get_files(local_model_path)
    model_dict = load_models(model_file_list)

    # read the input files
    fileList, _dirs = utils.get_files(local_featurePoi_path)
    logger.info('total file:{num}'.format(num=len(fileList)))

    pool = multiprocessing.Pool(processes=5)
    for file in fileList:
        pool.apply_async(rank_prediction, (file, model_dict,))
    # close() has to come before join(): joining a pool that still accepts
    # work raises ValueError instead of waiting for the workers.
    pool.close()
    pool.join()

    end_time = time.time()
    logger.info("total time:" + str(end_time - begin_time) + "s")
    logger.info("all fileList handle finished")
def __init__(self, opt):
    """Keep the options and list the image files of both domain roots."""
    super(DomainTransferDataset, self).__init__()
    self.opt = opt
    # One listing per domain root; the lengths are cached next to the lists.
    files_a = utils.get_files(opt.baseroot_A)
    files_b = utils.get_files(opt.baseroot_B)
    self.imglist_A, self.imglist_B = files_a, files_b
    self.len_A, self.len_B = len(files_a), len(files_b)
def get_angles_data(input_folder, output_folder, files_keep, type_data="angles", align=True):
    """Copy the usable ``type_data`` arrays of the kept files to ``output_folder``.

    A file is processed when the part of its name before the first "." matches
    one of ``files_keep``. Its array is saved only when it has three
    dimensions and contains no NaN value.

    :param input_folder: folder holding the ``type_data`` and ``events`` sub-folders.
    :param output_folder: destination root; ``output_folder/type_data`` is created.
    :param files_keep: file names selecting what to process.
    :param type_data: sub-folder name of the arrays.
    :param align: when true, the matching events CSV is loaded and passed to
        ``align_and_save_data`` together with the array.
    """
    # A set gives O(1) membership tests instead of scanning a list per file.
    stems_to_keep = {file_name.split(".")[0] for file_name in files_keep}
    files_angles = get_files(join_path(input_folder, type_data))
    os.makedirs(join_path(output_folder, type_data), exist_ok=True)

    for file_ in files_angles:
        stem = file_.split(".")[0]
        if stem not in stems_to_keep:
            continue
        # NOTE(review): allow_pickle=True can run code stored in the file;
        # only safe for trusted inputs.
        data = np.load(join_path(input_folder, type_data, file_), allow_pickle=True)
        if len(data.shape) != 3 or np.count_nonzero(np.isnan(data)):
            continue
        np.save(join_path(output_folder, type_data, file_), data)
        if align:
            events = load_csv(
                join_path(input_folder, "events", "{}.csv".format(stem)),
                dtype=str,
            )
            align_and_save_data(data, events, output_folder, file_, type_data=type_data)
def __init__(self, opt):
    """Keep the options and build numerically ordered image and mask lists."""
    def numeric_stem(file_path):
        # "dir/12.png" -> 12: the text after the last "/" and before the first "."
        file_name = file_path.split('/')[-1]
        return int(file_name.split('.')[0])

    self.opt = opt
    self.imglist = sorted(utils.get_files(opt.baseroot), key=numeric_stem)
    self.masklist = sorted(utils.get_files(opt.baseroot_mask), key=numeric_stem)
def main():
    """Run ``process_file`` on every test file and print the ones it rejects."""
    sources = get_files(SOURCE_LOCATION, ".h")
    tests = get_files(TESTS_LOCATION, ".cpp")
    # A falsy result from process_file marks the file as needing a fix.
    files_to_fix = [
        filename for filename in tests if not process_file(filename, sources)
    ]
    print("Have to fix files ", files_to_fix)
def test_get_files(self):
    """Check that pattern-filtered listings only contain the expected suffix."""
    cases = (
        ("parquets", "*.parquet", '.parquet'),
        ("models", "*.pkl", '.pkl'),
    )
    for folder, pattern, suffix in cases:
        files = utils.get_files(folder, pattern)
        if len(files) > 0:
            for file_name in files:
                assert file_name.endswith(suffix)
def __init__(self, config, train):
    """Locate the clean and mixed audio files of the train or test split.

    :param config: configuration object; ``config.data.audio.path`` is the
        data root and the whole object is handed to ``Audio``.
    :param train: truthy selects the "train" sub-folder, otherwise "test".
    """
    self.config = config
    split_name = "train" if train else "test"
    self.data_dir = os.path.join(config.data.audio.path, split_name)
    self.audio_handler = Audio(config)

    clean_dir = os.path.join(self.data_dir, "clean")
    mix_dir = os.path.join(self.data_dir, "mix")
    self.target_list = utils.get_files(clean_dir)
    self.mix_list = utils.get_files(mix_dir)
    # Only the mix list is shuffled, in place.
    np.random.shuffle(self.mix_list)
def main(argv=sys.argv):
    """Print the component names found for three file-name patterns.

    ``argv[1]`` is the location passed to ``get_files``. The patterns cover
    the plain, ``.ie`` and ``.ionic`` component files, in that order.
    """
    root = argv[1]
    patterns = (
        r'[a-zA-Z0-9-]*\.component\.ts',
        r'[a-zA-Z0-9-]*\.ie\.component\.ts',
        r'[a-zA-Z0-9-]*\.ionic\.component\.ts',
    )
    # Same three phases as before: list files, extract names, print.
    file_groups = [get_files(root, pattern) for pattern in patterns]
    name_groups = [get_component_name(group) for group in file_groups]
    for names in name_groups:
        print("[%s]\n" % ",".join(names))
def get_audio_transcripts_pairs(audio_files_path, transcription_files_path):
    """Pair the i-th audio file with the i-th transcription file.

    Pairing is purely positional on the two ``get_files`` listings. Fewer
    transcription files than audio files raises ``IndexError``.

    :return: list of ``(audio_file, transcription_file)`` tuples.
    """
    audio_files = get_files(audio_files_path)
    transcription_files = get_files(transcription_files_path)
    return [
        (audio_file, transcription_files[position])
        for position, audio_file in enumerate(audio_files)
    ]
def generate_pdf_report(release, spec, versions):
    """Generate the PDF format of the report.

    Converts the static HTML plots to PDF, runs the LaTeX builder command and
    then compiles ``csit.tex`` and copies the result to the archive directory.

    :param release: Release string of the product.
    :param spec: Specification read from the specification file.
    :param versions: List of versions to generate. Not used by this function.
    :type release: str
    :type spec: Specification
    :type versions: list
    """

    logging.info(" Generating the pdf report, give me a few minutes, please "
                 "...")

    paths = spec.environment["paths"]

    # Convert PyPLOT graphs in HTML format to PDF.
    convert_plots = "xvfb-run -a wkhtmltopdf {html} {pdf}.pdf"
    plots = get_files(paths["DIR[STATIC,VPP]"], "html")
    plots.extend(get_files(paths["DIR[STATIC,DPDK]"], "html"))
    for plot in plots:
        # Output name is the plot path without its last extension.
        file_name = "{0}".format(plot.rsplit(".", 1)[0])
        execute_command(convert_plots.format(html=plot, pdf=file_name))

    # Generate the LaTeX documentation
    build_dir = paths["DIR[BUILD,LATEX]"]
    builder_cmd = PDF_BUILDER.format(
        release=release,
        date=datetime.datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=paths["DIR[WORKING,SRC]"],
        build_dir=build_dir)
    execute_command(builder_cmd)

    # Build pdf documentation
    archive_dir = paths["DIR[STATIC,ARCH]"]
    enter_build_dir = 'cd {build_dir} && '.format(build_dir=build_dir)
    cmds = [
        enter_build_dir +
        'pdflatex -shell-escape -interaction nonstopmode csit.tex || true',
        enter_build_dir +
        'pdflatex -interaction nonstopmode csit.tex || true',
        enter_build_dir +
        'cp csit.pdf ../{archive_dir}/csit_{release}.pdf'.format(
            archive_dir=archive_dir, release=release),
    ]
    for cmd in cmds:
        execute_command(cmd)

    logging.info(" Done.")
def extract(self, folder):
    """Cut the usable images found in ``folder`` into panels.

    Panels are written to ``folder/panels`` as ``<image name>_<index><ext>``.
    Images whose share of histogram mass in bins 50..199 reaches
    ``self.paper_th`` are skipped as paper-textured.

    :param folder: directory passed to ``get_files`` to list the images.
    """
    print("Loading images ... ", end="")
    image_list, _, _ = get_files(folder)
    imgs = [load_image(x) for x in image_list]
    print("Done!")

    # create panels dir
    folder = join(folder, "panels")
    if not exists(folder):
        makedirs(folder)

    # remove images with paper texture, not well segmented.
    # Paths are filtered together with the images so that each panel is
    # later named after the image it really came from.
    kept_paths = []
    paperless_imgs = []
    for path, img in tqdm(list(zip(image_list, imgs)),
                          desc="Removing images with paper texture"):
        hist, _ = np.histogram(img.ravel(), 256, [0, 256])
        if np.sum(hist[50:200]) / np.sum(hist) < self.paper_th:
            kept_paths.append(path)
            paperless_imgs.append(img)

    # remove text from panels
    if not self.keep_text:
        # assumes remove_text returns one image per input, in the same
        # order -- TODO confirm
        paperless_imgs = self.remove_text(paperless_imgs)

    for path, img in tqdm(list(zip(kept_paths, paperless_imgs)),
                          desc="extracting panels"):
        panels = self.generate_panels(img)
        name, ext = splitext(basename(path))
        for j, panel in enumerate(panels):
            # splitext() keeps the leading "." in ext, so none is added here.
            cv2.imwrite(join(folder, f'{name}_{j}{ext}'), panel)
def get(self):
    """Write the JSON-encoded file list for the ``num`` request argument.

    Any failure (bad argument, listing or encoding error) is answered with
    a JSON ``null`` instead.
    """
    try:
        count = int(self.get_argument('num'))
        payload = json.dumps(utils.get_files(settings.PICDIR, count))
        self.write(payload)
    except Exception:
        self.write(json.dumps(None))
def make_expfiles(subjID, filetype=['eeg']):
    '''Open one writable experiment file per requested file type.

    The subject folder is created under the board's data directory. The
    trial number is one more than the highest trial among today's files for
    that subject, or 0 when there are none.

    :param subjID: subject identifier, also used as folder name.
    :param filetype: iterable of file-type names.
    :return: dict mapping each file type to an open file object; the caller
        is responsible for closing them.
    '''
    myboard = mainBoard()

    # Create subject folder
    subjfullpath = os.path.join(myboard.dataDir, subjID)
    utils.make_dir(subjfullpath)

    # Work out the next trial number from the files already written today
    timestamp = utils.get_timenow(key=EXPFILE_TIMEKEY)
    tdayfileList = utils.get_files(subjfullpath, match=timestamp, fullpath=False)
    trial = 0
    if tdayfileList:
        trial = max([expFile(f).trial for f in tdayfileList]) + 1

    logfile = {}
    for ftype in filetype:
        descriptor = expFile(subjID=subjID, trial=trial, filetype=ftype)
        target = os.path.join(subjfullpath, descriptor.filename)
        logfile[ftype] = open(target, 'w')
    return logfile
def main(args):
    '''
    Relabel the graph corpus with the WL kernel and train the skipgram model.

    :param args: parsed arguments providing corpus, output_dir, batch_size,
        epochs, embedding_size, num_negsample, learning_rate, wlk_h,
        label_filed_name and class_labels_file_name
    :return: None
    '''
    # Read every option up front, as the training call needs all of them.
    corpus_dir, output_dir = args.corpus, args.output_dir
    batch_size, epochs = args.batch_size, args.epochs
    embedding_size = args.embedding_size
    num_negsample = args.num_negsample
    learning_rate = args.learning_rate
    wlk_h = args.wlk_h
    label_filed_name = args.label_filed_name
    class_labels_fname = args.class_labels_file_name

    wl_extn = 'g2v' + str(wlk_h)
    assert os.path.exists(corpus_dir), "File {} does not exist".format(corpus_dir)

    graph_files = get_files(dirname=corpus_dir, extn='.gexf', max_files=0)
    logging.info('Loaded {} graph file names form {}'.format(len(graph_files),corpus_dir))

    started = time()
    wlk_relabel_and_dump_memory_version(
        graph_files, max_h=wlk_h, node_label_attr_name=label_filed_name)
    logging.info('dumped sg2vec sentences in {} sec.'.format(time() - started))

    t0 = time()
    embedding_fname = train_skipgram(
        corpus_dir, wl_extn, learning_rate, embedding_size, num_negsample,
        epochs, batch_size, output_dir, class_labels_fname)
def upload(directory):
    """Upload a directory to S3.

    DIRECTORY: Directory to upload. Required.
    """
    if not AWS_BUCKET:
        utils.error('AWS_BUCKET environment variable not set. Exiting.')
        return

    bucket = get_or_create_bucket(S3Connection(), AWS_BUCKET)
    files = list(utils.get_files(directory))
    utils.info('Found', len(files), 'files to upload to s3://' + AWS_BUCKET)

    total_size = 0
    for path in files:
        filesize = os.path.getsize(path)
        total_size += filesize
        utils.info('Uploading', path, '-', sizeof_fmt(filesize))
        # The local path doubles as the object key.
        key = Key(bucket)
        key.key = path
        key.set_contents_from_filename(path)

    utils.success('Done. Uploaded', sizeof_fmt(total_size))
def main(argv):
    """Build the VGG19 network and run the style-transfer trainer.

    All settings are read from ``FLAGS``; ``argv`` is not used.
    """
    model_file_path = os.path.join(FLAGS.vgg_model, vgg19.MODEL_FILE_NAME)
    vgg_net = vgg19.VGG19(model_file_path)
    content_images = utils.get_files(FLAGS.train)
    style_image = utils.load_image(FLAGS.style)

    # layer name -> weight maps for the content and style layers
    content_layers = dict(zip(CONTENT_LAYERS_NAME, CONTENT_LAYER_WEIGHTS))
    style_layers = dict(zip(STYLE_LAYERS_NAME, STYLE_LAYER_WEIGHTS))

    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        trainer = style_transfer_trainer.StyleTransferTrainer(
            session=sess,
            content_layer_ids=content_layers,
            style_layer_ids=style_layers,
            content_images=content_images,
            style_image=add_one_dim(style_image),
            net=vgg_net,
            num_epochs=FLAGS.num_epochs,
            batch_size=FLAGS.batch_size,
            content_weight=FLAGS.content_weight,
            style_weight=FLAGS.style_weight,
            tv_weight=FLAGS.tv_weight,
            learn_rate=FLAGS.learn_rate,
            save_path=FLAGS.output,
            check_period=FLAGS.checkpoint_every,
            # a falsy max_size (0, empty) is passed on as None
            max_size=FLAGS.max_size or None)
        trainer.train()
def _get_all_config_files(self):
    """
    List the files under the patch directory that pass ``self.filter``.

    :return:[(<absolute_path>, <relative_path>), ..]
    """
    patch_root = self.absolute_path_of_patch
    name_filter = self.filter
    return utils.get_files(patch_root, name_filter)
def eval_predictor(func_predict, target_dir=PATH_VAL_IMAGES, batch_size=32, item_handler=default_handler):
    """Run ``func_predict`` over the images of ``target_dir`` in batches.

    Each prediction is paired with its image base name through
    ``item_handler``. Progress is written to stdout.

    :param func_predict: callable taking a list of image paths.
    :param target_dir: directory listed with ``utils.get_files``.
    :param batch_size: number of images per ``func_predict`` call.
    :param item_handler: callable ``(image_id, prediction_item)``.
    :return: tuple ``(results, flag)`` where ``flag`` is
        ``return_array.value`` and ``results`` is the full list of result
        lists when the flag is truthy, otherwise only the first one.
    """
    print('Start eval predictor...')
    results = []
    return_array = Flag()
    images = utils.get_files(target_dir)
    n_images = len(images)
    n_batch, n_last_batch = divmod(n_images, batch_size)

    def predict_batch(start, end):
        batch_images = images[start: end]
        predictions = func_predict(batch_images)
        if not utils.is_multi_predictions(predictions):
            # A single prediction set is wrapped so the code below can treat
            # both cases alike; the flag records that it was not multi.
            predictions = [predictions]
            return_array.value = False

        if results:
            assert len(results) == len(predictions), "The predictions length is not equal with last time's."
        else:
            # First batch: one result list per prediction set.
            results.extend([] for _ in range(len(predictions)))

        image_ids = [os.path.basename(image) for image in batch_images]
        batch_len = end - start
        for index, prediction in enumerate(predictions):
            handled = [item_handler(image_ids[i], prediction[i]) for i in range(batch_len)]
            results[index].extend(handled)

        sys.stdout.write('\rProcessing %d/%d' % (end, n_images))
        sys.stdout.flush()

    # Full batches first, then the remainder if there is one.
    for batch in range(n_batch):
        first = batch * batch_size
        predict_batch(first, first + batch_size)
    if n_last_batch:
        first = n_batch * batch_size
        predict_batch(first, first + n_last_batch)

    sys.stdout.write('\n')
    payload = results if return_array.value else results[0]
    return payload, return_array.value
def __init__(self, name=None):
    """Set the working paths and index the files matching ``name``.

    :param name: passed to ``get_files`` as its second argument.
    """
    # NOTE(review): ``img`` is neither a parameter nor a local here --
    # presumably a module-level name; confirm it exists.
    self.img = img
    self.path, self.mv_path, self.notime_path = SRC_PATH, MOVE_PATH, NOTIME_PATH
    found = get_files(self.path, name)
    self.files = found
    self.dics = self.to_dic(found)
def on_query_completions(self, view, prefix, locations):
    """Complete CSS class names, or insert js/css resources, in HTML/PHP views.

    The text between the start of the current line and the caret decides
    what happens:

    * ending in ``class=`` (after dropping the open quote): returns the
      class names found in the project's CSS files as completion pairs;
    * a ``<script ... src='`` or ``<link ... href='`` prefix: runs
      ``RecursosHtml(...).insertar()`` for the folder of the current file.

    :return: list of ``[label, value]`` pairs for the class case, otherwise None.
    """
    window = sublime.active_window()
    # The active view replaces the ``view`` argument.
    view = window.active_view()
    self.clases = set()
    lang = utils.get_language()
    if lang not in ("html", "php"):
        return None

    punto = view.sel()[0].a
    hasta_cursor = view.substr(sublime.Region(view.line(punto).a, punto)).replace('"', "'")
    linea = hasta_cursor[:hasta_cursor.rfind("'")].strip()
    print("la linea es :"+linea)
    if linea.endswith("class="):
        print("en compass")
        clases = []
        for cssFile in utils.get_files({"ext":"css"}):
            # ``with`` closes each stylesheet; previously the handles leaked.
            with open(cssFile) as css_fh:
                texto = css_fh.read()
            clases.extend(re.findall(r"\.(?P<clase>[a-z][-\w]*)\s+", texto))
        self.clases = [[clase + "\t(CSS)", clase] for clase in set(clases)]
        return list(self.clases)

    linea = hasta_cursor.strip()
    if linea.endswith("src='") and linea.startswith("<script"):
        tipo = "js"
    elif linea.endswith("href='") and linea.startswith("<link "):
        tipo = "css"
    else:
        return None
    # Folder of the current file, whichever path separator it uses.
    path = view.file_name()
    path = path[:path.rfind("/")] if path.find("/")!=-1 else path[:path.rfind("\\")]
    RecursosHtml(path, tipo).insertar()
def run_tests(self, dir):
    """Run the tests of ``dir`` as described by its ``config.py``.

    ``config.py`` is executed with this function's local names
    (``self``, ``dir``, ``add_subdirectory``, ``config_file``, ``presets``)
    as its globals. It must define ``run``; it may define ``test_ext`` and
    ``ans_ext`` (defaults ``.t`` and ``.a``). Directories without a config
    file, without ``run`` or without test files are skipped silently.

    :param dir: directory containing ``config.py`` and the test files.
    """
    def add_subdirectory(sub):
        self.run_tests(os.path.join(dir, sub))

    config_file = os.path.join(dir, 'config.py')
    if not os.path.isfile(config_file):
        return

    presets = self.presets
    # Taken before any further local is created so that config.py sees
    # exactly the names listed in the docstring.
    config_scope = locals()
    with open(config_file) as config_fh:
        config_source = config_fh.read()
    # NOTE(review): exec runs config.py with full privileges; only point
    # this at trusted test trees.
    exec(config_source, config_scope)
    if 'run' not in config_scope:
        return

    test_ext = config_scope.get('test_ext', '.t')
    ans_ext = config_scope.get('ans_ext', '.a')

    files = utils.get_files(dir, test_ext)
    if not files:
        return
    print(dir)
    for test in files:
        if test_ext and test.endswith(test_ext):
            # Swap only the trailing extension; a plain replace() would also
            # rewrite matches inside directory names (e.g. "my.tests/").
            answer = test[:-len(test_ext)] + ans_ext
        else:
            answer = test.replace(test_ext, ans_ext)
        if not os.path.isfile(answer):
            # No answer file: the test file is used as its own answer.
            answer = test
        test_case = TestCase(test, answer, self.test_output, self.test_output_dir, dir)
        config_scope['run'](test_case)
        self.check_test(test_case)
def _download_module(self, module_url):
    """Download a module as a zip, unpack it and build its HTML preview.

    The URL is cut before its ``sword`` path segment, the zip is fetched
    with the session's login, extracted to the save directory, and
    ``index.cnxml`` is converted and stored together with a new zip.

    :param module_url: URL whose path contains a ``sword`` segment,
        e.g. http://cnx.org/Members/user001/m17222/sword/editmedia
    :raises ConversionError: when the cnxml cannot be parsed.
    :raises ValueError: when the URL path has no ``sword`` segment.
    """
    request = self.request
    session = request.session
    login = session['login']
    conn = sword2cnx.Connection(login.service_document_url,
                                user_name=login.username,
                                user_pass=login.password,
                                always_authenticate=True,
                                download_service_document=False)

    # Keep only the part of the path that precedes "sword".
    parts = urlparse.urlsplit(module_url)
    path = parts.path.split('/')
    path = path[:path.index('sword')]
    module_url = '%s://%s%s' % (parts.scheme, parts.netloc, '/'.join(path))
    zip_file = conn.get_cnx_module(module_url = module_url,
                                   packaging = 'zip')

    save_dir = get_save_dir(request)
    if save_dir is None:
        request.session['upload_dir'], save_dir = create_save_dir(request)
    extract_to_save_dir(zip_file, save_dir)

    # ``with`` guarantees the handle is closed even if read() fails.
    with open(os.path.join(save_dir, 'index.cnxml'), 'rb') as cnxml_file:
        cnxml = cnxml_file.read()

    try:
        htmlpreview = cnxml_to_htmlpreview(cnxml)
        save_and_backup_file(save_dir, 'index.html', htmlpreview)
        files = get_files(save_dir)
        save_zip(save_dir, cnxml, htmlpreview, files)
    except libxml2.parserError:
        # Surface the parser failure with its traceback text attached.
        conversionerror = traceback.format_exc()
        raise ConversionError(conversionerror)
def main():
    """Train the fast style-transfer network from command-line options.

    A checkpoint is saved every 100 iterations and once more after training.
    """
    options = build_parser().parse_args()
    check_opts(options)

    # Add a leading batch axis of size 1 to the style image.
    style_image = utils.load_image(options.style)
    style_image = np.ndarray.reshape(style_image, (1, ) + style_image.shape)

    content_targets = utils.get_files(options.train_path)
    content_shape = utils.load_image(content_targets[0]).shape

    device = '/gpu:0' if options.use_gpu else '/cpu:0'

    # NOTE(review): tv_weight is fed from options.style_weight -- looks like
    # it was meant to be a separate option; confirm against build_parser.
    style_transfer = FastStyleTransfer(vgg_path=VGG_PATH,
                                       style_image=style_image,
                                       content_shape=content_shape,
                                       content_weight=options.content_weight,
                                       style_weight=options.style_weight,
                                       tv_weight=options.style_weight,
                                       batch_size=options.batch_size,
                                       device=device)

    checkpoint_path = options.save_path + '/fast_style_network.ckpt'
    training = style_transfer.train(
        content_training_images=content_targets,
        learning_rate=options.learning_rate,
        epochs=options.epochs,
        checkpoint_iterations=options.checkpoint_iterations)
    for iteration, network, first_image, losses in training:
        print_losses(losses)
        # NOTE(review): a new Saver is created on every iteration, as before.
        saver = tf.train.Saver()
        if iteration % 100 == 0:
            saver.save(network, checkpoint_path)

    # Final save; relies on the loop having run at least once.
    saver.save(network, checkpoint_path)
def test_predict_single_model_is_oldest(self):
    """With multiple_versions off, /predict must answer with the first-dated model."""
    app.multiple_versions = False
    payload = dict(id="8db4206f-8878-174d-7a23-dd2c4f4ef5a0",
                   score_3=480.0,
                   score_4=105.2,
                   score_5=0.8514,
                   score_6=94.2,
                   income=50000)
    response = self.app.post('/predict',
                             data=json.dumps(payload),
                             content_type='application/json')
    json_data = json.loads(response.get_data())

    # Collect every YYYY_MM_DD stamp, in file-listing order.
    date_pattern = re.compile(r"\d{4}_\d{2}_\d{2}")
    dates = []
    for file_name in utils.get_files("parquets", "*.parquet"):
        dates.extend(date_pattern.findall(file_name))

    assert app.multiple_versions is False
    assert len(json_data) == 3
    # The model used for the single prediction must carry the first date
    # of the listing (expected to be the oldest model).
    assert dates[0] in json_data['model']
def main():
    """Parse the options, configure the GPUs and start training.

    :raises ValueError: when ``model_scale`` is below 1.
    :raises NotImplementedError: when ``is_training`` is not set.
    """
    args = make_parser().parse_args(sys.argv[1:])
    if args.model_scale < 1:
        raise ValueError('model_scale should be >= 1.')

    # -1 means "no fixed seed"
    if args.seed != -1:
        set_seed(args.seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    if args.ngpus > 1:
        # Multi-GPU: each Horovod process is pinned to its local rank.
        hvd.init()
        config.gpu_options.visible_device_list = str(hvd.local_rank())
    else:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    if not args.is_training:
        raise NotImplementedError(
            'This is the train script. is_training should be 1')

    files = get_files(args.dataset_root_folder,
                      args.allowed_categories,
                      args.blacklisted_categories,
                      args.training_splits,
                      args.splits_folder_name,
                      args.grasps_folder_name)
    random.shuffle(files)
    print('files ====>', files)
    print(len(files))
    main_train(args, files, config)
def generate_files_from_template(self) -> None:
    """Copy or render every template file into the output project.

    Files whose base name contains ``tpl`` and does not start with "." are
    rendered through ``self.template.generate``. All other files are copied
    with their metadata.
    """
    src_dir_path = self.template.root_dir
    file_pathes = utils.get_files(src_dir_path, excepts='/templates/manual')

    for src_file_path in file_pathes:
        src_file = Path(src_file_path)
        if not src_file.is_file():
            continue

        relative_file_path = str(src_file.relative_to(src_dir_path))
        dest_file_path = path.join(self.config.output_pj_path,
                                   relative_file_path)
        dest_file_dir_path = path.dirname(dest_file_path)

        # if the file's dir not exist, make it
        utils.make_dirs([dest_file_dir_path])

        base_name = path.basename(src_file_path)
        is_template = 'tpl' in base_name and base_name[0] != '.'
        if not is_template:
            shutil.copy2(src_file_path, dest_file_path)
            continue

        relative_src_file_path = str(
            src_file.relative_to(self.template.root_dir))
        self.template.generate(relative_src_file_path, dest_file_dir_path)
def validate(schema, jsonfiles):
    """Validate a JSON files against a JSON schema.

    \b
    SCHEMA: JSON schema to validate against. Required.
    JSONFILE: JSON files to validate. Required.
    """
    schema = json.loads(schema.read())
    all_valid = True

    for path in utils.get_files(jsonfiles):
        # An unparsable file aborts the whole run ...
        with open(path) as handle:
            try:
                document = json.loads(handle.read())
            except ValueError:
                logging.error("Error loading json file " + path)
                raise Exception("Invalid json file")

        # ... while a schema violation is logged and checking continues.
        try:
            jsonschema.validate(document, schema)
        except Exception as error:
            all_valid = False
            logging.error("Error validating file " + path)
            logging.error(str(error))

    if not all_valid:
        sys.exit(-1)
def perform_classification (corpus_dir, extn, embedding_fname, class_labels_fname):
    """Classify graphs with a linear-kernel and a deep-kernel SVM.

    :param corpus_dir: folder listed with ``get_files``.
    :param extn: extension of the WL kernel files.
    :param embedding_fname: subgraph vectors in word2vec format.
    :param class_labels_fname: file with the class label of each graph.
    :return: None
    """
    gensim_model = gensim.models.KeyedVectors.load_word2vec_format(fname=embedding_fname)
    logging.info('Loaded gensim model of subgraph vectors')

    subgraph_vocab = sorted(gensim_model.vocab.keys())
    logging.info('Vocab consists of {} subgraph features'.format(len(subgraph_vocab)))

    wlk_files = get_files(corpus_dir, extn)
    logging.info('Loaded {} graph WL kernel files for performing classification'.format(len(wlk_files)))

    # Subgraph counts per file over the fixed vocabulary, then normalised.
    vectorizer = CountVectorizer(input='filename',
                                 tokenizer=subgraph2vec_tokenizer,
                                 lowercase=False,
                                 vocabulary=subgraph_vocab)
    counts = vectorizer.fit_transform(wlk_files)
    X = Normalizer().fit_transform(counts)
    logging.info('X (sample) matrix shape: {}'.format(X.shape))

    Y = np.array(get_class_labels(wlk_files, class_labels_fname))
    logging.info('Y (label) matrix shape: {}'.format(Y.shape))

    # A fresh random seed is drawn for every run's 70/30 split.
    seed = randint(0, 1000)
    splits = train_test_split(X, Y, test_size=0.3, random_state=seed)
    X_train, X_test, Y_train, Y_test = splits
    logging.info('Train and Test matrix shapes: {}, {}, {}, {} '.format(X_train.shape, X_test.shape,
                                                                         Y_train.shape, Y_test.shape))

    linear_kernel_svm_classify(X_train, X_test, Y_train, Y_test)

    subgraph_kernel = get_subgraph_kernel (gensim_model, subgraph_vocab)
    deep_kernel_svm_classify (X_train, X_test, Y_train, Y_test, subgraph_kernel)
def get_fluorescence_logs():
    """Return the 'log'/'out' files of the current directory, minus 'f-' files.

    Files whose name contains ``f-`` come from the qcp results output and
    are left out.

    :return: new list with the remaining file names.
    """
    files = get_files('.', ['log', 'out'])
    # Build a new list: removing items from the list being iterated skips
    # the element that follows each removal.
    return [name for name in files if 'f-' not in name]
def run(self, edit):
    """Index the project's Java classes by package and start the import step.

    Classes from the package of the current view are not added to the index.
    """
    print("va a importar")
    self.window = sublime.active_window()
    self.view = self.window.active_view()
    view = self.view

    tipos = Java().get_tipos()
    self.packages = utils.load_json(PATH_INDEX_PACKAGES)

    # Package declared in the current view, e.g. "package a.b;" -> "a.b"
    viewPackage = view.substr(view.find(utils.REG_JAVA_PACKAGE, 0))
    viewPackage = viewPackage.replace("package ", "").replace(";", "")

    for projectFile in utils.get_files({"ext":"java"}):
        # Turn the path into dotted form and keep what lies between
        # ".java." and the final extension.
        dotted = projectFile.replace("/", ".").replace("\\", ".")
        qualified = dotted[dotted.rfind(".java.")+6:dotted.rfind(".")]

        last_dot = qualified.rfind(".")
        className = qualified[last_dot+1:]
        packageClass = qualified[:last_dot]
        if packageClass == viewPackage:
            continue
        if self.packages.get(className) is None:
            self.packages[className] = []
        self.packages[className].append(packageClass)

    self.clases = list(set(tipos))
    self.i = 0
    self.importar(None)
def files_rank_cluster(local_featurePoi_path, output_path=None, cluster_type="multi", column=None):
    """Cluster every feature file using a pool of worker processes.

    :param local_featurePoi_path: directory handed to ``utils.get_files``.
    :param output_path: when given (not None and not empty), replaces the
        module-level ``local_featurePoiRank_path``.
    :param cluster_type: ``"multi"`` runs ``rank_cluster`` on each file; any
        other value runs ``rank_cluster_singleColumn``.
    :param column: column passed to ``rank_cluster_singleColumn``.
    """
    global local_featurePoiRank_path
    # The default None must not overwrite the configured path; the former
    # check (not output_path == "") let None through.
    if output_path:
        local_featurePoiRank_path = output_path
    begin_time = time.time()
    logger.info("all fileList handle process")

    # read the input files
    fileList, _dirs = utils.get_files(local_featurePoi_path)
    logger.info('total file:{num}'.format(num=len(fileList)))

    pool = multiprocessing.Pool(processes=10)
    for file in fileList:
        if cluster_type == "multi":
            # multi-dimensional clustering
            pool.apply_async(rank_cluster, (file,))
        else:
            # single-column clustering
            pool.apply_async(rank_cluster_singleColumn, (file, column))
    pool.close()
    pool.join()

    end_time = time.time()
    logger.info(u"all finished use_time" + str(end_time - begin_time) + "s")
    logger.info("all fileList handle finished")
def perform_classification(corpus_dir, extension, embedding_fname, class_labels_fname):
    """
    Classify graphs from their stored embeddings with an RBF SVM.

    :param corpus_dir: folder containing subgraph2vec sentence files
    :param extension: extension of the subgraph2vec sentence files
    :param embedding_fname: JSON file mapping each file name to its vector
    :param class_labels_fname: files containing labels of each graph
    :return: whatever ``rbf_svm_classify`` returns for the 90/10 split
    """
    # weisfeiler lehman kernel files
    wlk_files = get_files(corpus_dir, extension)
    Y = np.array(get_class_labels(wlk_files, class_labels_fname))
    logging.info('Y (label) matrix shape: {}'.format(Y.shape))

    seed = randint(0, 1000)
    with open(embedding_fname, 'r') as fh:
        graph_embedding_dict = json.load(fh)

    # One embedding row per file, in listing order (matches Y).
    rows = []
    for fname in wlk_files:
        rows.append(graph_embedding_dict[fname])
    X = np.array(rows)

    splits = train_test_split(X, Y, test_size=0.1, random_state=seed)
    X_train, X_test, Y_train, Y_test = splits
    logging.info('Training and Test Matrix Shapes: {}. {}. {}. {} '.format(
        X_train.shape, X_test.shape, Y_train.shape, Y_test.shape))

    return rbf_svm_classify(X_train, X_test, Y_train, Y_test)
def check_files(path):
    """Print the groups of files under ``path`` that share an MD5 digest.

    Progress is written to stdout while scanning. A summary with the number
    of duplicated files and the elapsed seconds is printed at the end.

    :param path: location handed to ``get_files``.
    """
    start_time = time.time()
    file_dict = {}
    files = get_files(path)
    for progress, fileName in enumerate(files, start=1):
        md5 = hashlib.md5()
        # ``with`` closes every handle; reading in chunks keeps memory
        # bounded for large files and yields the same digest.
        with open(fileName, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                md5.update(chunk)
        file_dict.setdefault(md5.hexdigest(), []).append(fileName)
        sys.stdout.write("\r已扫描 [%d%%]" % (progress / len(files) * 100))
        sys.stdout.flush()
    sys.stdout.write("\r ")
    sys.stdout.flush()

    count = 0
    for value in file_dict.values():
        # Only digests seen more than once are duplicates.
        if len(value) <= 1:
            continue
        print("\n重复[%d]:" % len(value))
        count += len(value)
        for file in value:
            print(" %s" % file)
    end_time = time.time()
    print("\n扫描完成!共发现%d个重复文件, 耗时%d秒\n" % (count, end_time - start_time))
def restore_all(password_file=None):
    """Restore every file of the vault that is stored as ``encrypted``.

    :param password_file: passed through to ``restore``.
    """
    for file_ in get_files(ATK_VAULT):
        if os.path.basename(file_) != 'encrypted':
            continue
        # Get the path without the atk vault base and encrypted filename
        inner_parts = split_path(file_)[1:-1]
        restore(os.path.join(*inner_parts), password_file)
def main():
    """Main program: ask for the folders and step, then merge each day's data."""
    # Input root folder (empty answer -> default)
    input_root = input('Please enter the input folder (default: input): ') or 'input'

    # Output root folder (empty answer -> default)
    output_root = input('Please enter the output folder (default: output): ') or 'output'

    # Time interval in seconds (empty answer -> default)
    step = input('Please enter the merge step in seconds (default: 1): ') or '1'

    # File tree, built by get_files in utils.py
    tree = utils.get_files(input_root, output_root)

    # Merge the data of every subject, day by day (merge lives in utils.py)
    for subject in tree:
        for day in tree[subject]:
            print("Start: " + day)
            utils.merge(tree[subject][day], output_root, seconds=int(step))
def generate_fixation_maps(results_dir, output_dir):
    """Combine the gaze data of all result files and render one map per image.

    Gaze records that refer to the same image path are merged by
    concatenating their ``gaze_data`` before the images are generated.

    :param results_dir: directory with the result files; a trailing
        separator is no longer required.
    :param output_dir: output directory, created when missing.
    """
    # Image name - gaze points pairs
    image_data_pairs = {}

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for fn in get_files(results_dir):
        # os.path.join gives the same result as the old string
        # concatenation when results_dir ends with a separator, and a
        # correct path when it does not.
        path = os.path.join(results_dir, fn)
        for gd in web_gaze(path, output_dir):
            if gd.path not in image_data_pairs:
                image_data_pairs[gd.path] = gd
            else:
                image_data_pairs[gd.path].gaze_data += gd.gaze_data

    # Generate images from combined webgaze_fixations
    for combined in image_data_pairs.values():
        generate_img(combined.path, combined.image_size[0],
                     combined.image_size[1], combined.gaze_data)
def dicom_to_hd5(input_dicom, output_hdf5=None, output_json=None, save_records=True):
    """
    Build a 3D volume from the DICOMs found at a path and optionally save it.

    Parameters
    --------
    input_dicom: Path
        Path to a/many DICOMs
    output_hdf5: str
        Path to create output HDF5
    output_json:
        Path to create output JSON
    save_records: bool
        When true, volume and attributes are handed to ``save_record``

    Returns
    --------
    tuple of (volume, attributes)
    """
    logger = logging.getLogger(config.APP_NAME)

    logger.info("Retrieving DICOMS")
    dcms = []
    for dcm_path in utils.get_files(input_dicom, config.DCM2HD5_INPUT_EXT):
        dcms.append(dcmread(str(dcm_path)))
    logger.info("Got %d DICOMS" % len(dcms))

    logger.info("Constructing 3D Volume")
    volume = construct_volume(dcms)

    logger.info("Extracting Attributes")
    attributes = extract_attributes(dcms)

    if save_records:
        save_record(volume, attributes, output_hdf5, output_json)
    return volume, attributes
def run(self, edit):
    """Regenerate the command .bat files and the command list.

    Commands are collected from four sources: snippet keys (``code_*``),
    template file names (``make_*``), ``TextCommand`` classes found in the
    package .py files, and the ``command`` entries of the user keymap. The
    sorted list is written to ``comandos.txt``, then ``ejecutar_comando``
    is run on the active view.
    """
    paquete_snippets = sublime.packages_path()+os.sep+"snippets"
    comandos = []

    # 1) one "code_<snippet>" command per snippet key
    lista = []
    for archivo in utils.get_files({"folder":paquete_snippets, "ext":"json"}):
        lista.extend(utils.load_json(archivo).keys())
    for snippet in set(lista):
        snippet = snippet.lower().replace("-", "_")
        for sobrante in "() ?:":
            snippet = snippet.replace(sobrante, "")
        utils.file_write(RUTA_COMANDOS+"code_"+snippet+".bat", "echo code_"+snippet+" > d:/sublime3/comando.txt")
        comandos.append("code_"+snippet)

    # 2) one "make_<template>" command per template file
    for plantilla in utils.get_files({"folder":RUTA_PLANTILLAS}):
        plantilla = os.path.basename(plantilla)
        if plantilla.rfind(".") != -1:
            plantilla = plantilla[:plantilla.rfind(".")]
        plantilla = plantilla.replace(" ", "_").lower()
        utils.file_write(RUTA_COMANDOS+"make_"+plantilla+".bat", "echo make_"+plantilla+" > d:/sublime3/comando.txt")
        comandos.append("make_"+plantilla)

    # 3) TextCommand classes, converted from CamelCase to snake_case
    patron_clase = re.compile(r"class ([\w]+)\(sublime_plugin.TextCommand\)", re.IGNORECASE)
    for rutaPrograma in utils.get_files({"folder":sublime.packages_path(), "ext":".py"}):
        try:
            programa = utils.file_read(rutaPrograma)
        except Exception:
            # Best effort: unreadable files are reported and skipped.
            # (Narrowed from a bare except so Ctrl-C still interrupts.)
            print("saco error al leer : "+rutaPrograma)
            continue
        for comandoPython in patron_clase.findall(programa):
            comandoPython = comandoPython[0].lower()+comandoPython[1:]
            cp = "".join(
                "_" + c.lower() if c.isupper() else c.lower()
                for c in comandoPython
            )
            if cp.endswith("_command"):
                cp = cp.replace("_command", "")
            comandos.append(cp)

    # 4) commands bound in the user keymap
    comandosInternos = utils.file_read("D:/sublime3/Data/Packages/User/Default (Windows).sublime-keymap")
    comandos.extend(re.findall(r'"command": *"(\w+)" *\}', comandosInternos, re.IGNORECASE))

    comandos = sorted(set(comandos))
    strComandos = "".join(comando + "\n" for comando in comandos)

    window = sublime.active_window()
    view = window.active_view()
    utils.file_write("d:/sublime3/comandos.txt", strComandos)
    view.run_command("ejecutar_comando", {"comando":"taskkill /f /im CustomizeableJarvis.exe\njarvis\nexit"})
def process_nga_artists():
    """Merge the reconciled NGA artists into ``artists`` and append their info to a CSV.

    Reads the "nga-artists-dbpedia" file, updates the module-level
    ``artists`` mapping, then writes one row per artist that is not in
    ``already_artists`` to ``nga-artists-dbpedia-info.csv``.
    """
    nga_reconciled_artists = get_files()["nga-artists-dbpedia"]
    with open(nga_reconciled_artists, 'rb') as ngafile:
        for row in csv.reader(ngafile):
            try:
                # If we hit a duplicate artist, select the one with
                # url which is not foaf:Person
                url = artists[row[0]][1]
                if "Person" not in url:
                    artists[row[0]] = (row[0], url, "nga")
                else:
                    url = get_resource_url(row)
                    if "dbpedia" in url:
                        artists[row[0]] = (row[0], url, "nga")
            except KeyError:
                # Open Refine rule bug consequence :
                # Applicable to only nga artists
                # Process the row to see if there is a dbpedia link
                artists[row[0]] = (row[0], get_resource_url(row), "nga")

    header = ["Name", "Url", "Source", "Birth Date", "Death Date",
              "Short description", "Long description", "Movement"]
    result_keys = ("birth_date", "death_date", "short_descr", "long_descr",
                   "movement")

    with open('nga-artists-dbpedia-info.csv', 'a') as opfile:
        csvwriter = csv.writer(opfile, delimiter=',', quotechar='"',
                               quoting=csv.QUOTE_ALL)
        csvwriter.writerow(header)
        processed = 0
        for artist, info in artists.items():
            if artist in already_artists:
                continue
            url = info[1]
            # dbpedia links are resolved there, everything else through NGA
            if "dbpedia" in url:
                result = get_info_from_dbpedia(url)
            else:
                result = get_info_from_nga(artist)
            name = convert(artist)
            details = [convert(result[key]) for key in result_keys]
            csvwriter.writerow([name, url, "nga"] + details)
            processed += 1
            print(processed)
def clean_dir(base_directory):
    """Delete every file listed by ``get_files`` for ``base_directory``.

    :param base_directory: directory whose files are deleted; also used as
        the prefix of each file name.
    """
    print('Cleaning all of the files in "%s".' % base_directory)
    files = get_files(base_directory)
    for entry in files:
        file_name = '%s/%s' % (base_directory, entry)
        print('Deleting file "%s".' % file_name)
        delete_file(file_name)

    deleted = len(files)
    plural = 's' if deleted > 1 else ''
    print('Deleted %d file%s.' % (deleted, plural))
def read_img_action(path):
    """Read the projection frames ``act0.png`` .. ``act<N-1>.png`` of an action.

    ``N`` is the number of entries listed under ``path + "xy/"``.

    :return: ``ProjectionAction`` built from the frames.
    """
    raw_names = utils.get_files(path+"xy/")
    names = []
    for i in range(len(raw_names)):
        # Frames are addressed by index, not by the listed names.
        names.append(utils.get_name("act"+str(i)+".png"))
    print(names)
    frames = [read_projection_frame(path, name) for name in names]
    return ProjectionAction(frames)
def read_action(action_path):
    """Load an action: its category, person and image frames.

    Category and person are derived from the action name. The frames are
    the images listed under ``action_path``.
    """
    name = get_action_name(action_path)
    category = get_category(name)
    person = get_person(name)
    # Listed names are prefixed with the action directory before reading.
    frame_paths = utils.append_path(action_path+"/", utils.get_files(action_path))
    return Action(category, person, utils.read_images(frame_paths))
def get_polarity_of_modified_files(path):
    """Store ``utils.convert_doc`` of each file under ``path`` in ``dictonary_modified``.

    Keys are the names returned by ``utils.get_files``.
    """
    reload(sys)
    for file_name in utils.get_files(path):
        dictonary_modified[file_name] = utils.convert_doc(path+"/"+file_name)
def run(self, edit):
    """Show a quick panel with the project files, skipping build/VCS folders.

    Does nothing when the window has no open folder.
    """
    window = sublime.active_window()
    if not window.folders():
        return

    ignored = ["target", "build", ".svn", ".git", "bin"]
    self.lista = []
    self.lista = utils.get_files({"ignores":ignored})
    # Each panel row shows the base name above the full path.
    self.Clista = []
    for ruta in self.lista:
        self.Clista.append([os.path.basename(ruta), ruta])
    window.show_quick_panel(self.Clista, self.abrir)
def backup_all(password_file=None, vault_file=None):
    """Back up a single vault file, or every file ``get_files('.')`` returns.

    :param password_file: forwarded unchanged to ``backup``.
    :param vault_file: when truthy, only this file is backed up.
    :raises RuntimeError: if *vault_file* is given but is not an existing file.
    """
    if not vault_file:
        for candidate in get_files('.'):
            backup(candidate, password_file)
        return

    if not os.path.isfile(vault_file):
        raise RuntimeError("Unable to locate vault file")
    backup(vault_file, password_file)
def run(self, edit, **args):
    """List the files of the ``recursos`` folder in a quick panel.

    Returns immediately unless a truthy ``folder`` argument is supplied. The
    resources folder is ``<packages path>/../recursos`` (normalised); each
    panel row is ``[file base name, name of its parent directory]`` and the
    selection is handled by ``self.utilizar``.
    """
    folder = args.get("folder")
    if not folder:
        return
    self.folder = folder

    recursos = sublime.packages_path() + os.sep + ".." + os.sep + "recursos"
    self.resources_folder = os.path.normpath(recursos)
    self.files = utils.get_files({"folder": self.resources_folder})

    nombres = []
    for ruta in self.files:
        carpeta = os.path.basename(os.path.dirname(ruta))
        nombres.append([os.path.basename(ruta), carpeta])
    self.nombres = nombres

    sublime.active_window().show_quick_panel(self.nombres, self.utilizar)
    print(self.nombres)
def transform_files(in_path, out_path, transform, dirs=False):
    """Apply *transform* to every entry of *in_path*, targeting *out_path*.

    :param in_path: source location; entry names are appended to it by plain
        string concatenation (no separator is inserted).
    :param out_path: destination location, created with ``utils.make_dir``;
        entry names are appended to it the same way.
    :param transform: callable invoked as ``transform(source, destination)``.
    :param dirs: when truthy, iterate ``utils.get_dirs(in_path)`` instead of
        ``utils.get_files(in_path)``.
    """
    utils.make_dir(out_path)
    list_entries = utils.get_dirs if dirs else utils.get_files
    for entry in list_entries(in_path):
        transform(in_path + entry, out_path + entry)
def run(self, edit):
    """Call ``self.generar_comando`` for every TextCommand class name found.

    Every ``.py`` file that ``utils.get_files`` returns for the Sublime
    packages path is scanned for ``class <Name>(sublime_plugin.TextCommand):``
    declarations (case-insensitively); each distinct class name is handed to
    ``self.generar_comando`` once.
    """
    # Raw string: "\s", "\w" and "\(" are invalid escapes in a normal string
    # literal. The dot is escaped so that it only matches a literal ".".
    # Compiled once instead of once per file.
    patron = re.compile(
        r"class\s+([\w]+)\(sublime_plugin\.TextCommand\):",
        flags=re.IGNORECASE)

    comandos = set()
    for archivo in utils.get_files(
            {"folder": sublime.packages_path(), "ext": "py"}):
        comandos.update(patron.findall(utils.file_read(archivo)))

    for comando in comandos:
        self.generar_comando(comando)
def get_polarity_of_original_files(path):
    """Store the TextBlob sentiment polarity of each file in ``dictonary_original``.

    Every name returned by ``utils.get_files(path)`` is read, decoded as
    UTF-8, wrapped in a ``TextBlob``, and its ``sentiment.polarity`` is stored
    under that name.
    """
    reload(sys)
    for file_name in utils.get_files(path):
        # NOTE(review): the name is opened exactly as utils.get_files returns
        # it, without prefixing *path* -- confirm it is an openable path.
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(file_name, 'r') as handle:
            file_content = handle.read()
        decoded_content = unicode(file_content, 'utf-8')
        document = TextBlob(decoded_content)
        dictonary_original[file_name] = document.sentiment.polarity
def run(self, edit):
    """Write one ``code_<snippet>.bat`` file per distinct snippet key.

    Keys are collected from every ``.json`` file that ``utils.get_files``
    returns for the ``snippets`` folder under the Sublime packages path. Each
    distinct key is lower-cased, ``-`` and ``?`` become ``_`` and spaces are
    removed; the resulting name is used for the batch file written under
    ``RUTA_COMANDOS`` and is printed.
    """
    carpeta = sublime.packages_path() + os.sep + "snippets"
    claves = []
    for archivo in utils.get_files({"folder": carpeta, "ext": "json"}):
        claves.extend(utils.load_json(archivo).keys())

    for clave in list(set(claves)):
        nombre = clave.lower()
        nombre = nombre.replace("-", "_").replace(" ", "").replace("?", "_")
        destino = RUTA_COMANDOS + "code_" + nombre + ".bat"
        contenido = "echo code_" + nombre + " > d:/sublime3/comando.txt"
        utils.file_write(destino, contenido)
        print(nombre)
def get_patch_files(self, patch_path, filters):
    """Collect the files of a patch's source tree via ``utils.get_files``.

    :param patch_path: path of the patch's source code.
    :param filters: list of valid file suffixes, for example ``['.py']``.
    :return: list of ``(absolute path, relative path)`` pairs, for example
        ``[('/root/tricircle-master/novaproxy/nova/compute/clients.py',
        'nova/compute/clients.py'), ...]``.
    """
    patch_files = utils.get_files(patch_path, filters)
    return patch_files
def process_nga_source():
    """Feed every record of the NGA artists file to ``build_nga_source_map``.

    The file registered under ``"nga-artists"`` in ``get_files()`` is read
    line by line. Each line is parsed as JSON, passed through ``convert``,
    and its ``"artists"`` value is handed to ``build_nga_source_map``.

    Processing is best effort: a line that fails at any step is skipped, but
    the failure is reported on stderr instead of being discarded silently.
    """
    import sys  # local import: only needed for the skip report below

    nga_artist_source = get_files()["nga-artists"]
    with open(nga_artist_source, 'rb') as jsonfile:
        for line_number, line in enumerate(jsonfile, 1):
            try:
                entry = convert(json.loads(line))
                build_nga_source_map(entry["artists"])
            except Exception as error:
                # Deliberately broad so one bad record never aborts the run;
                # the report makes dropped records visible.
                sys.stderr.write(
                    "process_nga_source: skipped line %d: %r\n"
                    % (line_number, error))
def scan_and_load_corpus(self):
    """Collect the corpus documents, scan them, and prepare a shuffled order.

    Sets ``self.doc_list`` from ``get_files`` (using ``self.corpus_folder``,
    ``self.extn`` and ``self.max_files``), runs ``self.scan_corpus()``, logs
    the vocabulary size, document count and ``self._wordcount``, and stores a
    randomly shuffled list of document indices in ``self.doc_shuffle``.
    """
    self.doc_list = get_files(self.corpus_folder, extn=self.extn,
                              max_files=self.max_files)
    word_to_id_map = self.scan_corpus()
    logging.info('vocabulary size: %d' % len(word_to_id_map))
    logging.info('number of documents: %d' % len(self.doc_list))
    # presumably self._wordcount is set by scan_corpus() -- verify there.
    logging.info('number of words to be trained: %d' % self._wordcount)
    # np.random.shuffle permutes its argument in place, so it needs a mutable
    # sequence; a bare range() is immutable on Python 3.
    self.doc_shuffle = list(range(len(self.doc_list)))
    np.random.shuffle(self.doc_shuffle)
def process(sources, output, force):
    """Download sources and process the file to the output directory.

    \b
    SOURCES: Source JSON file or directory of files. Required.
    OUTPUT: Destination directory for generated data. Required.
    """
    # Each source file is handled independently: every failure below is
    # reported and the loop moves on to the next file.
    for path in utils.get_files(sources):
        # Derive the output location from the source path: the first path
        # component is replaced by the output directory (separators stripped
        # from both ends) and '.json' in the file name becomes '.geojson'.
        pathparts = utils.get_path_parts(path)
        pathparts[0] = output.strip(os.sep)
        pathparts[-1] = pathparts[-1].replace('.json', '.geojson')

        outdir = os.sep.join(pathparts[:-1])
        outfile = os.sep.join(pathparts)
        source = utils.read_json(path)
        # Last segment of the URL path; used only in the messages below.
        urlfile = urlparse(source['url']).path.split('/')[-1]

        # The source's 'filetype' must name an attribute of ``adapters``.
        if not hasattr(adapters, source['filetype']):
            utils.error('Unknown filetype', source['filetype'], '\n')
            continue

        # Existing output is kept unless regeneration is forced.
        if os.path.isfile(outfile) and not force:
            utils.error('Skipping', path, 'since generated file exists.',
                        'Use --force to regenerate.', '\n')
            continue

        utils.info('Downloading', source['url'])

        try:
            fp = utils.download(source['url'])
        except IOError:
            utils.error('Failed to download', source['url'], '\n')
            continue

        utils.info('Reading', urlfile)

        try:
            geojson = getattr(adapters, source['filetype']).read(fp, source['properties'])
        except IOError:
            utils.error('Failed to read', urlfile)
            continue
        finally:
            # The downloaded file is removed whether or not reading succeeded.
            os.remove(fp.name)

        utils.make_sure_path_exists(outdir)
        utils.write_json(outfile, geojson)

        utils.success('Done. Processed to', outfile, '\n')
def dispatch_patch_tool_to_host(self, host):
    """Copy the patch tool's files to *host* over SSH.

    The files are those ``utils.get_files`` returns for the patch tool path
    with ``self.filter_for_dispatch``. Each one is uploaded to the same
    relative location under ``SysPath.HOME_FSP/SysPath.PATCHES_TOOL`` after
    its target directory has been created with ``mkdir -p``. Any exception
    raised while copying is logged with its traceback and not re-raised.

    :param host: host to connect to.
    """
    tool_root = utils.get_patches_tool_path()
    pending_files = utils.get_files(tool_root, self.filter_for_dispatch)
    ssh = sshutils.SSH(host=host,
                       user=SysUserInfo.FSP,
                       password=SysUserInfo.FSP_PWD)
    try:
        for local_file, relative_file in pending_files:
            log.info('start to copy file <<%s>> to host <<%s>>' % (relative_file, host))
            remote_file = os.path.join(SysPath.HOME_FSP,
                                       SysPath.PATCHES_TOOL,
                                       relative_file)
            remote_dir = os.path.dirname(remote_file)
            ssh.run('mkdir -p %s' % remote_dir)
            ssh.put_file(local_file, remote_file)
            log.info('End to copy file <<%s>> to host <<%s>>' % (relative_file, host))
    except Exception:
        log.error('Exception occur when dispatch patches tool to host: <%s>, Exception: %s' % (host, traceback.format_exc()))
def add_getty_artists_to_collection():
    """Insert every row of the Getty artists info CSV into ``collection``.

    The CSV registered under ``"getty-artists-dbpedia-info"`` in
    ``get_files()`` is read row by row; each row is turned into a document
    with ``build_json_object`` and inserted. The module-level
    ``tuples_inserted`` counter is incremented and printed after each insert.

    A ``TypeError`` raised while building or inserting a row prints
    ``JSON type error`` and terminates the process with exit status 1.
    """
    global tuples_inserted
    getty_info_file = get_files()["getty-artists-dbpedia-info"]
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(getty_info_file, 'rb') as gettyfile:
        for row in csv.reader(gettyfile):
            try:
                collection.insert(build_json_object(row))
                tuples_inserted += 1
                # Single-argument call form prints the same on Python 2 and 3.
                print(tuples_inserted)
            except TypeError:
                print("JSON type error")
                sys.exit(1)
def run(self, edit):
    """Run, or offer to choose among, the ``with-dependencies`` jars.

    Searches the first folder of the active window for ``.jar`` files whose
    path contains ``with-dependencies``. With exactly one match it is started
    through the ``ejecutar_comando`` command as ``java -jar <path>``;
    otherwise the matches are stored in ``self.ejecutables`` and shown in a
    quick panel handled by ``self.elegirEjecutable``. Does nothing when the
    window has no open folder.
    """
    window = sublime.active_window()
    carpetas = window.folders()
    # Guard: indexing folders()[0] raises IndexError when no folder is open.
    if not carpetas:
        return
    view = window.active_view()

    archivos = utils.get_files({"folder": carpetas[0], "ext": "jar"})
    ejecutables = [archivo for archivo in archivos
                   if "with-dependencies" in archivo]

    if len(ejecutables) == 1:
        view.run_command("ejecutar_comando",
                         {"comando": "java -jar " + ejecutables[0]})
    else:
        self.ejecutables = ejecutables
        window.show_quick_panel(ejecutables, self.elegirEjecutable)
    print(ejecutables)