def check_directory_structure(self, dirname):
    """ Assert that directory `dirname` is a properly-formatted experiment directory.
    Should have:
    - a folder `typed/`
    - a folder `untyped/`
    - a folder `base/`
    - (optional) a folder `both/`
    Constraints:
    - the `typed/` and `untyped/` folders should have the same number of files, and matching filenames.
    """
    # Ensure directories exist
    base_dir = "%s/base" % dirname
    un_dir = "%s/untyped" % dirname
    ty_dir = "%s/typed" % dirname
    req_dirs = [base_dir, un_dir, ty_dir]
    for required_dir in req_dirs:
        if not os.path.isdir(required_dir):
            raise ValueError("Required directory '%s' not found. Please create it." % required_dir)
    both_dir = "%s/both" % dirname
    if os.path.isdir(both_dir):
        req_dirs.append(both_dir)
    # Ensure no nested directories
    for d in req_dirs:
        if util.contains_any_directories(d):
            raise ValueError("Directory '%s' should not contain any sub-directories, but it does. Please remove." % d)
    # Ensure num. typed and num. untyped files are the same
    if util.count_files(ty_dir) != util.count_files(un_dir):
        raise ValueError("Directories '%s' and '%s' must have the same number of files." % (un_dir, ty_dir))
    # Ensure filenames in typed/untyped are the same
    if sorted(util.get_files(un_dir)) != sorted(util.get_files(ty_dir)):
        raise ValueError("The filenames in '%s' must match the filenames in '%s', but do not. Please fix." % (un_dir, ty_dir))
    return
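# The checker above relies on a few `util` helpers that are not shown here.
# The sketch below is one plausible implementation inferred from how they are
# called; contains_any_directories, count_files and get_files are assumptions,
# not the project's actual code.
import os

def contains_any_directories(dirname):
    """Return True if `dirname` has at least one immediate sub-directory."""
    return any(os.path.isdir(os.path.join(dirname, name))
               for name in os.listdir(dirname))

def get_files(dirname):
    """Return the names of the regular files directly inside `dirname`."""
    return [name for name in os.listdir(dirname)
            if os.path.isfile(os.path.join(dirname, name))]

def count_files(dirname):
    """Number of regular files directly inside `dirname`."""
    return len(get_files(dirname))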
def main(): impath = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots" # args["ip"] compath = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites" # args["cp"] goodUris = [] with open("gooduris_20160225.txt", "r") as read: for uri in map(lambda line: line.rstrip("\n"), read): goodUris.append(uri) compisits = get_files(compath, lambda f: "allTheSame" not in f and check_if_goodURI(f, goodUris) and "interval" not in f) method_composites = defaultdict(dict) for comp in sorted(compisits): site = comp[comp.find("_") + 1:comp.rfind("_")] method_composites[comp[:comp.index("_")]][site] = comp # composite_only_histogram(method_composites,compath) files = get_and_process_thumbs(impath, method_composites, goodUris) print(type(files)) # print(method_composites) impath += "/" methods = {'random': MethodCompThums('random', impath, files["random"]), 'temporalInterval': MethodCompThums('temporalInterval', impath, files["temporalInterval"]), 'alSum': MethodCompThums('alSum', impath, files["alSum"])} # thumbThumbAnalysis(methods['alSum'], methods['random'], methods['temporalInterval']) temporalPairs(methods['alSum'], methods['random'], methods['temporalInterval'])
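# The composite filenames parsed in main() are assumed to follow the pattern
# "<method>_<site>_<suffix>". A made-up example of the slicing used above:
comp = "alSum_example.com_composite.png"
method = comp[:comp.index("_")]                   # "alSum"
site = comp[comp.find("_") + 1:comp.rfind("_")]   # "example.com"
print(method, site)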
def clean(files=None): """ Cleans the given files :param files: List of files or dict to pass to get_files for all files to run against. """ logger.debug("Running clean function. Files: " + str(files)) if files is None: raise Exception("Must run clean with files") if isinstance(files, dict): if not os.path.exists(files['path']): logger.debug("Clean path does not exist. Clean considered successful.") return files = get_files(files) if not getattr(env, 'dry_run', False): for path in files: if not os.path.exists(path): continue if os.path.isfile(path): os.remove(path) else: shutil.rmtree(path) logger.info("Successfully cleaned {} files/folders".format(len(files))) logger.debug("Files/Folders removed: \n" + "\n".join(files))
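# Minimal usage sketch for clean(). It accepts either an explicit list of paths or
# a dict that is forwarded to get_files; the paths below are hypothetical.
clean(files=['build/bundle.js', 'build/tmp'])
clean(files=dict(path='build/', match=['*.pyc'], recursive=True))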
def get_host(PATHS): machines = util.get_files(PATHS['MACHINE'], '*') for n in range(len(machines)): machines[n] = machines[n].split('/')[-1].replace('.py', '') try: machine = __import__(machines[n]) except ImportError: continue if machine.matches_host() == True: break del machine try: machine except NameError: util.warn("HOST " + os.uname()[1] + " UNKNOWN"); sys.exit() host = machine.get_options() # overwrite with environment variable if available for name in HOST_OPTIONS: env_name = get_env_name(name) if env_name in os.environ: util.gentle_warn( '{} environment variable overriding {} in machine file'.format(env_name, name) ) host[name] = os.environ[env_name] return host
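# get_host() imports each candidate machine file and expects it to expose
# matches_host() and get_options(). A minimal sketch of such a machine file;
# the host name and option values here are made up:
import os

def matches_host():
    # True when this machine file describes the current host
    return os.uname()[1] == 'mycluster-login01'

def get_options():
    return {
        'NAME': 'mycluster',
        'COMPILER': 'gcc',
        'COMPILER_FLAGS': '-O3 -fopenmp',
        'GSL_DIR': '/usr/local/gsl/',
    }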
def setup_TAC08(task): """ task.topic_file: xml file for TAC task.doc_path: path containing source documents task.manual_path: path for manual (human) summaries """ ## get all document data all_docs = {} files = util.get_files(task.doc_path, r'[^_]+_[^_]+_\d+[\.\-]\d+') sys.stderr.write('Loading [%d] files\n' %len(files)) for file in files: id = os.path.basename(file) all_docs[id] = file ## initialize problems problems = [] # load XML task definition from xml.etree import ElementTree root = ElementTree.parse(task.topic_file).getroot() for topic in root: if topic.tag != "topic": continue id = topic.attrib["id"] title = None narr = None docsets = [] docset_ids = [] for node in topic: if node.tag == "title": title = node.text.strip() elif node.tag == "narrative": narr = node.text.strip() elif node.tag == "docsetA": documents = node.findall("doc") docsets.append([doc.attrib["id"] for doc in documents]) docset_ids.append(node.attrib["id"]) elif node.tag == "docsetB": documents = node.findall("doc") docsets.append([doc.attrib["id"] for doc in documents]) docset_ids.append(node.attrib["id"]) old_docs = [] old_problems = [] for docset_index in range(len(docsets)): ## map docids to documents new_docs = [all_docs[doc] for doc in docsets[docset_index]] ## create a SummaryProblem problem = SummaryProblem(docset_ids[docset_index], title, narr, new_docs, old_docs, old_problems[:]) old_docs += new_docs old_problems.append(problem) ## include training data in problem if task.manual_path: problem._load_training(task.manual_path) problems.append(problem) sys.stderr.write('Setting up [%d] problems\n' %len(problems)) task.problems = problems
def add_lm_lattices(model, lattice_dir, output_dir, dict, lm): sys.stderr.write('adding LM scores to numerator lattices\n') ## Create a config file to use with HLRescore hlrescore_config = '%s/hlrescore.config' %output_dir fh = open(hlrescore_config, 'w') #fh.write('HLANGMODFILTER = "gunzip -c $.gz"\n') fh.write('HNETFILTER = "gunzip -c < $.gz"\n') fh.write('HNETOFILTER = "gzip -c > $.gz"\n') fh.write('RAWMITFORMAT = T\n') fh.write('HLRESCORE: FIXBADLATS = TRUE\n') fh.write('HLRESCORE: STARTWORD = <s>\n') fh.write('HLRESCORE: ENDWORD = </s>\n') fh.close() ## HLRescore parameters grammar_scale = 15.0 trans_penalty = 0.0 def hlrescore(input, path): cmd = 'HLRescore -A -D -T 1 -w -c -q tvaldm -C %s' %hlrescore_config cmd += ' -S %s' %input cmd += ' -L %s/%s/' %(lattice_dir, path) cmd += ' -l %s/%s/' %(output_dir, path) cmd += ' -s %f' %grammar_scale cmd += ' -p %f' %trans_penalty cmd += ' -n %s' %lm cmd += ' %s' %dict if model.verbose > 0: cmd += ' >%s/%s.log' %(output_dir, os.path.basename(input)) return cmd ## Split up lattice list lattice_list = '%s/lattice.list' %output_dir fh = open(lattice_list, 'w') remove_gz = lambda x: x.replace('.gz', '') files = map(remove_gz, util.get_files(lattice_dir, r'.*\.lat')) fh.write('\n'.join(files)) fh.close() split_lattice = SplitList(output_dir, lattice_list, by_path=True) ## Create the HLRescore commands cmds = [] inputs = split_lattice.get_files() for input in inputs: key = split_lattice.get_key(input) new_output = '%s/%s' %(output_dir, key) if not os.path.isdir(new_output): os.makedirs(new_output) cmds.append(hlrescore(input, key)) if model.local == 1: for cmd in cmds: print cmd print os.popen(cmd).read() else: cmds_file = '%s/hlrescore.commands' %output_dir fh = open(cmds_file, 'w') for cmd in cmds: fh.write('%s\n' %cmd) fh.close() util.run_parallel(cmds_file, model.jobs, output_dir)
def initializeAuthentication():
    #If no file use default but set filename anyway so the dump function will work
    for i in range(0, 15):
        #Gets the highest numbered of all directories that are named after floating point values (i.e. most recent timestamp)
        name = util.getHighestNumberedTimeDirectory(directories.usersdir)
        possibledir = os.path.join(directories.usersdir, name)

        #__COMPLETE__ is a special file we write to the dump directory to show it as valid
        if '__COMPLETE__' in util.get_files(possibledir):
            try:
                f = open(os.path.join(possibledir, 'users.json'))
                temp = json.load(f)
                f.close()
            except:
                temp = {'users': {}, 'groups': {}}

            global Users
            Users = temp['users']
            global Groups
            Groups = temp['groups']
            global Tokens
            Tokens = {}
            for user in Users:
                #What an unreadable line! It turns all the dicts in Users into User() instances
                Users[user] = User(Users[user])
                assignNewToken(user)
            generateUserPermissions()
            break #We successfully found the latest good users.json dump, so we break the loop
        else:
            #If there was no flag indicating that this was an actual complete dump as opposed
            #to an interruption, rename it and try again
            shutil.copytree(possibledir, os.path.join(directories.usersdir, name + "INCOMPLETE"))
            shutil.rmtree(possibledir)
def wsj(path_wav, path_trans, config, output, wav_list=[]): ## wav files wav_files = set(util.get_files(path_wav, re.compile('.*\.wv1', re.IGNORECASE))) print 'found wav files [%d]' %len(wav_files) ## filter using a wav list keys = set() if wav_list: hint = path_wav.split('/')[-2]+'/' files = os.popen('grep -i %s %s' %(hint, wav_list)).read().splitlines() if len(files) == 0: files = [os.path.basename(f).split('.')[0] for f in open(wav_list).read().splitlines()] for file in files: key = os.path.basename(file).split('.')[0].upper() keys.add(key) print 'found keys [%d]' %len(list(keys)) fh = open(output, 'w') ## transcription files trans_files = util.get_files(path_trans, re.compile('.*\.dot', re.IGNORECASE)) print 'found transcription files [%d]' %len(trans_files) unmatched = 0 for file in trans_files: dirname = os.path.dirname(file) for line in open(file): line = line.strip() ext = re.search('\([^()]*\)$', line).group() trans = line.replace(ext, '').strip() trans = fix_wsj_trans(trans) ext = ext.replace('(','').replace(')','').upper() wav_file = '%s/%s.WV1' %(dirname, ext) if wav_file not in wav_files: wav_file = '%s/%s.wv1' %(dirname, ext.lower()) if wav_file not in wav_files: for w in wav_files: if ext in w or ext.lower() in w: wav_file = w if wav_file not in wav_files: print 'no matching wav file [%s]' %wav_file unmatched += 1 continue if ext in keys or len(keys)==0: fh.write('%s %s %s\n' %(wav_file, config, trans)) fh.close() print 'unmatched [%d]' %unmatched
def predikate(event, watcher): if predicate and not predicate(event, watcher): return False if not watcher.files: return True files = get_files(watcher.files) found = getattr(event, 'dest_path', None) in files return found or event.src_path in files
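# predikate() above closes over `predicate` and get_files from its enclosing scope.
# A self-contained sketch of the same path check, with a stand-in event object
# (Event and the paths are illustrative, not part of the original code):
from collections import namedtuple

Event = namedtuple('Event', ['src_path', 'dest_path'])

def event_touches_watched(event, watched_paths):
    """True if the event's source or destination path is one of the watched files."""
    dest = getattr(event, 'dest_path', None)
    return event.src_path in watched_paths or dest in watched_paths

evt = Event(src_path='/srv/app/config.yaml', dest_path=None)
print(event_touches_watched(evt, {'/srv/app/config.yaml'}))  # True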
def loadAll():
    for i in range(0, 15):
        #Gets the highest numbered of all directories that are named after floating point values (i.e. most recent timestamp)
        name = util.getHighestNumberedTimeDirectory(directories.persistdir)
        possibledir = os.path.join(directories.persistdir, name)

        #__COMPLETE__ is a special file we write to the dump directory to show it as valid
        if '__COMPLETE__' in util.get_files(possibledir):
            with persistance_dict_connection_lock:
                for fn in util.get_files(possibledir):
                    with open(os.path.join(possibledir, fn)) as f:
                        persistancedict[util.unurl(fn)] = _PersistanceFile(json.load(f))
            break #We successfully found the latest good dump, so we break the loop
        else:
            #If there was no flag indicating that this was an actual complete dump as opposed
            #to an interruption, rename it and try again
            shutil.copytree(possibledir, os.path.join(directories.persistdir, name + "INCOMPLETE"))
            shutil.rmtree(possibledir)
def listlogdumps(): where =os.path.join(directories.logdir,'dumps') logz = [] r = re.compile(r'^([0-9]*\.[0-9]*)\.json(\.gz|\.bz2)?$') for i in util.get_files(where): m = r.match(i) if not m == None: #Make time,fn,ext,size tuple logz.append((float(m.groups('')[0]), os.path.join(where,i),m.groups('Uncompressed')[1],os.path.getsize(os.path.join(where,i)))) return logz
def read_sents(paths): doc_sets = [] # multiple annotators for path in paths: doc_sets.append(sorted(util.get_files(path, r'.*'))) problems = [] for i in range(len(doc_sets[0])): filenames = [doc_set[i] for doc_set in doc_sets] read_news_doc(filenames, problems) print 'Load %d news (BN/WN) problems' % len(problems) return problems
def jade(files=None, obj=None, out=None, path=None, pretty=False, client=False, no_debug=False, watch=False): """ Compiles jade templates :param files: List of files or dict to pass to get_files for all files to run against :param obj: javascript options object :param out: output the compiled html to <dir> :param path: filename used to resolve includes :param pretty: compile pretty html output :param client: compile function for client-side runtime.js :param no_debug: compile without debugging (smaller functions) :param watch: watch files for changes and automatically re-render """ logger.debug("Running jade function. Files: " + str(files)) if files is None: raise Exception("Must run jade with files") if isinstance(files, dict): # Some sane defaults for jade if not specified files['only_files'] = True if 'recursive' not in files: files['recursive'] = True if 'match' not in files: files['match'] = ['*jade', ] files = get_files(files) compile_paths = ' '.join(['"{}"'.format(f) for f in files]) if not getattr(env, 'dry_run', False): opts = [] if obj: opts.append('-O "{}"'.format(obj)) if out: opts.append('-o "{}"'.format(out)) if path: opts.append('-p "{}"'.format(path)) if pretty: opts.append('-P') if client: opts.append('-c') if no_debug: opts.append('-D') if watch: opts.append('-w') build_run("jade {} {}".format(' '.join(opts), compile_paths), async=watch, keep_alive=1 if watch else 0) logger.info("Successfully compiled {} jade files".format(len(files))) logger.debug("Files compiled: \n" + "\n".join(files))
def setup_DUC_basic(task, skip_updates=False): """ task.topic_file: sgml file for DUC task.doc_path: path containing source documents task.manual_path: path for manual (human) summaries """ ## get all document data all_docs = {} files = util.get_files(task.doc_path, '\w{2,3}\d+[\.\-]\d+') sys.stderr.write('Loading [%d] files\n' % len(files)) for file in files: id = os.path.basename(file) all_docs[id] = file ## initialize problems problems = [] data = open(task.topic_file).read().replace('\n', ' ') topics = re.findall('<topic>.+?</topic>', data) sys.stderr.write('Setting up [%d] problems\n' % len(topics)) for topic in topics: id = util.remove_tags(re.findall('<num>.+?</num>', topic)[0])[:-1] title = util.remove_tags(re.findall('<title>.+?</title>', topic)[0]) narr = util.remove_tags(re.findall('<narr>.+?</narr>', topic)[0]) docsets = re.findall('<docs.*?>.+?</docs.*?>', topic) docsets = map(util.remove_tags, docsets) docsets = [d.split() for d in docsets] old_docs = [] for docset_index in range(len(docsets)): ## update naming convention different from main if len(docsets) > 1: id_ext = '-' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'[docset_index] else: id_ext = '' new_docs = [all_docs[doc] for doc in docsets[docset_index]] ## create a SummaryProblem problem = SummaryProblem(id + id_ext, title, narr, new_docs, old_docs) old_docs += new_docs ## include training data in problem if task.manual_path: problem._load_training(task.manual_path) problems.append(problem) ## skip updates? if skip_updates: break task.problems = problems
def colorAnalysis(): impath = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots" # args["ip"] compath = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites" # args["cp"] goodUris = [] with open("gooduris_20160225.txt", "r") as read: for uri in map(lambda line: line.rstrip("\n"), read): goodUris.append(uri) compisits = get_files(compath, lambda f: "allTheSame" not in f and check_if_goodURI(f, goodUris) and "interval" not in f) method_composites = defaultdict(dict) for comp in sorted(compisits): site = comp[comp.find("_") + 1:comp.rfind("_")] method_composites[comp[:comp.index("_")]][site] = comp # composite_only_histogram(method_composites,compath) files = get_and_process_thumbs(impath, method_composites, goodUris) print(type(files)) # print(method_composites) impath += "/" methods = {'random': MethodCompThums('random', impath, files["random"]), 'temporalInterval': MethodCompThums('temporalInterval', impath, files["temporalInterval"]), 'alSum': MethodCompThums('alSum', impath, files["alSum"])} # type: dict[str,MethodCompThums] alsum = methods['alSum'] out = {} # type: dict[str,dict[str,CompositeColorResulst]] for mname, method in methods.items(): print(mname,method) dcm = method.get_composite_dom_colors() out[mname] = dcm try: with open("colorResults2.json","w+") as wout: wout.write(json.dumps(out,indent=1,default=lambda x:x.to_jdic())) except TypeError as e: print("Wow bad thing happened",e) for k,v in out.items(): print("+++++++++++++++++++++++++++++++++++++++++++++++++") print(k) for site,ret in v.items(): print("site: ",site) for date,color in ret.results.items(): print(date,''.join(color))
def coffee(files=None, bare=False, lint=False, map=False, join=None, output=None, watch=False): """ Compiles coffeescript files :param files: List of files or dict to pass to get_files for all files to run against :param bare: output bare coffeescript :param lint: run through jslint :param map: generate source maps :param join: join together files to this output location (string path) :param output: directory to output files :param watch: True to watch the output and recompile on changes """ logger.debug("Running coffee function. Files: " + str(files)) if files is None: raise Exception("Must run coffee with files") if isinstance(files, dict): # Some sane defaults for coffeescript if not specified files['only_files'] = True if 'recursive' not in files: files['recursive'] = True if 'match' not in files: files['match'] = ['*coffee', ] files = get_files(files) compile_paths = ' '.join(['"{}"'.format(f) for f in files]) if not getattr(env, 'dry_run', False): opts = [] if bare: opts.append('-b') if join: opts.append('-j "{}"'.format(join)) if lint: opts.append('-l') if map: opts.append('-m') if watch: opts.append('-w') if output: opts.append('-o "{}"'.format(output)) build_run("coffee {} -c {}".format(' '.join(opts), compile_paths), async=watch, keep_alive=1 if watch else 0) logger.info("Successfully compiled {} coffeescript files".format(len(files))) logger.debug("Files compiled: \n" + "\n".join(files))
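# Example invocation of coffee() (paths are hypothetical): compile every .coffee
# file under src/ into static/js/, generating source maps.
coffee(files=dict(path='src/'), map=True, output='static/js')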
def parse_user_verify_files():
    files = get_files("E:/取证/05.案件支持/上海/青浦反诈/dibaqu/第八区/实名认证", [])
    # Loop over the list of filenames returned by get_files, print each one, and parse it
    count = 0
    user_infos = []
    for file in sorted(files):
        # if file.find('1414.html') > 0:
        print(file)
        count = count + 1
        print(count)
        parse_html(file, user_infos)
        # write_to_excel(file, app_infos)
    return user_infos
def create_background_dataset(dir, target, ext='jpg', type='otsu'): from util import get_files if not os.path.isdir(target): os.mkdir(target) for src, fname in get_files(dir, ext=ext): print src if type == 'otsu': bg_file = get_background_otsu(src) else: bg_file = get_background_sauvola(src) f_bg = os.path.join(target, fname) imsave(f_bg, bg_file)
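# Example call to create_background_dataset() (directories are hypothetical):
# estimate the page background of every .jpg under scans/ and save the results
# to backgrounds/ using Otsu thresholding.
create_background_dataset('scans/', 'backgrounds/', ext='jpg', type='otsu')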
def align_mappy(dir_in, file_out, file_fasta): a = mp.Aligner(file_fasta, preset='map-ont') # Load or build index if not a: raise Exception("ERROR: failed to load/build index") reads = get_files(dir_in) files_fastq = {} data = [] for read in tqdm(reads): with h5py.File(read, 'r', libver='latest') as fd: no_alignment = True fastq = read_fastq(fd) files_fastq[fastq.id] = len(fastq.seq) for hit in a.map(fastq.seq): # Traverse alignments if hit.is_primary: # Check if the alignment is primary # Reference for seq_record in SeqIO.parse(file_fasta, 'fasta'): ref = seq_record.seq[hit.r_st:hit.r_en] r_CG_num = len(re.findall(r'(CG)', str(ref))) # Query query = fastq.seq[hit.q_st:hit.q_en] if hit.strand == -1: query = mp.revcomp(query) q_CG_num = len(re.findall(r'(CG)', str(query))) no_alignment = False data.append([ fastq.id, hit.r_st, hit.r_en, hit.q_st, hit.q_en, r_CG_num, q_CG_num, hit.cigar_str ]) break if no_alignment: data.append([fastq.id, '', '', '', '', 0, 0, '']) data = pd.DataFrame(data, columns=[ 'read_id', 'r_st', 'r_en', 'q_st', 'q_en', 'r_CG_num', 'q_CG_num', 'cigar_str' ]) data.sort_values('read_id', inplace=True) data.to_csv(file_out, index=False) print("Average length of fastq files:", sum(files_fastq.values()) / len(files_fastq.values()))
def loadAll():
    for i in range(0, 15):
        #Gets the highest numbered of all directories that are named after floating point values (i.e. most recent timestamp)
        name = util.getHighestNumberedTimeDirectory(directories.moduledir)
        possibledir = os.path.join(directories.moduledir, name)

        #__COMPLETE__ is a special file we write to the dump directory to show it as valid
        if '__COMPLETE__' in util.get_files(possibledir):
            loadModules(possibledir)
            auth.importPermissionsFromModules()
            break #We successfully found the latest good ActiveModules dump, so we break the loop
        else:
            #If there was no flag indicating that this was an actual complete dump as opposed
            #to an interruption, rename it and try again
            shutil.copytree(possibledir, os.path.join(directories.moduledir, name + "INCOMPLETE"))
            shutil.rmtree(possibledir)
def loadModule(moduledir, path_to_module_folder):
    with modulesLock:
        #Make an empty dict to hold the module resources
        module = {}
        #Iterate over all resource files and load them.
        #Resource names are url-encoded in the filenames.
        for i in util.get_files(os.path.join(path_to_module_folder, moduledir)):
            with open(os.path.join(path_to_module_folder, moduledir, i)) as f:
                #Load the resource and add it to the dict
                module[unurl(i)] = json.load(f)
        name = unurl(moduledir)
        ActiveModules[name] = module
        bookkeeponemodule(name)
def analyse_CG(dir_in, file_out, file_fasta): a = mp.Aligner(file_fasta, preset='map-ont') # Load or build index if not a: raise Exception("ERROR: failed to load/build index") reads = get_files(dir_in) data = [] for read in tqdm(reads): with h5py.File(read, 'r', libver='latest') as fd: matches = {'M': 0, 'X': 0, 'D': 0, 'I': 0} CG_cnt = {'M': 0, 'X': 0, 'D': 0, 'I': 0} fastq = read_fastq(fd) ref = '' mapq = 0 for hit in a.map(fastq.seq, cs=True): # Traverse alignments if hit.is_primary: # Check if the alignment is primary # Alignment matches = count_matches(hit.cs) # Reference for seq_record in SeqIO.parse(file_fasta, 'fasta'): ref = seq_record.seq[hit.r_st: hit.r_en] # Query query = fastq.seq[hit.q_st: hit.q_en] if hit.strand == -1: query = mp.revcomp(query) # Normalize ref, query = normalize(ref, query, hit.cigar_str) # Analyse CG motif CG_cnt = count_CG(ref, query) mapq = hit.mapq break data.append([fastq.id, len(ref), matches['M'], matches['X'], matches['D'], matches['I'], CG_cnt['M'], CG_cnt['X'], CG_cnt['D'], CG_cnt['I'], mapq]) data = pd.DataFrame(data, columns=['read_id', 'alignment_len', 'M', 'X', 'D', 'I', 'M_CG', 'X_CG', 'D_CG', 'I_CG', 'mapq']) data.sort_values('read_id', inplace=True) data.to_csv(file_out, index=False)
def test_get_files(self): paths = get_files(dict(path='/tmp/test_fab_build/', recursive=False)) expected = ['/tmp/test_fab_build/test.txt', '/tmp/test_fab_build/test.py', '/tmp/test_fab_build/coffee', '/tmp/test_fab_build/1'] self.assertItemsEqual(paths, expected) paths = get_files(dict(path='/tmp/test_fab_build/', match=["*.py",], recursive=False)) expected = ['/tmp/test_fab_build/test.py'] self.assertItemsEqual(paths, expected) paths = get_files(dict(path='/tmp/test_fab_build/', ignore=["*.py",], recursive=False)) expected = ['/tmp/test_fab_build/test.txt', '/tmp/test_fab_build/coffee', '/tmp/test_fab_build/1'] self.assertItemsEqual(paths, expected) paths = get_files(dict(path='/tmp/test_fab_build/', only_files=True, recursive=False)) expected = ['/tmp/test_fab_build/test.txt', '/tmp/test_fab_build/test.py'] self.assertItemsEqual(paths, expected) paths = get_files(dict(path='/tmp/test_fab_build/')) expected = [ '/tmp/test_fab_build/1', '/tmp/test_fab_build/1/2', '/tmp/test_fab_build/1/2/3', '/tmp/test_fab_build/1/test', '/tmp/test_fab_build/test.txt', '/tmp/test_fab_build/test.py', '/tmp/test_fab_build/1/one.py', '/tmp/test_fab_build/1/2/two.py', '/tmp/test_fab_build/coffee', '/tmp/test_fab_build/coffee/test1.coffee', '/tmp/test_fab_build/coffee/test2.coffee', '/tmp/test_fab_build/coffee/test3.coffee', '/tmp/test_fab_build/1/2/3/three.py' ] self.assertItemsEqual(paths, expected) paths = get_files(dict(path='/tmp/test_fab_build/', recursive=True, only_files=True)) expected = [ '/tmp/test_fab_build/test.txt', '/tmp/test_fab_build/test.py', '/tmp/test_fab_build/1/one.py', '/tmp/test_fab_build/1/2/two.py', '/tmp/test_fab_build/1/2/3/three.py', '/tmp/test_fab_build/coffee/test1.coffee', '/tmp/test_fab_build/coffee/test2.coffee', '/tmp/test_fab_build/coffee/test3.coffee' ] self.assertItemsEqual(paths, expected)
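# The assertions above pin down the behaviour of get_files(dict) fairly tightly.
# Below is a rough re-implementation inferred only from this test (not the build
# tool's actual code): match/ignore are fnmatch patterns applied to basenames,
# recursive defaults to True, and only_files drops directories from the result.
import fnmatch
import os

def get_files_sketch(spec):
    path = spec['path']
    match = spec.get('match')
    ignore = spec.get('ignore')
    recursive = spec.get('recursive', True)
    only_files = spec.get('only_files', False)
    results = []
    for root, dirs, files in os.walk(path):
        names = files if only_files else dirs + files
        for name in names:
            if match and not any(fnmatch.fnmatch(name, p) for p in match):
                continue
            if ignore and any(fnmatch.fnmatch(name, p) for p in ignore):
                continue
            results.append(os.path.join(root, name))
        if not recursive:
            break
    return results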
def timit(path_wav, path_trans, output, config, test_speakers=None): ## transcription files trans_files = [f for f in util.get_files(path_trans, re.compile('\.txt', re.IGNORECASE)) if '/si' in f or '/sx' in f] if test_speakers != None: trans_files = [f for f in trans_files if f.split('/')[-2][1:] in test_speakers] print 'found transcription files [%d]' %len(trans_files) fh = open(output, 'w') for trans_file in trans_files: wav_file = trans_file.replace('.txt', '.wav') if not os.path.isfile(wav_file): continue trans = open(trans_file).read().lower() trans = ' '.join(trans.split()[2:]) trans = re.sub('[^a-z ]', '', trans) fh.write('%s %s %s\n' %(wav_file, config, trans)) fh.close()
def start(file_path_in, file_name_need_contain):
    """
    Get all HTML files under the folder whose names contain the given string, parse
    the links out of each file, send a GET request for every link, and then parse
    each response.
    :param file_path_in:
    :param file_name_need_contain:
    :return:
    """
    fs = util.get_files(file_path_in, file_name_need_contain)
    for f in fs:
        rf = util.open_file_r(f)  # open the file stream
        if rf == -1:
            continue
        html_s = rf.read()  # read the file (an HTML file)
        rf.close()
        urls = list(set(get_navigation_req(html_s)))  # parse out the links
        for url in urls:
            print(url)
            res = req_res(url)
            parse_result.parser(res)
def get_rulesets(ruledir, recurse): """ List of ruleset objects extracted from the yaml directory """ if os.path.isdir(ruledir) and recurse: yaml_files = [] for root, dirs, files in os.walk(ruledir): for name in files: filename, file_extension = os.path.splitext(name) if file_extension == '.yaml': yaml_files.append(os.path.join(root, name)) if os.path.isdir(ruledir) and not recurse: yaml_files = util.get_files(ruledir, 'yaml') elif os.path.isfile(ruledir): yaml_files = [ruledir] extracted_files = util.extract_yaml(yaml_files) rulesets = [] for extracted_yaml in extracted_files: rulesets.append(ruleset.Ruleset(extracted_yaml)) return rulesets
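# Example use of get_rulesets() (paths are hypothetical): load every ruleset under
# rules/ including sub-directories, or load a single standalone ruleset file.
rulesets = get_rulesets('rules/', recurse=True)
single = get_rulesets('rules/cloudtrail.yaml', recurse=False)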
def get_mapqs(dir_in, file_fasta): a = mp.Aligner(file_fasta, preset='map-ont') # Load or build index if not a: raise Exception("ERROR: failed to load/build index") reads = get_files(dir_in) mapqs = [] for read in tqdm(reads): with h5py.File(read, 'r', libver='latest') as fd: fastq = read_fastq(fd) mapq = 0 for hit in a.map(fastq.seq): # Traverse alignments if hit.is_primary: # Check if the alignment is primary mapq = hit.mapq break mapqs.append(mapq) return mapqs
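# Example call to get_mapqs() (paths hypothetical): gather the primary-alignment
# MAPQ for every read in a fast5 directory and report the mean.
mapqs = get_mapqs('fast5/', 'reference.fasta')
print('reads:', len(mapqs), 'mean MAPQ:', sum(mapqs) / max(len(mapqs), 1))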
def long(): impath = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots" # args["ip"] compath = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites" # args["cp"] goodUris = [] origuris = [] with open("gooduris_20160225.txt", "r") as read: for uri in map(lambda line: line.rstrip("\n"), read): goodUris.append(uri) with open("origuris.txt", "r") as read: for uri in map(lambda line: line.rstrip("\n"), read): origuris.append(uri) compisits = get_files(impath, lambda f: filterASI(f) and check_if_goodURI(f, goodUris)) useragent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:44.0) Gecko/20100101 Firefox/44.01' session = requests.Session() session.headers.update({'User-Agent': useragent}) got = {} gotURIs = [] with open("tms2.json", "w+") as out: out.write("{ tms:[") for it in sorted(origuris): # tm = TimeMap("www.%s"%it) # print(tm.mementos) request = session.get("http://web.archive.org/web/timemap/json/www.%s" % it) # got[it] = {"tmuri":"http://web.archive.org/web/timemap/json/www.%s"%it,'uri':it,"tms":json.loads( # request.text)} try: got = json.loads(request.text) jsn = json.dumps(got) print(jsn + "\n") out.write("[" + jsn + "],\n") gotURIs.append(it) except ValueError: print(request.text) print(request.headers) print("\n\n") session.close()
def run(p_method, p_normalise = True, p_reverse_results = True): all_dirs = current.train + [current.test] for d in all_dirs: vectors[d] = {} util.read_files(util.get_files(dataset.DATA_DIRECTORY + d), d, vectors) if p_normalise: util.normalise(vectors) if p_method == Method.DOT_PRODUCT: dot.compute_dot_product(current, vectors, results) elif p_method == Method.DIRICHLET: bayes.calculate(current, vectors, results) elif p_method == Method.DIFFERENCE: dot.compute_difference(current, vectors, results) elif p_method == Method.PEARSON: pearson.compute(current, vectors, results) #bayes.cal(current, vectors, results) util.print_results(results, p_reverse_results, decimal_numbers)
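# run() dispatches on Method constants defined elsewhere in the project. A minimal
# sketch of what that enum could look like (the real definition may differ):
from enum import Enum

class Method(Enum):
    DOT_PRODUCT = 1
    DIRICHLET = 2
    DIFFERENCE = 3
    PEARSON = 4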
def concat(files=None, output=None, banner=None, line_sep=None, clean=True): """ Concatenates together files and writes them to output :param files: List of files or dict to pass to get_files for all files to run against :param output: the location to output the resulting file :param banner: A top banner string to add to the file :param line_sep: The character with which the files will be concatenated :param clean: If True, removes the output file before running concat. """ logger.debug("Running concat function. Files: " + str(files)) if files is None or output is None: raise Exception("Must run concat with files and output location") if clean and os.path.exists(output): os.remove(output) line_sep = line_sep or "\n" files = get_files(files) if not getattr(env, 'dry_run', False): out_data = banner or "" for path in files: if not os.path.exists(path) or not os.path.isfile(path): continue with open(path, 'r') as f: out_data = line_sep.join([out_data, f.read()]) with open(output, 'w') as f: f.write(out_data) logger.info("Successfully concatenated {} files/folders to {}".format(len(files), output)) logger.debug("Files/Folders concatenated: \n" + "\n".join(files))
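# Example call to concat() (paths hypothetical): join all JS fragments under
# build/js/ into one bundle, with a banner comment at the top.
concat(files=dict(path='build/js/', match=['*.js'], only_files=True),
       output='build/bundle.js',
       banner='/* my-app bundle */')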
def fisher(path_wav, path_trans, output): ## transcription files trans_files = util.get_files(path_trans, re.compile('\.txt', re.IGNORECASE)) trans_files = [f for f in trans_files if 'bbn' not in f] #print 'found transcription files [%d]' %len(trans_files) fh = open(output, 'w') file_count = 0 token_count = 0 word_list = set() for file in trans_files: file_count += 1 if file_count % 100 == 0: print 'transcription files processed [%d]' %file_count id = os.path.basename(file).replace('.txt', '') for line in open(file): line = line.strip() if not line: continue if line.startswith('#'): continue words = map(str.upper, ' '.join(line.split(':')[1:]).split()) if not words: continue fh.write(' '.join(words) + '\n') fh.close()
def build(PROBLEM, PATHS): print("") print("********************************************************************************") print("") print(" BHLIGHT BUILD SCRIPT") print("") print(" OPTIONS:") print(" -help (print this message and exit)") print(" -debug (use debug compile params)") print(" -force (do not abort upon compile-time warnings/errors)") print(" -noclean (do not delete old source files)") print(" -noparam (do not create new parameter file)") print(" -dir /path/to/target (create target dir and build there)") print("") print("********************************************************************************") print("") if is_user_help(): sys.exit() # PROCESS USER INPUT DEBUG = is_user_debug() FORCE = is_user_force() MOVEEXEC = set_dirs(PATHS) NOPARAM = is_user_noparam() NOCLEAN = is_user_noclean() CLEAN = not NOCLEAN WRITE_PARAM = not NOPARAM KEEP_SRC = '-src' in sys.argv REMOVE_SRC = not KEEP_SRC # get version VERSION = get_version() # PRINT TO TERMINAL AND LOGFILE LOGFILE = os.path.join(PATHS['BUILD'], 'log_build') util.log_output(sys, LOGFILE) # SEARCH FOR MACHINE host = get_host(PATHS) if DEBUG and 'DEBUG_FLAGS' not in host.keys(): util.warn("Debug compiler options not set! Using normal compiler flags.") host['DEBUG_FLAGS'] = host['COMPILER_FLAGS'] C_FLAGS = '-std=c99 ' if 'MEM_MODEL' in host: C_FLAGS += "-mcmodel=" + host['MEM_MODEL'] + ' ' else: C_FLAGS += "-mcmodel=medium " if DEBUG: C_FLAGS += host['DEBUG_FLAGS'] else: C_FLAGS += host['COMPILER_FLAGS'] # MATH AND DYNAMIC LINKING LIB_FLAGS = '-lm -ldl' LIBRARIES = '' INCLUDES = '' # GSL if 'GSL_DIR' not in host: host['GSL_DIR'] = '' host['GSL_DIR'] = util.sanitize_path(host['GSL_DIR']) LIB_FLAGS += (' -lgsl -lgslcblas' + ' -Wl,-rpath=' + host['GSL_DIR'] + 'lib/') LIBRARIES += '-L' + host['GSL_DIR'] + 'lib/' INCLUDES += '-I' + host['GSL_DIR'] + 'include/' # MPI if 'MPI_DIR' in host: host['MPI_DIR'] = util.sanitize_path(host['MPI_DIR']) LIB_FLAGS += (' -Wl,-rpath=' + host['MPI_DIR'] + 'lib/') LIBRARIES += ' -L' + host['MPI_DIR'] + 'lib/' INCLUDES += ' -I' + host['MPI_DIR'] + 'include/' # HDF5 if 'HDF5_DIR' in host: host['HDF5_DIR'] = util.sanitize_path(host['HDF5_DIR']) LIB_FLAGS += (' -lhdf5_hl -lhdf5' +' -Wl,-rpath=' + host['HDF5_DIR'] + 'lib/') LIBRARIES += ' -L' + host['HDF5_DIR'] + 'lib/' INCLUDES += ' -I' + host['HDF5_DIR'] + 'include/' print(" CONFIGURATION\n") set_cparm("VERSION", '"{}"'.format(VERSION)) print_config("VERSION", VERSION) print_config("MACHINE", host['NAME']) print_config("PROBLEM", PROBLEM) print_config("BUILD DIR", PATHS['BUILD']) print_config("COMPILER", host['COMPILER']) print_config("GSL_DIR", host['GSL_DIR']) if 'MPI_DIR' in host: print_config("MPI_DIR", host['MPI_DIR']) if 'HDF5_DIR' in host: print_config("HDF5_DIR", host['HDF5_DIR']) if 'EXECUTABLE' in host: print_config("EXECUTABLE", host['EXECUTABLE']) if 'MPI_EXECUTABLE' in host: print_config("MPI_EXECUTABLE", host['MPI_EXECUTABLE']) print_config("C_FLAGS", C_FLAGS) print_config("LIB_FLAGS", LIB_FLAGS) print_config("LIBRARIES", LIBRARIES) print_config("INCLUDES", INCLUDES) print_config("OPENMP", CPARMS['OPENMP']) print("\n COMPILE-TIME PARAMETERS\n") print_config("N1TOT", CPARMS['N1TOT']) print_config("N2TOT", CPARMS['N2TOT']) print_config("N3TOT", CPARMS['N3TOT']) print_config("N1CPU", CPARMS['N1CPU']) print_config("N2CPU", CPARMS['N2CPU']) print_config("N3CPU", CPARMS['N3CPU']) print_config("METRIC", CPARMS['METRIC']) print_config("RECONSTRUCTION", CPARMS['RECONSTRUCTION']) if util.parm_is_active(CPARMS, 'EOS'): print_config("EOS", 
CPARMS['EOS']) else: set_cparm("EOS", 'EOS_TYPE_GAMMA') if util.parm_is_active(CPARMS, 'RADIATION'): print_config("RADIATION", CPARMS['RADIATION']) if util.parm_is_active(CPARMS, 'NU_BINS'): print_config('NU_BINS', CPARMS['NU_BINS']) else: set_cparm("NU_BINS", 200) if util.parm_is_active(CPARMS, 'NTH'): print_config("NTH", CPARMS["NTH"]) else: set_cparm("NTH", 8) if util.parm_is_active(CPARMS, 'NPHI'): print_config("NPHI", CPARMS["NPHI"]) else: set_cparm("NPHI", 8) if util.parm_is_active(CPARMS, "NU_BINS_SPEC"): print_config("NU_BINS_SPEC", CPARMS["NU_BINS_SPEC"]) else: set_cparm("NU_BINS_SPEC", 200) if util.parm_is_active(CPARMS, "BURROWS_OPACITIES"): print_config("BURROWS_OPACITIES", CPARMS["BURROWS_OPACITIES"]) else: set_cparm("BURROWS_OPACITIES", 0) if util.parm_is_active(CPARMS, "HDF5_OPACITIES"): print_config("HDF5_OPACITIES", CPARMS["HDF5_OPACITIES"]) else: set_cparm("HDF5_OPACITIES", 0) else: set_cparm("RADIATION", 0) if util.parm_is_active(CPARMS, 'ELECTRONS'): print_config("ELECTRONS", CPARMS['ELECTRONS']) else: set_cparm("ELECTRONS", 0) if util.parm_is_active(CPARMS,'NVAR_PASSIVE'): print_config("NVAR_PASSIVE", CPARMS["NVAR_PASSIVE"]) else: set_cparm("NVAR_PASSIVE", 0) if util.parm_is_active(CPARMS, 'GAMMA_FALLBACK'): print_config('GAMMA_FALLBACK', CPARMS['GAMMA_FALLBACK']) else: set_cparm('GAMMA_FALLBACK', 0) if util.parm_is_active(CPARMS, 'EXIT_ON_INIT'): print_config('EXIT_ON_INIT', CPARMS['EXIT_ON_INIT']) else: set_cparm('EXIT_ON_INIT', 0) if util.parm_is_active(CPARMS, 'OUTPUT_EOSVARS'): print_config("OUTPUT_EOSVARS", CPARMS["OUTPUT_EOSVARS"]) else: set_cparm("OUTPUT_EOSVARS", 0) if CPARMS['RADIATION']: if 'EXPTAU_WEIGHTS' in CPARMS.keys(): print_config('EXPTAU_WEIGHTS', CPARMS['EXPTAU_WEIGHTS']) else: set_cparm('EXPTAU_WEIGHTS', 1) if util.parm_is_active(CPARMS,'ELECTRONS')\ and CPARMS['EOS'] != 'EOS_TYPE_GAMMA': raise ValueError("ELECTRONS only compatible with Gamma law EOS.\n" +"Please set EOS = EOS_TYPE_GAMMA.\n") if CPARMS['EOS'] == 'EOS_TYPE_TABLE' and CPARMS['NVAR_PASSIVE'] < 2: raise ValueError("Tabulated EOS requires at least two passive scalars\n" +"for the electron fraction Ye.\n" +"Please set NVAR_PASSIVE >= 2\n" +"and ensure your problem generator sets it appropriately.\n") if CPARMS['EOS'] == 'EOS_TYPE_TABLE' \ and CPARMS['METRIC'] == 'MKS' \ and CPARMS['NVAR_PASSIVE'] < 3: raise ValueError("Tabulated EOS and MKS metric requires at least three\n" +"passive scalars, for Ye and atmosphere markers.\n" +"Please set NVAR_PASSIVE >= 3\n" +"and ensure your problem generator sets it appropriately.\n") if util.parm_is_active(CPARMS,'ESTIMATE_THETAE') \ and (CPARMS['RADIATION'] == 'RADTYPE_NEUTRINOS'): raise ValueError("Neutrinos not compatible " +"with estimating electron temperature.") if CPARMS['EOS'] == 'EOS_TYPE_POLYTROPE': util.gentle_warn("The polytropic EOS is totally untested. 
" +"Use at your own risk!\n") if CPARMS['EOS'] != 'EOS_TYPE_GAMMA' and not CPARMS['OUTPUT_EOSVARS']: util.gentle_warn("Setting OUTPUT_EOSVARS = True.") set_cparm("OUTPUT_EOSVARS", 1) print_config("OUTPUT_EOSVARS", CPARMS["OUTPUT_EOSVARS"]) NEED_UNITS = (util.parm_is_active(CPARMS, 'RADIATION') or util.parm_is_active(CPARMS, 'COULOMB') or CPARMS['EOS'] == 'EOS_TYPE_TABLE') if util.parm_is_active(CPARMS, 'FLATEMISS'): if not util.parm_is_active(CPARMS, 'EMISSTYPE_FLAT'): util.gentle_warn("Flatemiss active, but not emission type.\n" +"Setting EMISSTYPE_FLAT = ANTINU_ELECTRON.\n") set_cparm("EMISSTYPE_FLAT", "ANTINU_ELECTRON") if util.parm_is_active(CPARMS, 'RADIATION') \ and 'X1R_RAD_BOUND' in CPARMS.keys(): if CPARMS['X1R_RAD_BOUND'] == 'BC_CAMERA' \ and CPARMS['METRIC'] == 'MINKOWSKI': util.warn("X1R_RAD_BOUND BC_CAMERA is " +"not supported for Minkowski metrics.") print("\n EXTRA PARAMETERS\n") for k,v in REPVARS.items(): print_config(k,v) # Set core runtime parameters set_rparm('tf', 'double') set_rparm('dt', 'double') if CPARMS['METRIC'] == 'MINKOWSKI': set_rparm('x1Min', 'double', default = 0.) set_rparm('x1Max', 'double', default = 1.) set_rparm('x2Min', 'double', default = 0.) set_rparm('x2Max', 'double', default = 1.) set_rparm('x3Min', 'double', default = 0.) set_rparm('x3Max', 'double', default = 1.) if CPARMS['METRIC'] == 'MKS': set_rparm('a', 'double', default = 0.5) set_rparm('hslope', 'double', default = 0.3) set_rparm('poly_xt', 'double', default = 0.82) set_rparm('poly_alpha', 'double', default = 14.) set_rparm('mks_smooth', 'double', default = 0.5) set_rparm('Rout', 'double', default = 40.) set_rparm('Rout_vis', 'double', default = 40.) #if util.parm_is_active(CPARMS, 'RADIATION'): # set_rparm('Rout_rad', 'double') if NEED_UNITS: if CPARMS['METRIC'] == 'MINKOWSKI': set_rparm('L_unit', 'double') set_rparm('M_unit', 'double') if CPARMS['METRIC'] == 'MKS': set_rparm('M_unit', 'double') set_rparm('mbh', 'double', default = 1.989e34) if CPARMS['EOS'] == 'EOS_TYPE_GAMMA': set_rparm('gam', 'double', default = 5./3.) set_rparm('cour', 'double', default = 0.9) if util.parm_is_active(CPARMS, 'RADIATION'): set_rparm('cour_cool', 'double', default = 0.25) if util.parm_is_active(CPARMS, 'ELECTRONS'): set_rparm('game', 'double', default = 4./3.) set_rparm('gamp', 'double', default = 5./3.) set_rparm('fel0', 'double', default = 0.01) set_rparm('tptemin', 'double', default = 1.e-3) set_rparm('tptemax', 'double', default = 1.e3) if util.parm_is_active(CPARMS, 'RADIATION'): if not util.parm_is_active(CPARMS, 'ELECTRONS'): set_rparm('tp_over_te', 'double', default = 1.) set_rparm('nph_per_proc', 'double', default = 1.e5) set_rparm('numin', 'double', default = 1.e8) set_rparm('numax', 'double', default = 1.e20) set_rparm('tune_emiss', 'double', default = 1.) set_rparm('tune_scatt', 'double', default = 1.) set_rparm('t0_tune_emiss', 'double', default = -1.) set_rparm('t0_tune_scatt', 'double', default = -1.) set_rparm('thetae_max', 'double', default = 1.e3) set_rparm('sigma_max', 'double', default = 1.) set_rparm('kdotk_tol', 'double', default = 1.e-6) set_rparm('Nph_to_track', 'double', default = 0.); set_rparm('thbin', 'int', default = 8); set_rparm('phibin', 'int', default = 8); if util.parm_is_active(CPARMS, 'FLATEMISS'): set_rparm('cnu_flat', 'double', default = 1.) set_rparm('init_from_grmhd', 'string', default = 'No') set_rparm('DTd', 'double', default = 0.5) set_rparm('DTl', 'double', default = 0.1) set_rparm('DTr', 'double', default = 1000.) 
set_rparm('DNr', 'integer', default = 1024) set_rparm('DTp', 'integer', default = 100) set_rparm('DTf', 'integer', default = 1) set_rparm('outputdir', 'string', default = './') # some fixes for fortran compilation USE_FORTRAN = util.parm_is_active(CPARMS,'BURROWS_OPACITIES') FORT_USE_MPI = (CPARMS['N1CPU'] > 1 or CPARMS['N2CPU'] > 1 or CPARMS['N3CPU'] > 1) FORT_USE_MPI_STR = 'TRUE' if FORT_USE_MPI else 'FALSE' if CPARMS['N1TOT'] > 1 and CPARMS['N2TOT'] > 1 and CPARMS['N2TOT'] > 1: FORT_NDIM=3 elif ((CPARMS['N1TOT'] > 1 and CPARMS['N2TOT'] > 1) or (CPARMS['N1TOT'] > 1 and CPARMS['N3TOT'] > 1) or (CPARMS['N2TOT'] > 1 and CPARMS['N3TOT'] > 1)): FORT_NDIM=2 else: FORT_NDIM=1 if USE_FORTRAN: # -lgfortran for gcc -lifcore -limf for icc LIB_FLAGS += ' ' + host['FORTLINK'] if len(host['FORTLIB']) > 0: LIBRARIES += ' -L' + host['FORTLIB'] if DEBUG: if 'FDEBUG_FLAGS' not in host.keys(): util.warn("Fortran debug options not set! Using normal fortran flags.") host['FDEBUG_FLAGS'] = host['FCFLAGS'] FCFLAGS = host['FDEBUG_FLAGS'] else: FCFLAGS = host['FCFLAGS'] FCFLAGS += (' -DUSE_MPI=' + FORT_USE_MPI_STR + ' -DNDIM=' + str(FORT_NDIM)) # GET ALL SOURCE FILES SRC_CORE = util.get_files(PATHS['CORE'], '*.c') INC_CORE = util.get_files(PATHS['CORE'], '*.h') if USE_FORTRAN: F90_CORE = util.get_files(PATHS['CORE'], '*.f90') else: F90_CORE = [] SRC_PROB = util.get_files(PATHS['PROB'], '*.c') INC_PROB = util.get_files(PATHS['PROB'], '*.h') # Clean if necessary if CLEAN: util.make_clean(PATHS['SRC']) # COPY SOURCE FILES TO BUILD_DIR for src in SRC_CORE: call(['cp', src, PATHS['SRC'] + src.rsplit('/',1)[1]]) for inc in INC_CORE: call(['cp', inc, PATHS['SRC'] + inc.rsplit('/',1)[1]]) if USE_FORTRAN: for src in F90_CORE: call(['cp', src, PATHS['SRC'] + src.rsplit('/',1)[1]]) for src in SRC_PROB: call(['cp', src, PATHS['SRC'] + src.rsplit('/',1)[1]]) for inc in INC_PROB: call(['cp', inc, PATHS['SRC'] + inc.rsplit('/',1)[1]]) # WRITE PARAMETERS FILE pf = open(PATHS['SRC'] + 'params.h', 'w') for KEY in CPARMS: if isinstance(CPARMS[KEY], str): pf.write("#define " + KEY + " (" + CPARMS[KEY] + ")\n") else: # True/False autocast to 1/0. 
pf.write("#define " + KEY + " (%g)\n" % CPARMS[KEY]) pf.close() # GET SINGLE LISTS OF ALL SOURCE, OBJECT, AND HEADER FILES SRC_ALL = util.get_files(PATHS['SRC'], '*.c') INC_ALL = util.get_files(PATHS['SRC'], '*.h') SRC = '' OBJ = '' INC = '' for n in range(len(SRC_ALL)): SRC += '%s ' % os.path.basename(SRC_ALL[n]) OBJ += '%s.o ' % os.path.basename(SRC_ALL[n])[:-2] for n in range(len(INC_ALL)): INC += '%s ' % os.path.basename(INC_ALL[n]) if USE_FORTRAN: F90 = '' FOBJ = '' for src in F90_CORE: F90 += '%s ' % os.path.basename(src) FOBJ += '%s.o ' % os.path.basename(src)[:-4] # WRITE MAKEFILE os.chdir(PATHS['SRC']) mf = open('makefile', 'w') mf.write('CC = ' + host['COMPILER'] + '\n') if USE_FORTRAN: mf.write('F90 = ' + host['FORTRAN_COMP'] + '\n') mf.write('CCFLAGS = ' + C_FLAGS + ' ' + LIBRARIES + ' ' + INCLUDES + '\n') if USE_FORTRAN: mf.write('FCFLAGS = ' + FCFLAGS + '\n') mf.write('LIB_FLAGS = ' + LIB_FLAGS + '\n') mf.write('CC_COMPILE = $(CC) $(CCFLAGS) -c' + '\n') mf.write('CC_LOAD = $(CC) $(CCFLAGS)' + '\n') if USE_FORTRAN: mf.write('FSRC = ' + F90 + '\n') mf.write('FOBJ = ' + FOBJ + '\n') else: mf.write('FSRC = \n') mf.write('FOBJ = \n') mf.write('SRC = ' + SRC + '\n') mf.write('OBJ = ' + OBJ + '\n') mf.write('INC = ' + INC + '\n') mf.write('EXE = bhlight' + '\n') mf.write('.c.o:' + '\n') mf.write('\t$(CC_COMPILE) $*.c' + '\n') if USE_FORTRAN: mf.write('%.o: %.f90 makefile' + '\n') mf.write('\t$(F90) $(FCFLAGS) -c $<\n') mf.write('all: $(EXE)' + '\n') mf.write('$(OBJ): $(INC) makefile' + '\n') mf.write('$(EXE): $(OBJ) $(FOBJ) $(INC) makefile' + '\n') mf.write('\t$(CC_LOAD) $(OBJ) $(FOBJ) $(LIB_FLAGS) -o $(EXE)\n') mf.write('clean:\n') mf.write('\t$(RM) $(SRC) $(FSRC) $(OBJ) $(FOBJ) $(EXE) $(INC)\n') mf.close() print("\n COMPILING SOURCE\n") ncomp = 0 first_error = 1 if DEBUG: popen = subprocess.Popen(['make'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) else: popen = subprocess.Popen(['make','-j','10'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) for stdout_line in iter(popen.stdout.readline, ""): if stdout_line.rstrip()[-2:] == '.c' or stdout_line.rstrip()[-4:] == '.f90': print(" [" + util.color.BOLD + util.color.BLUE + "%2d%%" % (100.*float(ncomp)/len(SRC_ALL+F90_CORE)) + util.color.NORMAL + "] " + util.color.BOLD + stdout_line.rsplit(' -c ',1)[1].rstrip().lstrip().split('/')[-1] + util.color.NORMAL) ncomp += 1 for stderr_line in iter(popen.stderr.readline, ""): # THIS ALSO FAILS FOR WARNINGS!!! 
if first_error == 1: util.warn("COMPILER ERROR") first_error = 0 print(stderr_line.rstrip()) if first_error != 1 and not FORCE: util.warn("COMPILATION FAILED") sys.exit() obj_files = util.get_files(PATHS['SRC'], '*.o') for f in obj_files: os.remove(f) os.rename(PATHS['SRC'] + 'bhlight', PATHS['BUILD'] + 'bhlight') if REMOVE_SRC: import shutil shutil.rmtree(PATHS['SRC']) print("\n BUILD SUCCESSFUL") # CREATE RUNTIME PARAMETERS FILE PARAMFILE = PATHS['BUILD'] + PARAM_NAME if WRITE_PARAM: with open(PARAMFILE, 'w') as pf: pf.write("### RUNTIME PARAMETERS ###\n") pf.write("\n# COORDINATES\n") write_rparm(pf, 'tf') write_rparm(pf, 'dt') if CPARMS['METRIC'] == 'MINKOWSKI': write_rparm(pf, 'x1Min') write_rparm(pf, 'x1Max') write_rparm(pf, 'x2Min') write_rparm(pf, 'x2Max') write_rparm(pf, 'x3Min') write_rparm(pf, 'x3Max') if CPARMS['METRIC'] == 'MKS': write_rparm(pf, 'Rout') if util.parm_is_active(CPARMS, 'RADIATION'): write_rparm(pf, 'Rout_rad') if NEED_UNITS: pf.write("\n# UNITS\n") if CPARMS['METRIC'] == 'MINKOWSKI': write_rparm(pf, 'L_unit') write_rparm(pf, 'M_unit') if CPARMS['METRIC'] == 'MKS': write_rparm(pf, 'mbh') write_rparm(pf, 'M_unit') pf.write("\n# FLUID\n") write_rparm(pf, 'cour') if util.parm_is_active(CPARMS, 'RADIATION'): write_rparm(pf, 'cour_cool') if CPARMS['EOS'] == 'EOS_TYPE_GAMMA': write_rparm(pf, 'gam') if CPARMS['EOS'] == 'EOS_TYPE_TABLE': write_rparm(pf, 'eospath') if util.parm_is_active(CPARMS, 'ELECTRONS'): pf.write("\n# ELECTRONS\n") write_rparm(pf, 'game') write_rparm(pf, 'gamp') write_rparm(pf, 'fel0') write_rparm(pf, 'tptemin') write_rparm(pf, 'tptemax') if util.parm_is_active(CPARMS, 'RADIATION'): pf.write("\n# RADIATION\n") if not util.parm_is_active(CPARMS, 'ELECTRONS'): write_rparm(pf, 'tp_over_te') write_rparm(pf, 'nph_per_proc') write_rparm(pf, 'numin') write_rparm(pf, 'numax') write_rparm(pf, 'tune_emiss') write_rparm(pf, 'tune_scatt') write_rparm(pf, 't0_tune_emiss') write_rparm(pf, 't0_tune_scatt') write_rparm(pf, 'thetae_max') write_rparm(pf, 'sigma_max') write_rparm(pf, 'kdotk_tol') write_rparm(pf, 'Nph_to_track') write_rparm(pf, 'thbin') write_rparm(pf, 'phibin') if util.parm_is_active(CPARMS, 'BURROWS_OPACITIES'): write_rparm(pf, 'opac_param_file') write_rparm(pf, 'opac_file') if util.parm_is_active(CPARMS, 'HDF5_OPACITIES'): write_rparm(pf, 'opac_file') write_rparm(pf, 'init_from_grmhd') pf.write("\n# OUTPUT\n") write_rparm(pf, 'DTd') write_rparm(pf, 'DTl') write_rparm(pf, 'DTr') write_rparm(pf, 'DNr') write_rparm(pf, 'DTp') write_rparm(pf, 'DTf') write_rparm(pf, 'outputdir') if len(RPARMS.keys()) > 0: pf.write("\n# PROBLEM\n") prob_keys = RPARMS.keys() for key in list(prob_keys): write_rparm(pf, key) print("\n RUNTIME PARAMETER FILE CREATED") if MOVEEXEC: os.rename(PATHS['BUILD'] + 'bhlight', PATHS['BUILD'] + '../bhlight') if WRITE_PARAM: os.rename(PATHS['BUILD'] + PARAM_NAME, PATHS['BUILD'] + '../' + PARAM_NAME) print("") sys.exit()
def initializeAuthentication(): #If no file use default but set filename anyway so the dump function will work for i in range(0,15): #Gets the highest numbered of all directories that are named after floating point values(i.e. most recent timestamp) name = util.getHighestNumberedTimeDirectory(directories.usersdir) possibledir = os.path.join(directories.usersdir,name) #__COMPLETE__ is a special file we write to the dump directory to show it as valid if '''__COMPLETE__''' in util.get_files(possibledir): try: f = open(os.path.join(possibledir,'users.json')) temp = json.load(f) f.close() except: p = "same" p2 = "different" while not p == p2: p = "same" p2 = "different" p = input("No users list found. Account 'admin' created. Choose password:"******"Reenter Password:"******"password mismatch") m = hashlib.sha256() r = os.urandom(16) r64 = base64.b64encode(r) m.update(p) m.update(r) pwd = base64.b64encode(m) temp = { "groups": { "Administrators": { "permissions": [ "/admin/settings.edit", "/admin/logging.edit", "/admin/users.edit", "/users/pagelisting.view", "/admin/modules.edit", "/admin/settings.view", "/admin/mainpage.view", "/users/logs.view", "/admin/modules.view" ] } }, "users": { "admin": { "groups": [ "Administrators" ], "password": pwd, "username": "******", "salt": r64 } } } global Users Users = temp['users'] global Groups Groups = temp['groups'] global Tokens Tokens = {} for user in Users: #What an unreadable line! It turs all the dicts in Users into User() instances Users[user] = User(Users[user]) assignNewToken(user) generateUserPermissions() break #We sucessfully found the latest good users.json dump! so we break the loop else: #If there was no flag indicating that this was an actual complete dump as opposed #To an interruption, rename it and try again shutil.copytree(possibledir,os.path.join(directories.usersdir,name+"INCOMPLETE")) shutil.rmtree(possibledir)
'grain_cover': 4 } # extracting features of grains feat_data = result_dir + 'grain_feature.pkl' if os.path.isfile(feat_data): ftrain, y_train, ftest, y_test = pickle.load(open(feat_data, 'rb')) else: grain_particles = { 'damaged_grain': data_dir + 'damaged_grain', 'foreign': data_dir + 'foreign_particles', 'grain': data_dir + 'grain', 'broken_grain': data_dir + 'grain_broken', 'grain_cover': data_dir + 'grain_covered' } grain_partical_list = { k: get_files(v) for k, v in grain_particles.items() } # impurity_list = get_files(impure) # grain_list = get_files(grain) all_partical = [] for i in grain_partical_list: all_partical += grain_partical_list[i] partical_classes = [] for i in grain_partical_list: a = np.zeros(len(grain_class)) a[grain_class[i]] = 1 partical_classes += [a for j in range(len(grain_partical_list[i]))] # out = [[1, 0] for i in range(len(grain_list))] + [[0, 1] for i in range(len(impurity_list))] x_train, y_train, x_test, y_test = make_sets(all_partical, partical_classes, 0.3)
#coding:utf8 from ConfigParser import ConfigParser import pymongo from util import get_files __author__ = 'cy' cf = ConfigParser() cf.read('dev.cfg') city_num = int(cf.get('calculate', 'city_num')) predict_hour = int(cf.get('calculate', 'predict_hour')) file_path = cf.get('file', 'filepath') start_time = cf.get('file', 'start_time') end_time = cf.get('file', 'end_time') gaps = eval(cf.get('calculate', 'gaps')) all_files = (get_files(start_time, end_time)) day_gaps=eval(cf.get('calculate','day_gaps')) mongo_host=cf.get('mongodb','host') mongo_port=int(cf.get('mongodb','port')) mongo_db_name=cf.get('mongodb','db_name') client=pymongo.MongoClient(mongo_host,mongo_port) db=client.get_database(mongo_db_name) bins = eval(cf.get('calculate', 'bins')) collection_name=cf.get('mongodb','collection')
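# The module-level code above assumes a dev.cfg with at least these sections and
# keys; the values shown here are placeholders, not the project's real settings:
#
#   [calculate]
#   city_num = 10
#   predict_hour = 3
#   gaps = [1, 3, 6]
#   day_gaps = [1, 7]
#   bins = [0, 50, 100, 150]
#
#   [file]
#   filepath = ./data/
#   start_time = 2016010100
#   end_time = 2016063023
#
#   [mongodb]
#   host = 127.0.0.1
#   port = 27017
#   db_name = air_quality
#   collection = predictions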
def decode_to_lattices(model, output_dir, model_dir, mfc_list, lm, dict, model_list, gold_mlf): sys.stderr.write('Decoding to lattices\n') output_mlf = '%s/train_recog.mlf' % output_dir results_log = '%s/results.log' % output_dir ## Create a config file to use with HDecode hdecode_config = '%s/hdecode.config' % output_dir fh = open(hdecode_config, 'w') #fh.write('HLANGMODFILTER = "gunzip -c $.gz"\n') fh.write('HNETFILTER = "gunzip -c < $.gz"\n') fh.write('HNETOFILTER = "gzip -c > $.gz"\n') fh.write('RAWMITFORMAT = T\n') fh.write('HPARM: TARGETKIND = MFCC_0_D_A_Z\n') fh.write('GCFREQ = 50\n') fh.write('HLAT:TRACE = 19\n') fh.write('HLVNET:TRACE = 1\n') fh.write('HLVREC:TRACE = 1\n') fh.write('HLVLM:TRACE = 1\n') fh.write('LATPRUNEBEAM = 500.0\n') fh.write('MAXLMLA = 3.0\n') fh.write('BUILDLATSENTEND = T\n') fh.write('FORCELATOUT = F\n') fh.write('STARTWORD = <s>\n') fh.write('ENDWORD = </s>\n') fh.close() ## HDecode parameters utts_per_split = 100 block_size = 5 beam = 150.0 word_end_beam = 125.0 max_model = 10000 lm_scale = 15.0 word_insertion_penalty = 0.0 def hdecode(input, output): cmd = 'HDecode -A -D -V -T 9 -o M -z lat -C %s' % hdecode_config cmd += ' -H %s/MMF' % model_dir cmd += ' -k %d' % block_size cmd += ' -t %f 100.0' % beam cmd += ' -v %f 115.0' % word_end_beam cmd += ' -u %d' % max_model cmd += ' -s %f' % lm_scale cmd += ' -p %f' % word_insertion_penalty cmd += ' -w %s' % lm cmd += ' -S %s' % input cmd += ' -l %s/' % output cmd += ' %s %s' % (dict, model_list) if model.verbose > 0: cmd += ' >%s/%s.log' % (output_dir, os.path.basename(input)) return cmd ## Split up MFC list split_mfc = SplitList(output_dir, mfc_list, by_path=True) ## Create the HDecode commands cmds = [] inputs = split_mfc.get_files() for input in inputs: output = '%s/%s' % (output_dir, split_mfc.get_key(input)) if not os.path.isdir(output): os.makedirs(output) cmds.append(hdecode(input, output)) if model.local == 1: for cmd in cmds: print cmd print os.popen(cmd).read() else: cmds_file = '%s/hdecode.commands' % output_dir fh = open(cmds_file, 'w') for cmd in cmds: fh.write('%s\n' % cmd) fh.close() util.run_parallel(cmds_file, model.jobs, output_dir) ## Copy old mfc list old_mfc_list = '%s/mfc_old.list' % output_dir os.system('cp %s %s' % (mfc_list, old_mfc_list)) ## Prune bad lats from the mfc list lat_ids = [ os.path.basename(f).split('.')[0] for f in util.get_files(output_dir, r'.*\.lat') ] bad_count = 0 fh = open(mfc_list, 'w') for mfc in open(old_mfc_list): id = os.path.basename(mfc.strip()).split('.')[0] ## Check for missing transcriptions if id not in lat_ids: if model.verbose > 1: util.log_write(model.logfh, 'removed bad lat [%s]' % id) bad_count += 1 else: fh.write(mfc) fh.close() util.log_write(model.logfh, 'removed bad lats [%d]' % bad_count) ## Create an MLF from the recognition output outputs = util.get_files(output_dir, r'.*\.rec') os.popen('rm -f %s' % output_mlf) fh = open(output_mlf, 'w') fh.write('#!MLF!#\n') for output in outputs: fh.write('"%s"\n' % output) for line in open(output): if '<s>' in line or '</s>' in line: continue fh.write(line) fh.write('.\n') fh.close() ## Evaluate cmd = 'HResults -h -n -A -T 1' cmd += ' -I %s' % gold_mlf cmd += ' %s %s > %s' % (model_list, output_mlf, results_log) os.system(cmd) print os.popen('cat ' + results_log).read()
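# The "prune bad lats" step above, in isolation: keep only MFC entries whose
# basename (without extension) has a matching .lat file in the decode output
# directory. os.listdir plus a regex stands in for util.get_files(output_dir,
# r'.*\.lat'); the function name and in-place rewrite of the list file are
# illustrative, not code from the original pipeline.
import os
import re

def prune_mfc_list(mfc_list_path, lattice_dir):
    """Rewrite mfc_list_path keeping entries that produced a lattice; return the removed count."""
    lat_ids = {name.split('.')[0]
               for name in os.listdir(lattice_dir)
               if re.match(r'.*\.lat$', name)}
    kept, removed = [], 0
    with open(mfc_list_path) as fh:
        for line in fh:
            mfc_id = os.path.basename(line.strip()).split('.')[0]
            if mfc_id in lat_ids:
                kept.append(line)
            else:
                removed += 1
    with open(mfc_list_path, 'w') as fh:
        fh.writelines(kept)
    return removed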
def swboard(path_wav, path_trans, config, output, wav_list=[]): """ <wav id> <channelId> <speakerId> <begin time segment> <end time segment> <label> text fsh_60262 1 fsh_60262_A 47.9 49.77 <O,FI,M,STANDARD> I DO IT IS HALLOWEEN """ ## wav files #wav_files = set(util.get_files(path_wav, re.compile('.*\.wav', re.IGNORECASE))) #print 'found wav files [%d]' %len(wav_files) ## load speaker map speaker_by_conv = {} convs_by_speaker = {} for line in open('common/swb_speaker.map'): [conv, speaker] = line.split() speaker_by_conv[conv] = speaker if not speaker in convs_by_speaker: convs_by_speaker[speaker] = [] convs_by_speaker[speaker].append(conv) sorted_speakers = convs_by_speaker.items() sorted_speakers.sort(lambda x,y: len(y[1]) - len(x[1])) ## split convs into sets train1, train2, test = set(), set(), set() history = [1] count1, count2 = 0, 0 for speaker, convs in sorted_speakers: print speaker, convs if len(convs) == 1: test.add(convs[0]) else: if history[-2:] == [1,1] or history[-2:] == [1,2]: train2.update(set(convs)) history.append(2) count2 += 1 else: train1.update(set(convs)) history.append(1) count1 += 1 #print len(train1), len(train2), len(test) #print count1, count2 #sys.exit() ## transcription files trans_files = util.get_files(path_trans, re.compile('.*-trans\.text', re.IGNORECASE)) print 'found transcription files [%d]' %len(trans_files) output1 = output + '_1' output2 = output + '_2' output3 = output + '_3' fh1, fh2, fh3 = open(output1, 'w'), open(output2, 'w'), open(output3, 'w') stm1, stm2, stm3 = open(output1 + '.stm', 'w'), open(output2 + '.stm', 'w'), open(output3 + '.stm', 'w') utt_count = 0 file_count = 0 token_count = 0 word_list = set() for file in trans_files: file_count += 1 if file_count % 100 == 0: print 'transcription files processed [%d]' %file_count dirname = os.path.dirname(file) for line in open(file): items = line.strip().split() id = items[0] [id, ms98, a, utt] = id.split('-') side = id[-1] id = id[:-1] start, end = items[1], items[2] words = map(str.upper, items[3:]) dir = '%s_%s' %(id, side) new_id = '%s%s-ms98-a-%s' %(id, side, utt) wav_file = path_wav + '%s/%s.wav' %(dir, new_id) if not os.path.isfile(wav_file): continue if sum([int(w.startswith('[') or w.endswith(']')) for w in words]) == len(words): continue if len(words) == 0: continue trans = fix_swb_trans(words) if len(trans) == 0: continue if trans.isdigit(): continue conv = (id + '_' + side).replace('sw', '') if conv in train1: fh = fh1 stm = stm1 elif conv in train2: fh = fh2 stm = stm2 elif conv in test: fh = fh3 stm = stm3 else: print 'No set exists for conv: %s' %conv print trans continue fh.write('%s %s %s\n' %(wav_file, config, trans)) stm.write('%s %s %s%s %s %s %s\n' %(new_id, side, id, side, start, end, trans)) utt_count += 1 for word in words: word_list.add(word) token_count += len(words) fh.close() stm.close() print 'Using [%s] wrote [%d] utts to [%s]' %(path_trans, utt_count, output) print 'Tokens [%d] Types [%d]' %(token_count, len(word_list))
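# A simplified sketch of the speaker-disjoint split built in swboard() above:
# speakers with a single conversation go to the test set, and the remaining
# speakers are dealt alternately to two training sets so that no speaker's
# conversations are shared between them. This is a plain round-robin, not the
# exact history-based balancing heuristic in the original.
def split_by_speaker(convs_by_speaker):
    """convs_by_speaker: dict mapping speaker id -> list of conversation ids."""
    train1, train2, test = set(), set(), set()
    ordered = sorted(convs_by_speaker.items(),
                     key=lambda kv: len(kv[1]), reverse=True)
    toggle = True
    for speaker, convs in ordered:
        if len(convs) == 1:
            test.add(convs[0])
        elif toggle:
            train1.update(convs)
            toggle = False
        else:
            train2.update(convs)
            toggle = True
    return train1, train2, test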
text = text + "{:.40f}".format(y).rstrip('0').rstrip('.') + ',' text = text + '\n' return text def addLayer(model): x = model.output x = Dense(1024)(x) x = GlobalMaxPooling2D(name='gmaxpool')(x) model = Model(model.input, x) return model # create models if __name__ == '__main__': files = get_files(filesdir) start = timeit.default_timer() models = dict() print('setting up models...') model = appl.xception.Xception(weights='imagenet', include_top=False) model = addLayer(model) models['xception'] = model model = appl.vgg16.VGG16(weights='imagenet', include_top=False) model = addLayer(model) models['vgg16'] = model model = appl.vgg19.VGG19(weights='imagenet', include_top=False) model = addLayer(model) models['vgg19'] = model model = appl.resnet50.ResNet50(weights='imagenet', include_top=False) model = addLayer(model) models['resnet50'] = model
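# Sketch of an alternative way to get fixed-length features from the same
# pretrained backbones used above: keras.applications constructors accept
# pooling='max' together with include_top=False, which appends the global
# max-pool directly instead of the addLayer() wrapper. The Dense(1024)
# projection from addLayer() is deliberately left out here; this is a sketch,
# not the original script's model setup.
from tensorflow.keras import applications as appl

def make_extractor(name):
    builders = {
        'xception': appl.Xception,
        'vgg16': appl.VGG16,
        'vgg19': appl.VGG19,
        'resnet50': appl.ResNet50,
    }
    return builders[name](weights='imagenet', include_top=False, pooling='max')

if __name__ == '__main__':
    models = {name: make_extractor(name)
              for name in ('xception', 'vgg16', 'vgg19', 'resnet50')}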
def build(PROBLEM, PATHS): print("") print("********************************************************************************") print("") print(" BHLIGHT BUILD SCRIPT") print("") print(" OPTIONS:") print(" -help (print this message and exit)") print(" -debug (use debug compile params)") print(" -force (do not abort upon compile-time warnings/errors)") print(" -noclean (do not delete old source files)") print(" -noparam (do not create new parameter file)") print(" -dir /path/to/target (create target dir and build there)") print("") print("********************************************************************************") print("") if is_user_help(): sys.exit() # PROCESS USER INPUT DEBUG = is_user_debug() FORCE = is_user_force() MOVEEXEC = set_dirs(PATHS) NOPARAM = is_user_noparam() NOCLEAN = is_user_noclean() CLEAN = not NOCLEAN WRITE_PARAM = not NOPARAM # get version VERSION = get_version() # PRINT TO TERMINAL AND LOGFILE LOGFILE = os.path.join(PATHS['BUILD'], 'log_build') util.log_output(sys, LOGFILE) # SEARCH FOR MACHINE machines = util.get_files(PATHS['MACHINE'], '*') for n in range(len(machines)): machines[n] = machines[n].split('/')[-1].replace('.py', '') machine = __import__(machines[n]) if machine.matches_host() == True: break del machine try: machine except NameError: util.warn("HOST " + os.uname()[1] + " UNKNOWN"); sys.exit() host = machine.get_options() if DEBUG and 'DEBUG_FLAGS' not in host.keys(): util.warn("Debug compiler options not set! Using normal compiler flags.") host['DEBUG_FLAGS'] = host['COMPILER_FLAGS'] C_FLAGS = '-std=c99 -mcmodel=medium ' if DEBUG: C_FLAGS += host['DEBUG_FLAGS'] else: C_FLAGS += host['COMPILER_FLAGS'] # MATH AND DYNAMIC LINKING LIB_FLAGS = '-lm -ldl' LIBRARIES = '' INCLUDES = '' # GSL host['GSL_DIR'] = util.sanitize_path(host['GSL_DIR']) LIB_FLAGS += (' -lgsl -lgslcblas' + ' -Wl,-rpath=' + host['GSL_DIR'] + 'lib/') LIBRARIES += '-L' + host['GSL_DIR'] + 'lib/' INCLUDES += '-I' + host['GSL_DIR'] + 'include/' # MPI if 'MPI_DIR' in host: host['MPI_DIR'] = util.sanitize_path(host['MPI_DIR']) LIB_FLAGS += (' -Wl,-rpath=' + host['MPI_DIR'] + 'lib/') LIBRARIES += ' -L' + host['MPI_DIR'] + 'lib/' INCLUDES += ' -I' + host['MPI_DIR'] + 'include/' # HDF5 if 'HDF5_DIR' in host: host['HDF5_DIR'] = util.sanitize_path(host['HDF5_DIR']) LIB_FLAGS += (' -lhdf5_hl -lhdf5' +' -Wl,-rpath=' + host['HDF5_DIR'] + 'lib/') LIBRARIES += ' -L' + host['HDF5_DIR'] + 'lib/' INCLUDES += ' -I' + host['HDF5_DIR'] + 'include/' print(" CONFIGURATION\n") def print_config(key, var): print(" " + util.color.BOLD + "{:<15}".format(key) + util.color.NORMAL + str(var)) set_cparm("VERSION", '"{}"'.format(VERSION)) set_cparm("PROBLEM_NAME", '"{}"'.format(PROBLEM)) print_config("VERSION", VERSION) print_config("MACHINE", host['NAME']) print_config("PROBLEM", PROBLEM) print_config("BUILD DIR", PATHS['BUILD']) print_config("COMPILER", host['COMPILER']) print_config("GSL_DIR", host['GSL_DIR']) if 'MPI_DIR' in host: print_config("MPI_DIR", host['MPI_DIR']) if 'HDF5_DIR' in host: print_config("HDF5_DIR", host['HDF5_DIR']) print_config("EXECUTABLE", host['EXECUTABLE']) print_config("C_FLAGS", C_FLAGS) print_config("LIB_FLAGS", LIB_FLAGS) print_config("LIBRARIES", LIBRARIES) print_config("INCLUDES", INCLUDES) print_config("OPENMP", CPARMS['OPENMP']) print("\n COMPILE-TIME PARAMETERS\n") print_config("N1TOT", CPARMS['N1TOT']) print_config("N2TOT", CPARMS['N2TOT']) print_config("N3TOT", CPARMS['N3TOT']) print_config("N1CPU", CPARMS['N1CPU']) print_config("N2CPU", CPARMS['N2CPU']) print_config("N3CPU", 
CPARMS['N3CPU']) print_config("METRIC", CPARMS['METRIC']) print_config("RECONSTRUCTION", CPARMS['RECONSTRUCTION']) if util.parm_is_active(CPARMS, 'RADIATION'): print_config("RADIATION", CPARMS['RADIATION']) if util.parm_is_active(CPARMS, 'NTH'): print_config("NTH", CPARMS["NTH"]) else: set_cparm("NTH", 8) if util.parm_is_active(CPARMS, 'NPHI'): print_config("NPHI", CPARMS["NPHI"]) else: set_cparm("NPHI", 8) if util.parm_is_active(CPARMS, "NU_BINS_EMISS"): print_config("NU_BINS_EMISS", CPARMS["NU_BINS_EMISS"]) else: set_cparm("NU_BINS_EMISS", 200) if util.parm_is_active(CPARMS, "NU_BINS_SPEC"): print_config("NU_BINS_SPEC", CPARMS["NU_BINS_SPEC"]) else: set_cparm("NU_BINS_SPEC", 200) if util.parm_is_active(CPARMS, "SUPPRESS_FLR_RADIATION"): print_config("SUPPRESS_FLR_RADIATION", CPARMS["SUPPRESS_FLR_RADIATION"]) else: set_cparm("SUPPRESS_FLR_RADIATION", 0) else: set_cparm("RADIATION", 0) if util.parm_is_active(CPARMS, 'ELECTRONS'): print_config("ELECTRONS", CPARMS['ELECTRONS']) else: set_cparm("ELECTRONS", 0) if util.parm_is_active(CPARMS, 'FLOORADV'): print_config("FLOORADV", CPARMS['FLOORADV']) else: set_cparm("FLOORADV", 0) #if util.parm_is_active(CPARMS,'NVAR_PASSIVE'): # print_config("NVAR_PASSIVE", CPARMS["NVAR_PASSIVE"]) #else: # set_cparm("NVAR_PASSIVE", 0) #if util.parm_is_active(CPARMS, 'OUTPUT_EOSVARS'): # print_config("OUTPUT_EOSVARS", CPARMS["OUTPUT_EOSVARS"]) #else: # set_cparm("OUTPUT_EOSVARS", 0) # Set core runtime parameters set_rparm('tf', 'double') set_rparm('dt', 'double') if CPARMS['METRIC'] == 'MINKOWSKI': set_rparm('x1Min', 'double', default = 0.) set_rparm('x1Max', 'double', default = 1.) set_rparm('x2Min', 'double', default = 0.) set_rparm('x2Max', 'double', default = 1.) set_rparm('x3Min', 'double', default = 0.) set_rparm('x3Max', 'double', default = 1.) if CPARMS['METRIC'] == 'MKS': set_rparm('a', 'double', default = 0.5) set_rparm('hslope', 'double', default = 0.3) set_rparm('Rout', 'double', default = 40.) set_rparm('Rout_vis', 'double', default = 40.) if CPARMS['METRIC'] == 'MMKS': set_rparm('a', 'double', default = 0.5) set_rparm('hslope', 'double', default = 0.3) set_rparm('poly_xt', 'double', default = 0.82) set_rparm('poly_alpha', 'double', default = 14.) set_rparm('mks_smooth', 'double', default = 0.5) set_rparm('Rout', 'double', default = 40.) set_rparm('Rout_vis', 'double', default = 40.) if util.parm_is_active(CPARMS, 'RADIATION'): set_rparm('Rout_rad', 'double', default = 40.) if (util.parm_is_active(CPARMS, 'RADIATION') or util.parm_is_active(CPARMS, 'COULOMB')): if CPARMS['METRIC'] == 'MINKOWSKI': set_rparm('L_unit', 'double') set_rparm('M_unit', 'double') if CPARMS['METRIC'] == 'MKS' or CPARMS['METRIC'] == 'MMKS': set_rparm('M_unit', 'double') set_rparm('mbh', 'double', default = 1.989e34) set_rparm('gam', 'double', default = 5./3.) set_rparm('cour', 'double', default = 0.9) if util.parm_is_active(CPARMS, 'ELECTRONS'): set_rparm('game', 'double', default = 4./3.) set_rparm('gamp', 'double', default = 5./3.) set_rparm('fel0', 'double', default = 0.01) set_rparm('tptemin', 'double', default = 1.e-3) set_rparm('tptemax', 'double', default = 1.e3) if util.parm_is_active(CPARMS, 'RADIATION'): if not util.parm_is_active(CPARMS, 'ELECTRONS'): set_rparm('tp_over_te', 'double', default = 1.) 
set_rparm('nph_per_proc', 'double', default = 1.e5) set_rparm('numin_emiss', 'double', default = 1.e8) set_rparm('numax_emiss', 'double', default = 1.e20) set_rparm('numin_spec', 'double', default = 1.e8) set_rparm('numax_spec', 'double', default = 1.e20) set_rparm('tune_emiss', 'double', default = 1.) set_rparm('tune_scatt', 'double', default = 1.) set_rparm('t0_tune_emiss', 'double', default = -1.) set_rparm('t0_tune_scatt', 'double', default = -1.) set_rparm('thetae_max', 'double', default = 1.e3) set_rparm('sigma_max', 'double', default = 1.) set_rparm('kdotk_tol', 'double', default = 1.e-6) set_rparm('Nph_to_track', 'double', default = 0.); set_rparm('init_from_grmhd', 'string', default = 'No') set_rparm('DTd', 'double', default = 0.5) set_rparm('DTl', 'double', default = 0.1) set_rparm('DTr', 'double', default = 1000.) set_rparm('DNr', 'integer', default = 1024) set_rparm('DTp', 'integer', default = 100) set_rparm('DTf', 'integer', default = 1) set_rparm('outputdir', 'string', default = './') # GET ALL SOURCE FILES SRC_CORE = util.get_files(PATHS['CORE'], '*.c') INC_CORE = util.get_files(PATHS['CORE'], '*.h') SRC_PROB = util.get_files(PATHS['PROB'], '*.c') INC_PROB = util.get_files(PATHS['PROB'], '*.h') # Clean if necessary if CLEAN: util.make_clean(PATHS['SRC']) # COPY SOURCE FILES TO BUILD_DIR for src in SRC_CORE: call(['cp', src, PATHS['SRC'] + src.rsplit('/',1)[1]]) for inc in INC_CORE: call(['cp', inc, PATHS['SRC'] + inc.rsplit('/',1)[1]]) for src in SRC_PROB: call(['cp', src, PATHS['SRC'] + src.rsplit('/',1)[1]]) for inc in INC_PROB: call(['cp', inc, PATHS['SRC'] + inc.rsplit('/',1)[1]]) # WRITE PARAMETERS FILE pf = open(PATHS['SRC'] + 'params.h', 'w') for KEY in CPARMS: if isinstance(CPARMS[KEY], str): pf.write("#define " + KEY + " (" + CPARMS[KEY] + ")\n") else: pf.write("#define " + KEY + " (%g)\n" % CPARMS[KEY]) pf.close() # GET SINGLE LISTS OF ALL SOURCE, OBJECT, AND HEADER FILES SRC_ALL = util.get_files(PATHS['SRC'], '*.c') INC_ALL = util.get_files(PATHS['SRC'], '*.h') SRC = '' OBJ = '' INC = '' for n in range(len(SRC_ALL)): SRC += '%s ' % os.path.basename(SRC_ALL[n]) OBJ += '%s.o ' % os.path.basename(SRC_ALL[n])[:-2] for n in range(len(INC_ALL)): INC += '%s ' % os.path.basename(INC_ALL[n]) # WRITE MAKEFILE os.chdir(PATHS['SRC']) mf = open('makefile', 'w') mf.write('CC = ' + host['COMPILER'] + '\n') mf.write('CCFLAGS = ' + C_FLAGS + ' ' + LIBRARIES + ' ' + INCLUDES + '\n') mf.write('LIB_FLAGS = ' + LIB_FLAGS + '\n') mf.write('CC_COMPILE = $(CC) $(CCFLAGS) -c' + '\n') mf.write('CC_LOAD = $(CC) $(CCFLAGS)' + '\n') mf.write('.c.o:' + '\n') mf.write('\t$(CC_COMPILE) $*.c' + '\n') mf.write('EXE = bhlight' + '\n') mf.write('all: $(EXE)' + '\n') mf.write('SRC = ' + SRC + '\n') mf.write('OBJ = ' + OBJ + '\n') mf.write('INC = ' + INC + '\n') mf.write('$(OBJ): $(INC) makefile' + '\n') mf.write('$(EXE): $(OBJ) $(INC) makefile' + '\n') mf.write('\t$(CC_LOAD) $(OBJ) $(LIB_FLAGS) -o $(EXE)\n') mf.write('clean:\n') mf.write('\t$(RM) $(SRC) $(OBJ) $(EXE) $(INC)\n') mf.close() print("\n COMPILING SOURCE\n") ncomp = 0 first_error = 1 if DEBUG: popen = subprocess.Popen(['make'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) else: popen = subprocess.Popen(['make','-j','10'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) for stdout_line in iter(popen.stdout.readline, ""): if stdout_line.rstrip()[-2:] == '.c': print(" [" + util.color.BOLD + util.color.BLUE + "%2d%%" % (100.*float(ncomp)/len(SRC_ALL)) + util.color.NORMAL + "] " + 
util.color.BOLD + stdout_line.rsplit('-c',1)[1].rstrip().lstrip().split('/')[-1] + util.color.NORMAL) ncomp += 1 for stderr_line in iter(popen.stderr.readline, ""): # THIS ALSO FAILS FOR WARNINGS!!! if first_error == 1: util.warn("COMPILER ERROR") first_error = 0 print(stderr_line.rstrip()) if first_error != 1 and not FORCE: util.warn("COMPILATION FAILED") sys.exit() obj_files = util.get_files(PATHS['SRC'], '*.o') for f in obj_files: os.remove(f) os.rename(PATHS['SRC'] + 'bhlight', PATHS['BUILD'] + 'bhlight') print("\n BUILD SUCCESSFUL") # CREATE RUNTIME PARAMETERS FILE PARAMFILE = PATHS['BUILD'] + PARAM_NAME if WRITE_PARAM: with open(PARAMFILE, 'w') as pf: pf.write("### RUNTIME PARAMETERS ###\n") pf.write("\n# COORDINATES\n") write_rparm(pf, 'tf') write_rparm(pf, 'dt') if CPARMS['METRIC'] == 'MINKOWSKI': write_rparm(pf, 'x1Min') write_rparm(pf, 'x1Max') write_rparm(pf, 'x2Min') write_rparm(pf, 'x2Max') write_rparm(pf, 'x3Min') write_rparm(pf, 'x3Max') if CPARMS['METRIC'] == 'MKS' or CPARMS['METRIC'] == 'MMKS': write_rparm(pf, 'Rout') if util.parm_is_active(CPARMS, 'RADIATION'): write_rparm(pf, 'Rout_rad') if (util.parm_is_active(CPARMS, 'RADIATION') or util.parm_is_active(CPARMS, 'COULOMB')): pf.write("\n# UNITS\n") if CPARMS['METRIC'] == 'MINKOWSKI': write_rparm(pf, 'L_unit') write_rparm(pf, 'M_unit') if CPARMS['METRIC'] == 'MKS' or CPARMS['METRIC'] == 'MMKS': write_rparm(pf, 'mbh') write_rparm(pf, 'M_unit') pf.write("\n# FLUID\n") write_rparm(pf, 'gam') write_rparm(pf, 'cour') if util.parm_is_active(CPARMS, 'ELECTRONS'): pf.write("\n# ELECTRONS\n") write_rparm(pf, 'game') write_rparm(pf, 'gamp') write_rparm(pf, 'fel0') write_rparm(pf, 'tptemin') write_rparm(pf, 'tptemax') if util.parm_is_active(CPARMS, 'RADIATION'): pf.write("\n# RADIATION\n") if not util.parm_is_active(CPARMS, 'ELECTRONS'): write_rparm(pf, 'tp_over_te') write_rparm(pf, 'nph_per_proc') write_rparm(pf, 'numin_emiss') write_rparm(pf, 'numax_emiss') write_rparm(pf, 'numin_spec') write_rparm(pf, 'numax_spec') write_rparm(pf, 'tune_emiss') write_rparm(pf, 'tune_scatt') write_rparm(pf, 't0_tune_emiss') write_rparm(pf, 't0_tune_scatt') write_rparm(pf, 'thetae_max') write_rparm(pf, 'sigma_max') write_rparm(pf, 'kdotk_tol') write_rparm(pf, 'Nph_to_track') pf.write("\n# OUTPUT\n") write_rparm(pf, 'DTd') write_rparm(pf, 'DTl') write_rparm(pf, 'DTr') write_rparm(pf, 'DNr') write_rparm(pf, 'DTp') write_rparm(pf, 'DTf') write_rparm(pf, 'outputdir') if len(RPARMS.keys()) > 0: pf.write("\n# PROBLEM\n") prob_keys = RPARMS.keys() for key in list(prob_keys): write_rparm(pf, key) print("\n RUNTIME PARAMETER FILE CREATED") if MOVEEXEC: os.rename(PATHS['BUILD'] + 'bhlight', PATHS['BUILD'] + '../bhlight') if WRITE_PARAM: os.rename(PATHS['BUILD'] + PARAM_NAME, PATHS['BUILD'] + '../' + PARAM_NAME) print("") sys.exit()
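# The compile-time parameter file written inside build() above, as a
# standalone helper: every entry of the CPARMS dict becomes a #define, with
# string values passed through verbatim and numeric values formatted like %g.
# The function name, example dict, and output filename are illustrative only.
def write_params_header(cparms, path):
    with open(path, 'w') as pf:
        for key, value in cparms.items():
            if isinstance(value, str):
                pf.write("#define {} ({})\n".format(key, value))
            else:
                pf.write("#define {} ({:g})\n".format(key, value))

if __name__ == '__main__':
    write_params_header({'N1TOT': 128, 'OPENMP': 1, 'VERSION': '"example"'},
                        'params.h')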
import tensorflow as tf
from preprocess_images.Image import Image
from model.style_transform import StyleTransform, get_style_feature
import params as P
import time
import util
import os

style_name_list = ['la_muse', 'wave_crop', 'mirror', 'starry_night', 'udnie']
style_name = 'wave_crop'
style_p = P.st_style_image_path_dict.get(style_name, 'resource/style/mirror.jpg')

style_image = Image(style_p)
style_image.image_resize(P.width, P.high)
style_image.extend_dim()

trains_files = util.get_files(P.st_train_path)
model_saved_path = P.st_model_saved_dir + style_name + '/'
style_features = get_style_feature(style_image, '/cpu:0', style_name)

with tf.Session() as session:
    writer = tf.summary.FileWriter(P.st_logs + style_name + "_{}".format(time.time()))
    model = StyleTransform(style_features, P.st_batch_size, width=P.width, height=P.high,
                           content_weight=P.st_content_weight, style_weight=P.st_style_weight,
                           tv_weight=P.st_tv_weight)
    model.create_network()
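# A small sketch of how a file list like trains_files is typically consumed in
# fixed-size batches of P.st_batch_size during training; batch_iter is a
# hypothetical helper (it drops the final partial batch) and is not part of
# the original training code.
def batch_iter(paths, batch_size):
    for start in range(0, len(paths) - batch_size + 1, batch_size):
        yield paths[start:start + batch_size]

# usage sketch: for batch in batch_iter(trains_files, P.st_batch_size): ...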
TASK_NAME = 'COVID Lung Ultrasound'
TRAINING_TYPE = 'federated'
# Currently we take the first `NUM_IMAGES` in the folder for each peer. We should make a more complex distribution.
NUM_IMAGES = 10
# paths to folders containing covid-positive and covid-negative patients
POSITIVE_CLASS_PATH = r'covid-positive'
NEGATIVE_CLASS_PATH = r'covid-negative'

start_time = time.time()

op = webdriver.ChromeOptions()
op.add_argument('headless')
drivers = [webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)]
# drivers = [webdriver.Chrome(ChromeDriverManager().install(), opt) for i in range(NUM_PEERS)]

positive_files = get_files(POSITIVE_CLASS_PATH, NUM_IMAGES, '.png')
negative_files = get_files(NEGATIVE_CLASS_PATH, NUM_IMAGES, '.png')

if DATA_SPLIT == 'partition':
    pos_partitions = partition(positive_files, NUM_PEERS)
    neg_partitions = partition(negative_files, NUM_PEERS)
elif DATA_SPLIT == 'rpartition':
    pos_partitions = r_partition(positive_files, NUM_PEERS)
    neg_partitions = r_partition(negative_files, NUM_PEERS)
elif DATA_SPLIT == 'spartition':
    pos_partitions = s_partition(positive_files, RATIOS)
    neg_partitions = s_partition(negative_files, RATIOS)

for index, driver in enumerate(drivers):
    # Click 'Start Building' on home page
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)
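# partition / r_partition / s_partition are called above but defined elsewhere
# in the project. These are minimal stand-ins inferred from how they are used
# (even split across peers, shuffled split, and split by a list of ratios);
# they are assumptions, not the project's actual implementations.
import random

def partition(files, num_peers):
    """Deal files round-robin into num_peers lists."""
    return [files[i::num_peers] for i in range(num_peers)]

def r_partition(files, num_peers):
    """Shuffle a copy of the list, then split it evenly."""
    shuffled = files[:]
    random.shuffle(shuffled)
    return partition(shuffled, num_peers)

def s_partition(files, ratios):
    """Split according to a list of fractions that sum to at most 1."""
    parts, start = [], 0
    for ratio in ratios:
        end = start + int(len(files) * ratio)
        parts.append(files[start:end])
        start = end
    return parts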
# paths to the folder of CIFAR10 images and to the CSV file of labels
IMAGE_FILE_PATH = r'CIFAR10'
LABEL_FILE_PATH = 'labels.csv'
NUM_IMAGES = 10

# Download and extract chromedriver from here: https://sites.google.com/a/chromium.org/chromedriver/downloads
op = webdriver.ChromeOptions()
op.add_argument('headless')
# You can add options=op for chrome headless mode
# drivers = [webdriver.Chrome(ChromeDriverManager().install(), options=op) for i in range(NUM_PEERS)]
drivers = [webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)]

start_time = time.time()

if DATA_SPLIT == 'partition':
    partitions = partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), NUM_PEERS)
elif DATA_SPLIT == 'spartition':
    partitions = s_partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), RATIOS)
elif DATA_SPLIT == 'rpartition':
    partitions = r_partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), NUM_PEERS)

for index, driver in enumerate(drivers):
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)
    # Upload files on Task Training
    time.sleep(6)
    if DATA_SPLIT != 'iid':
        driver.find_element_by_id('hidden-input_cifar10_Images').send_keys(' \n '.join(partitions[index]))
        driver.find_element_by_id('hidden-input_cifar10_Labels').send_keys(os.path.abspath(LABEL_FILE_PATH))
def phonemark_lattices(model, lattice_dir, output_dir, model_dir, mfc_list, lm, dict, model_list): sys.stderr.write('Phonemarking lattices\n') ## Create a config file to use with HDecode hdecode_config = '%s/hdecode.config' %output_dir fh = open(hdecode_config, 'w') #fh.write('HLANGMODFILTER = "gunzip -c $.gz"\n') fh.write('HNETFILTER = "gunzip -c < $.gz"\n') fh.write('HNETOFILTER = "gzip -c > $.gz"\n') fh.write('RAWMITFORMAT = T\n') fh.write('HPARM: TARGETKIND = MFCC_0_D_A_Z\n') fh.write('GCFREQ = 50\n') fh.write('HLAT:TRACE = 19\n') fh.write('HLVNET:TRACE = 1\n') fh.write('HLVREC:TRACE = 1\n') fh.write('HLVLM:TRACE = 1\n') fh.write('LATPRUNEBEAM = 500.0\n') fh.write('MAXLMLA = 3.0\n') fh.write('BUILDLATSENTEND = T\n') fh.write('FORCELATOUT = F\n') fh.write('STARTWORD = <s>\n') fh.write('ENDWORD = </s>\n') fh.close() ## HDecode parameters utts_per_split = 100 block_size = 5 beam = 200.0 lm_scale = 15.0 word_insertion_penalty = 0.0 def hdecode_mod(input, path): input_dir = '%s/%s/' %(lattice_dir, path) if not os.path.isdir(input_dir): input_dir = '%s/%s/' %(lattice_dir, path.replace('_', '')) cmd = 'HDecode.mod -A -D -V -T 9 -q tvaldm -z lat -X lat -C %s' %hdecode_config cmd += ' -H %s/MMF' %model_dir cmd += ' -k %d' %block_size cmd += ' -t %f' %beam cmd += ' -s %f' %lm_scale cmd += ' -p %f' %word_insertion_penalty cmd += ' -w' # %s' %lm cmd += ' -S %s' %input cmd += ' -l %s/%s/' %(output_dir, path) cmd += ' -L %s' %input_dir cmd += ' %s %s' %(dict, model_list) if model.verbose > 0: cmd += ' >%s/%s.log' %(output_dir, os.path.basename(input)) return cmd ## Split up MFC list with unix split split_mfc = SplitList(output_dir, mfc_list, by_path=True) ## Create the HDecode commands cmds = [] inputs = split_mfc.get_files() for input in inputs: key = split_mfc.get_key(input) new_output = '%s/%s' %(output_dir, key) if not os.path.isdir(new_output): os.makedirs(new_output) cmds.append(hdecode_mod(input, key)) if model.local == 1: for cmd in cmds: print cmd print os.popen(cmd).read() else: cmds_file = '%s/hdecode_mod.commands' %output_dir fh = open(cmds_file, 'w') for cmd in cmds: fh.write('%s\n' %cmd) fh.close() util.run_parallel(cmds_file, model.jobs, output_dir) ## Copy old mfc list old_mfc_list = '%s/mfc_old.list' %output_dir os.system('cp %s %s' %(mfc_list, old_mfc_list)) ## Prune bad lats from the mfc list lat_ids = [os.path.basename(f).split('.')[0] for f in util.get_files(output_dir, r'.*\.lat')] bad_count = 0 fh = open(mfc_list, 'w') for mfc in open(old_mfc_list): id = os.path.basename(mfc.strip()).split('.')[0] ## Check for missing transcriptions if id not in lat_ids: if model.verbose > 1: util.log_write(model.logfh, 'removed bad lat [%s]' %id) bad_count += 1 else: fh.write(mfc) fh.close() util.log_write(model.logfh, 'removed bad lats [%d]' %bad_count)
def dumpLogFile(): if config['log-format'] == 'normal': def dump(j,f): f.write(json.dumps(j,sort_keys=True,indent=1).encode()) elif config['log-format'] == 'tiny': def dump(j,f): f.write(json.dumps(j,sort_keys=True,separators=(',',':')).encode()) elif config['log-format'] == 'pretty': def dump(j,f): f.write(json.dumps(j,sort_keys=True,indent=4, separators=(',', ': ')).encode()) else: def dump(j,f): f.write(json.dumps(j,sort_keys=True,indent=1).encode()) messagebus.postMessage("system/notifications","Invalid config option for 'log-format' so defaulting to normal") if config['log-compress'] == 'bz2': openlog= bz2.BZ2File ext = '.json.bz2' elif config['log-compress'] == 'gzip': openlog = gzip.GzipFile ext = '.json.gz' elif config['log-compress'] == 'none': openlog = open ext = '.json' else: openlog = open messagebus.postMessage("system/notifications","Invalid config option for 'log-compress' so defaulting to no compression") global log,loglistchanged global approxtotallogentries with savelock: temp = log log = defaultdict(list) approxtotallogentries = 0 if loglistchanged: #Save the list of things to dump with open(os.path.join(directories.logdir,"whattosave.txt"),'w') as f: for i in toSave: f.write(i+'\n') loglistchanged = False #Get rid of anything that is not in the list of things to dump to the log temp2 = {} saveset = set(toSave) for i in temp: #Parsetopic is a function that returns all subscriptions that would match a topic if not set(messagebus.MessageBus.parseTopic(i)).isdisjoint(saveset): temp2[i] = temp[i] temp = temp2 #If there is no log entries to save, don't dump an empty file. if not temp: return where =os.path.join(directories.logdir,'dumps') #Actually dump the log. with openlog(os.path.join(where,str(time.time())+ext),'wb') as f: print() dump(temp,f) f.close() asnumbers = {} for i in util.get_files(where): try: #Remove extensions if i.endswith(".json"): asnumbers[float(i[:-5])] = i elif i.endswith(".json.gz"): asnumbers[float(i[:-8])] = i elif i.endswith(".json.bz2"): asnumbers[float(i[:-9])] = i except ValueError: pass maxsize = unitsofmeasure.strToIntWithSIMultipliers(config['keep-log-files']) size = 0 #Loop over all the old log dumps and add up the sizes for i in util.get_files(where): size = size + os.path.getsize(os.path.join(where,i)) #Get rid of oldest log dumps until the total size is within the limit for i in sorted(asnumbers.keys()): if size <= maxsize: break size = size - os.path.getsize(os.path.join(where,i)) os.remove(os.path.join(where,asnumbers[i]))
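# The retention logic at the end of dumpLogFile(), as a standalone sketch:
# dump files are named by their creation timestamp, so the stems are parsed
# as floats, sizes are summed, and the oldest dumps are deleted until the
# directory fits the byte budget. The function name and arguments are
# illustrative; the original reads its budget from config['keep-log-files'].
import os

def prune_dumps(dump_dir, max_bytes):
    # Map timestamp -> filename for every dump whose stem parses as a float
    # (handles .json, .json.gz and .json.bz2 alike by splitting on '.json').
    dumps = {}
    for name in os.listdir(dump_dir):
        stem = name.split('.json')[0]
        try:
            dumps[float(stem)] = name
        except ValueError:
            continue
    total = sum(os.path.getsize(os.path.join(dump_dir, n)) for n in dumps.values())
    # Delete oldest dumps first until the directory is within the budget.
    for ts in sorted(dumps):
        if total <= max_bytes:
            break
        path = os.path.join(dump_dir, dumps[ts])
        total -= os.path.getsize(path)
        os.remove(path)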
NUM_IMAGES = 20
# Digit folder paths; adjust the path separator for your operating system
DIGIT_CLASS_PATHS = [
    r'0', r'1', r'2', r'3', r'4', r'5', r'6', r'7', r'8', r'9'
]

start_time = time.time()

op = webdriver.ChromeOptions()
op.add_argument('headless')
drivers = [
    webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)
]

digit_files = [
    get_files(DIGIT_CLASS_PATHS[i], NUM_IMAGES, '.jpg')
    for i in range(len(DIGIT_CLASS_PATHS))
]

if DATA_SPLIT == 'partition':
    digit_partitions = [
        partition(digit_files[i], NUM_PEERS) for i in range(len(digit_files))
    ]
elif DATA_SPLIT == 'rpartition':
    digit_partitions = [
        r_partition(digit_files[i], NUM_PEERS) for i in range(len(digit_files))
    ]
elif DATA_SPLIT == 'spartition':
    digit_partitions = [
        s_partition(digit_files[i], RATIOS) for i in range(len(digit_files))
    ]
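# digit_partitions above is indexed [digit][peer]; a typical next step is to
# flatten it so each peer gets a single list covering all ten digit classes.
# This reshaping helper is a sketch of that step, not code from the original
# script.
def flatten_by_peer(digit_partitions, num_peers):
    """Collect each peer's share across all digit classes into one list."""
    return [[path for digit in digit_partitions for path in digit[peer]]
            for peer in range(num_peers)]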
def generate(): impath = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots" # args["ip"] compath = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites" # args["cp"] with open("tms2.json", "r") as tm: it = json.load(tm) goodUris = [] with open("gooduris_20160225.txt", "r") as read: for uri in map(lambda line: line.rstrip("\n"), read): goodUris.append(uri) color = {} # type: dict[str,cs] with open("temporalPairs.csv","r") as read: reader = csv.DictReader(read) for row in reader: color[row['site']] = cs(row) with open("compositeToComposite.csv","r") as read: reader = csv.DictReader(read) for row in reader: arsim = row['alsumRandomSim'] atsim = row['alsumTemporalSim'] color[row['site']].ctcRsim = arsim color[row['site']].ctcTsim = atsim with open("alSumVSrandom_onetoone.csv","r") as read: reader = csv.DictReader(read) for row in reader: arsim = row['average'] color[row['site']].otoRsim = arsim with open("alSumVStemporalInterval_onetoone.csv","r") as read: reader = csv.DictReader(read) for row in reader: arsim = row['average'] color[row['site']].otoTsim = arsim with open("wins.csv","r") as read: reader = csv.DictReader(read) for row in reader: if color.get(row['site'],None) is not None: color[row['site']].won['r'] = row['awr'] color[row['site']].won['ti'] = row['awt'] tms = it['tms'] timeMaps = {} # type: dict[str,TM] for s in tms: it = TM(s) timeMaps[it.getURIKey()] = it tmk = list(filter(lambda x: len(x) > 2, timeMaps.keys())) compisits = get_files(compath, lambda f: "allTheSame" not in f and check_if_goodURI(f, goodUris) and "interval" not in f) print(compisits) uniqueComposite = set() for c in compisits: uniqueComposite.add(gsite(c)) compisits = sorted(list(uniqueComposite)) # 640 641 # self.site, self.alSum, self.random, self.aVr, self.temporal, self.aVt, # self.tmNumMementos, self.tmTimeSpan, # self.tmNumM2k, self.tmTimeSpan2k, self.tmNumM05k, self.tmTimeSpan05k # self.won['Random'], # self.won['TemporalInterval'] with open("allTm2.csv","w+") as out: out.write("site,ah,mh,mdif,nmemento,timespan,nummtwo,twotimespan,numof,timespanof,aWP,moto,mtcr,method\n") for c in sorted(compisits): # print(c) for tmkey in filter(lambda x: len(x) > 2, tmk): if tmkey in c: print(timeMaps[tmkey].timeSpan(), timeMaps[tmkey].numMentos, timeMaps[tmkey].timeSpanAfter(2000), timeMaps[tmkey].numMementosAfter(2000), timeMaps[tmkey].timeSpanAfter(2005), timeMaps[tmkey].numMementosAfter(2005)) cc = color[tmkey] cc.setTMInfo(timeMaps[tmkey].timeSpan(), timeMaps[tmkey].numMentos, timeMaps[tmkey].timeSpanAfter(2000), timeMaps[tmkey].numMementosAfter(2000), timeMaps[tmkey].timeSpanAfter(2005), timeMaps[tmkey].numMementosAfter(2005)) out.write(cc.getRString()) out.write(cc.getTString()) print("______________________________")
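# The allTm2.csv file at the end of generate() is assembled by hand-written
# header and row strings; this is a sketch of the same header using
# csv.DictWriter, which handles ordering and quoting. The field names are
# copied from the header written above; the example row is illustrative only.
import csv

FIELDS = ["site", "ah", "mh", "mdif", "nmemento", "timespan", "nummtwo",
          "twotimespan", "numof", "timespanof", "aWP", "moto", "mtcr", "method"]

with open("allTm2.csv", "w", newline="") as out:
    writer = csv.DictWriter(out, fieldnames=FIELDS)
    writer.writeheader()
    # Missing keys are filled with the empty-string restval by default.
    writer.writerow({"site": "example.com", "method": "alSum"})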