def makeBins(region, chrom_size, width, step=None):
    '''Tile the genome (or a single region) with bins of the given width and step.
    Relies on module-level numpy (np), logging, ordict (collections.OrderedDict)
    and, when a region string is given, parseRegion().'''
    bins = ordict()
    offsets = ordict()
    nbin = 0
    if step is None:
        step = width
    elif step > width:
        raise ValueError('step size({}) larger than bin width({})'.format(step, width))
    if region is None:
        # bin every chromosome in chrom_size
        for chrom in chrom_size:
            offsets[chrom] = nbin
            size = chrom_size[chrom]
            bin_ends = np.arange(width, size + 0.1, step).astype(int)
            bin_starts = bin_ends - width
            n = len(bin_ends)
            if bin_ends[n-1] < size:
                # add a final, shorter bin covering the chromosome tail
                bin_ends = np.append(bin_ends, size)
                bin_starts = np.append(bin_starts, bin_starts[n-1] + step)
                n += 1
            bins[chrom] = np.vstack((bin_starts, bin_ends, bin_ends - bin_starts)).transpose()
            nbin += n
    else:
        # bin only the requested region
        chrom, start, end = parseRegion(region, chrom_size)
        offsets[chrom] = nbin
        bin_ends = np.arange(start + width, end + 0.1, step).astype(int)
        bin_starts = bin_ends - width
        n = len(bin_ends)
        if bin_ends[n-1] < end:
            bin_ends = np.append(bin_ends, end)
            bin_starts = np.append(bin_starts, bin_starts[n-1] + step)
            n += 1
        bins[chrom] = np.vstack((bin_starts, bin_ends, bin_ends - bin_starts)).transpose()
        nbin += n
    logging.info('{} bins made'.format(nbin))
    return bins, offsets, nbin
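# Usage sketch for makeBins (not part of the original snippet). It assumes the
# module-level names makeBins relies on: numpy as np, logging, and
# ordict = collections.OrderedDict. The chromosome sizes are made-up example values.
from collections import OrderedDict as ordict
import logging
import numpy as np

example_chrom_size = ordict([('chr1', 1000000), ('chr2', 750000)])  # hypothetical genome
example_bins, example_offsets, example_nbin = makeBins(None, example_chrom_size, width=100000)
# example_bins['chr1'] is an (n, 3) array of [start, end, length] rows, and
# example_offsets['chr2'] is the global index of chr2's first bin (10 here).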
def mapping_vector_name(mapping, name):
    # use an OrderedDict so mapping keys are tried in their given order
    # http://stackoverflow.com/questions/5629023/key-order-in-python-dicionaries
    if not isinstance(mapping, list):
        return name
    mapping = ordict(mapping)
    for key in mapping:
        if key in name:
            return mapping[key]
    return name
def get_env_info(env):
    assert isinstance(env, Env)
    distri = env.getModelDistri()
    info = ordict()
    info['max'] = distri['max']
    info['avg'] = distri['avg']
    info['min'] = distri['min']
    return info
def read_fai(filename):
    '''extract an OrderedDict of {chrom:size} from a genome.fa.fai'''
    chrom_size = ordict()
    with open(filename) as f:
        for line in f:
            chrom, size, offset, line_size, line_byte = line.rstrip().split()
            size = int(size)
            chrom_size[chrom] = size
    return chrom_size
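# Usage sketch for read_fai (not part of the original snippet). A .fai index holds one
# tab-separated record per sequence: NAME LENGTH OFFSET LINEBASES LINEWIDTH; the two
# records written below are made-up example values.
import tempfile

with tempfile.NamedTemporaryFile(mode='w', suffix='.fai', delete=False) as _fai:
    _fai.write('chr1\t1000000\t6\t60\t61\n')
    _fai.write('chr2\t750000\t1016700\t60\t61\n')
example_chrom_size = read_fai(_fai.name)
assert example_chrom_size['chr2'] == 750000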
def get_agent_info(env, agent):
    assert isinstance(env, Env) and isinstance(agent, Agent)
    info = ordict()
    info['value'] = env.getValue(agent.state_now)
    if agent.a_plan is None:
        info['plan_goal'] = None
    else:
        info['plan_goal'] = agent.a_plan.goal_value
    return info
def prepare(self, strings):
    counts = collections.Counter(self.tokenize(" ".join(strings)))
    # least-to-most frequent order, so takewhile() collects every token that
    # occurs fewer than 5 times
    tokens = sorted(counts.keys(), key=counts.__getitem__)
    rare_tokens = set(it.takewhile(lambda token: counts[token] < 5, tokens))
    common_tokens = set(tokens) - rare_tokens
    tokenmap = ordict(
        (token, code) for code, token in enumerate(common_tokens))
    inverse_tokenmap = ordict(
        (code, token) for code, token in enumerate(common_tokens))
    # map rare tokens to a common <UNK> code
    tokenmap.update((token, len(common_tokens)) for token in rare_tokens)
    inverse_tokenmap[len(common_tokens)] = "<UNK>"
    self.tokenmap = tokenmap
    self.inverse_tokenmap = inverse_tokenmap
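# Usage sketch for prepare (not part of the original snippet). prepare is written as a
# method of a tokenizer class that also provides tokenize(); the minimal word-level
# class below is hypothetical, only there to exercise it. It assumes the module-level
# names the method uses: collections, itertools as it, ordict = collections.OrderedDict.
import collections
import itertools as it
from collections import OrderedDict as ordict

class _ExampleWordTokenizer(object):
    def tokenize(self, string):
        return string.split()
    prepare = prepare  # reuse the method defined above

_tok = _ExampleWordTokenizer()
_tok.prepare(["the cat sat on the mat"] * 5 + ["zyzzyva"])
assert _tok.inverse_tokenmap[_tok.tokenmap["the"]] == "the"        # common token round-trips
assert _tok.inverse_tokenmap[_tok.tokenmap["zyzzyva"]] == "<UNK>"  # rare token collapses to <UNK>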
def readChromLen(filename):
    '''extract an OrderedDict of {chrom:size} from a genome.fa.fai'''
    chrom_size = ordict()
    with open(filename) as f:
        for line in f:
            fields = line.rstrip().split()
            chrom = fields[0]
            size = int(fields[1])
            chrom_size[chrom] = size
    return chrom_size
def make_bins(self):
    '''generate bins of a given resolution, with the constraint of region/chromosome sizes'''
    if self.regions is None:
        raise Exception('regions is None, this should not happen')
    bins = ordict()
    offsets = ordict()
    nbin = 0
    for region in self.regions:
        reg = '{}:{}-{}'.format(*region)
        if self.resolution is not None:
            bin_starts = np.arange(region[1], region[2], self.resolution, dtype=int)
            bin_ends = np.append(bin_starts[1:], region[2])
        else:
            bin_starts = np.array([region[1]], dtype=int)
            bin_ends = np.array([region[2]], dtype=int)
        offsets[reg] = nbin
        bins[reg] = np.vstack((bin_starts, bin_ends, bin_ends - bin_starts)).transpose()
        nbin += len(bins[reg])
    self.bins = bins
    self.offsets = offsets
    self.nbin = nbin
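# Usage sketch for make_bins (not part of the original snippet). make_bins is a method
# of an object carrying `regions` (a list of (chrom, start, end) tuples) and
# `resolution`; the tiny container class below is hypothetical. It assumes numpy as np
# and ordict = collections.OrderedDict at module level.
from collections import OrderedDict as ordict
import numpy as np

class _ExampleBinner(object):
    make_bins = make_bins  # reuse the method defined above

_binner = _ExampleBinner()
_binner.regions = [('chr1', 0, 250000)]  # made-up region
_binner.resolution = 100000
_binner.make_bins()
# _binner.bins['chr1:0-250000'] is a (3, 3) array of [start, end, length] rows
assert _binner.nbin == 3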
def add_env_record(self, env, T, up_info=None):
    assert isinstance(env, Env)
    if up_info is None:
        info = Record.get_env_info(env)
        # logging.info(info)
    else:
        info = ordict()
        info['max'] = up_info['nkinfo']['max']
        info['avg'] = up_info['nkinfo']['avg']
        info['min'] = up_info['nkinfo']['min']
        # logging.info(info)
    self.env.append((T, info))
    return info
def main():
    datasource = datasets.MscocoNP(H(data_dir="/Tmp/cooijmat/mscoco",
                                     hp=H(caption=H(token="character"))))
    tfrecord_dir = os.environ["MSCOCO_TFRECORD_DIR"]
    tf.gfile.MakeDirs(tfrecord_dir)

    tokenizers = ordict((token, datasource.get_tokenizer(token))
                        for token in "character word".split())
    for token, tokenizer in tokenizers.items():
        tokenmap_path = os.path.join(tfrecord_dir, "tokenmap_%s.pkl" % token)
        print "writing to", tokenmap_path
        pkl.dump(tokenizer.tokenmap, open(tokenmap_path, "wb"))
        print "done"

    def _to_sequence_example(image_path):
        identifier = os.path.splitext(os.path.basename(image_path))[0]
        caption_words = tokenizers["word"].process(datasource.get_caption_string(identifier))
        caption_characters = tokenizers["character"].process(datasource.get_caption_string(identifier))
        with tf.gfile.FastGFile(image_path, "rb") as f:
            jpeg = f.read()
        return tf.train.SequenceExample(
            context=tf.train.Features(feature={
                "image/identifier": _bytes_feature(identifier),
                "image/data": _bytes_feature(jpeg),
            }),
            feature_lists=tf.train.FeatureLists(feature_list={
                "image/caption_characters": _int64_feature_list(caption_characters),
                "image/caption_words": _int64_feature_list(caption_words),
            }))

    for fold in "train valid".split():
        output_path = os.path.join(tfrecord_dir, fold + ".tfrecords")
        print "writing to", output_path
        writer = tf.python_io.TFRecordWriter(output_path)
        for filename in datasource.get_filenames(fold):
            example = _to_sequence_example(filename)
            writer.write(example.SerializeToString())
        writer.close()
def dictionary_init(option):
    if option == "nodes":
        return {'cadejos-0': 0, 'cadejos-1': 0, 'cadejos-2': 0, 'cadejos-3': 0, 'cadejos-4': 0,
                'zarate-0a': 0, 'zarate-0b': 0, 'zarate-0c': 0, 'zarate-0d': 0,
                'zarate-1a': 0, 'zarate-1b': 0, 'zarate-1c': 0, 'zarate-1d': 0,
                'zarate-2a': 0, 'zarate-2b': 0, 'zarate-2c': 0, 'zarate-2d': 0,
                'zarate-3a': 0, 'zarate-3b': 0, 'zarate-3c': 0, 'zarate-3d': 0,
                'zarate-4a': 0, 'zarate-4b': 0, 'zarate-4c': 0, 'zarate-4d': 0,
                'tule-00': 0, 'tule-01': 0, 'tule-02': 0, 'tule-03': 0}
    elif option == "queues":
        return {'total': 0, 'cadejos': 0, 'zarate': 0, 'tule': 0,
                'cpu-n4h24': 0, 'cpu-n3h72': 0, 'cpu-n5h24': 0,
                'gpu-n2h24': 0, 'gpu-n1h72': 0,
                'phi-debug': 0, 'phi-n2h72': 0, 'phi-n3h24': 0, 'phi-n5h24': 0,
                'phi-n6h24': 0, 'phi-n1h72': 0, 'phi-n6h96': 0, 'phi-n18h72': 0,
                'debug': 0, 'k40': 0, 'cpu-debug': 0, 'gpu-debug': 0}
    elif option == "days":
        d = ordict()
        d["Monday"] = 0
        d["Tuesday"] = 0
        d["Wednesday"] = 0
        d["Thursday"] = 0
        d["Friday"] = 0
        d["Saturday"] = 0
        d["Sunday"] = 0
        return d
    elif option == "hours":
        return {'00:00 - 01:00': 0, '01:00 - 02:00': 0, '02:00 - 03:00': 0, '03:00 - 04:00': 0,
                '04:00 - 05:00': 0, '05:00 - 06:00': 0, '06:00 - 07:00': 0, '07:00 - 08:00': 0,
                '08:00 - 09:00': 0, '09:00 - 10:00': 0, '10:00 - 11:00': 0, '11:00 - 12:00': 0,
                '12:00 - 13:00': 0, '13:00 - 14:00': 0, '14:00 - 15:00': 0, '15:00 - 16:00': 0,
                '16:00 - 17:00': 0, '17:00 - 18:00': 0, '18:00 - 19:00': 0, '19:00 - 20:00': 0,
                '20:00 - 21:00': 0, '21:00 - 22:00': 0, '22:00 - 23:00': 0, '23:00 - 24:00': 0}
    else:
        return {}
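# Usage sketch for dictionary_init (not part of the original snippet); assumes
# ordict = collections.OrderedDict. Only the "days" option returns an OrderedDict,
# so iterating it preserves calendar order.
from collections import OrderedDict as ordict

example_days = dictionary_init("days")
assert list(example_days)[0] == "Monday" and list(example_days)[-1] == "Sunday"
assert dictionary_init("unknown") == {}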
def histogram(Y, cluster_sizes, link):
    '''Plot histogram of cluster sizes and integrated standard deviation.'''
    # relies on module-level plt, zeros and pathjoin (presumably matplotlib.pyplot,
    # numpy.zeros and os.path.join)
    from collections import OrderedDict as ordict
    from mpl_toolkits.axes_grid1 import host_subplot
    import mpl_toolkits.axisartist as AA

    cl = ordict(sorted(cluster_sizes.iteritems(), key=lambda x: x[1]))
    #Y = ordict(sorted(Y.iteritems(), key=lambda x: cl[x[0]]))
    #X = ordict(zip(range(len(cluster_sizes)), cl.itervalues()))

    fig = plt.figure(figsize=(15, 8), dpi=80)
    #ax = fig.add_subplot(1,1,1)
    host = host_subplot(111, axes_class=AA.Axes)
    plt.subplots_adjust(right=0.75)
    test = host.twinx()

    bottom = zeros(len(cl.keys()))
    size = host.bar(cl.keys(), cl.values(), align='center', width=0.4,
                    color='black', label="Cluster Size", bottom=bottom)
    var = test.plot(Y.keys(), Y.values(), color="red", marker='o',
                    linestyle='-', linewidth=2, label=r"Cluster $\sigma$")

    host.set_xlabel("Identified Cluster")
    host.set_ylabel("Cluster Size")
    test.set_ylabel(r"Integrated Cluster $\sigma$")
    #test.set_ylim(0.0, 1.0)
    host.set_yscale('log')
    #host.set_axisbelow(True)
    #host.set_xticks(X.keys(), cl.keys())
    host.legend(title=link)

    plt.savefig(pathjoin("TESTS", link, "plots", 'hist_' + link + '.png'),
                format='png', dpi=90)
    #plt.show()
    plt.close("all")
nhidden = 80
batch_size = 100

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=1)
parser.add_argument("--num-epochs", type=int, default=200)
parser.add_argument("--batch-size", type=int, default=100)
parser.add_argument("--learning-rate", type=float, default=1e-3)
parser.add_argument("--skips", nargs="+", type=int, default=[])
parser.add_argument("--instance-dependent", action="store_true")
args = parser.parse_args()

np.random.seed(args.seed)

inputs = ordict(features=T.matrix("features"), targets=T.ivector("targets"))
x, y = inputs["features"], inputs["targets"]

theano.config.compute_test_value = "warn"
x.tag.test_value = np.random.random((11, 784)).astype(theano.config.floatX)
y.tag.test_value = np.random.random_integers(low=0, high=9, size=(11,)).astype(np.int32)

# move time axis before batch axis
x = x.T
# (time, batch, features)
x = x.reshape((x.shape[0], x.shape[1], 1))

Wx = theano.shared(util.orthogonal((1, nhidden)), name="Wx")
bx = theano.shared(np.zeros((nhidden,), dtype=theano.config.floatX), name="bx")
Wy = theano.shared(util.orthogonal((nhidden, nclasses)), name="Wy")
        (opts.folderName, POI))
    os.system(
        "cp CMS-HGG_mva_13TeV_datacard.perProc.root SystematicsTable%s/%s/." %
        (opts.folderName, POI))
    for ng in nuisance_groups:
        writeJobFileAndSubmit(
            "%s/SystematicsTable%s/%s" % (os.getcwd(), opts.folderName, POI),
            ng, POI)

if opts.makeTable:
    overall_array = {}
    sumInQuad = {}
    for POI in POIs:
        sumInQuad[POI] = 0.
        #poi_array = {}
        poi_array = ordict()
        directory = "%s/SystematicsTable%s/%s" % (os.getcwd(), opts.folderName, POI)
        nominalValues = values = getUpDownUncertainties(directory, "none", POI)
        for ng in nuisance_groups:
            values = getUpDownUncertainties(directory, ng, POI)
            #print ng, nominalValues, values
            thisUpWrtCentral = (abs(nominalValues[0]**2 - values[0]**2))**(0.5)
            thisDownWrtCentral = (abs(nominalValues[1]**2 - values[1]**2))**(0.5)
            thisSymmWrtCentral = (abs(nominalValues[2]**2 - values[2]**2))**(0.5)
            poi_array[ng] = [
                thisUpWrtCentral, thisDownWrtCentral, thisSymmWrtCentral
            ]
            if ng != "all":
def prepare(self, strings):
    tokens = set(self.tokenize("".join(strings)))
    self.tokenmap = ordict(
        (token, code) for code, token in enumerate(tokens))
    # inverse of tokenmap; enumerate the same set so the codes line up
    self.inverse_tokenmap = ordict(
        (code, token) for code, token in enumerate(tokens))
fs = open("lang/specials.txt", "r") specials = fs.read().splitlines() specials = [x.strip() for x in specials] specials = [x for x in specials if x != ""] specials = [x.split() for x in specials if not x.startswith("#")] specialTexts = [x[0] for x in specials] specialNames = [x[1] for x in specials] fs = open("lang/grammar.txt", "r") grammarTxt = fs.read() grammarTxt = [x.strip() for x in grammarTxt.splitlines()] grammarTxt = filter(None, grammarTxt) grammar = ordict() nonterm = None for line in grammarTxt: line = line.split() if line[0] == "|" and nonterm: if nonterm not in grammar: grammar[nonterm] = [line[1:]] else: grammar[nonterm].append(line[1:]) else: nonterm = line[0] for i, nonterm in enumerate(grammar):
def order_key(queue):
    # sort the queue counters alphabetically by key
    return ordict(sorted(queue.items(), key=lambda t: t[0], reverse=False))
def order_dict(queue):
    # sort the queue counters by value, largest first
    return ordict(sorted(queue.items(), key=lambda t: t[1], reverse=True))
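# Usage sketch for order_key/order_dict (not part of the original snippets); assumes
# ordict = collections.OrderedDict. The queue counts below are made-up values.
from collections import OrderedDict as ordict

example_queue = {'debug': 3, 'k40': 7, 'cpu-debug': 1}
assert list(order_key(example_queue)) == ['cpu-debug', 'debug', 'k40']  # by key, ascending
assert list(order_dict(example_queue).values()) == [7, 3, 1]            # by count, descending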
def batches(dataset, batch_size=100):
    indices = list(range(len(dataset["features"])))
    np.random.shuffle(indices)
    for offset in range(0, len(indices), batch_size):
        batch_indices = indices[offset:offset + batch_size]
        yield ordict((source, value[batch_indices])
                     for source, value in dataset.items())
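# Usage sketch for batches (not part of the original snippet): the dataset is a dict of
# equally sized numpy arrays keyed by source name; assumes numpy as np and
# ordict = collections.OrderedDict. Shapes and sizes below are made up.
from collections import OrderedDict as ordict
import numpy as np

example_dataset = ordict(features=np.zeros((1000, 784), dtype='float32'),
                         targets=np.zeros((1000,), dtype='int32'))
for example_batch in batches(example_dataset, batch_size=100):
    assert example_batch["features"].shape == (100, 784)
    assert example_batch["targets"].shape == (100,)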
def call(fn, **input_values):
    # pair fn's return values with the enclosing `outputs` names
    return ordict(zip(outputs, fn(**input_values)))
    #ECH_Rf_A5 = [Float, 'W7X_ECH_Rf_A5', Average, 1],
    #ECH_Rf_B5 = [Float, 'W7X_ECH_Rf_B5', Average, 1],
    #ECH_Rf_C5 = [Float, 'W7X_ECH_Rf_C5', Average, 1],
    #ECH_Rf_D5 = [Float, 'W7X_ECH_Rf_D5', Average, 1],
)

# Scraper study
diags = ordict(iA = [Float, 'W7X_IPlanar_A', Average, 1],
               iB = [Float, 'W7X_IPlanar_B', Average, 1],
               i1 = [Float, 'W7X_INonPlanar_1', Average, 1],
               i2 = [Float, 'W7X_INonPlanar_2', Average, 1],
               i3 = [Float, 'W7X_INonPlanar_3', Average, 1],
               NBI4_I = [Float, 'W7X_NBI4_I', fPeak, 1],
               NBI4_U = [Float, 'W7X_NBI4_U', fPeak, 1],
               scr_1 = [Float, 'W7X_STDU_LP01_I', Std, 3],
               scr_2 = [Float, 'W7X_STDU_LP02_I', Std, 3],
               scr_3 = [Float, 'W7X_STDU_LP03_I', Std, 3],
               scr_4 = [Float, 'W7X_STDU_LP04_I', Std, 3],
               scr_5 = [Float, 'W7X_STDU_LP05_I', Std, 3],
               scr_6 = [Float, 'W7X_STDU_LP06_I', Std, 3],
               scr_7 = [Float, 'W7X_STDU_LP07_I', Std, 3],
               scr_8 = [Float, 'W7X_STDU_LP08_I', Std, 3],
               TotECH = [Float, 'W7X_TotECH', Peak, 1],
               WDIA_TRI = [Float, 'W7X_WDIA_TRI', Peak, 1],
               IP_CONT = [Float, 'W7X_ROG_CONT', Peak, 1],
               neL = [Float, 'W7X_neL', Peak, 1],
               )

# for MHD study
diags = ordict()
diags.update(dict(PkrSPD4124time = [Float, 'W7X_MIR_4124', rawPeakSPDtime, 8]))
diags.update(dict(PkrSPD4124 = [Float, 'W7X_MIR_4124', rawPeakSPD, 3]))
def AsDict(self):
    return ordict(self.Items())
def __init__(self, items=(), **kwargs):
    self.Data = ordict()
    self.Update(items)
    self.Update(kwargs)
def get_socl_net_info(socl_net):
    assert isinstance(socl_net, SoclNet)
    info = ordict()
    info['net'] = socl_net
    return info