Example #1
from collections import OrderedDict as ordict  # ordict is collections.OrderedDict, as in the other examples
import logging
import numpy as np

# parseRegion is assumed to be defined elsewhere in the same module
def makeBins(region, chrom_size, width, step=None):
    '''Tile each chromosome (or a single region) with windows of `width` bases, advancing by `step`.'''
    bins = ordict()
    offsets = ordict()
    nbin = 0
    if step is None:
        step = width
    elif step > width:
        raise ValueError('step size({}) larger than bin width({})'.format(step, width))
    if region is None:
        for chrom in chrom_size:
            offsets[chrom] = nbin
            size = chrom_size[chrom]
            bin_ends = np.arange(width, size+0.1, step).astype(int)
            bin_starts = bin_ends - width
            n = len(bin_ends)
            if bin_ends[n-1] < size:
                bin_ends = np.append(bin_ends, size)
                bin_starts = np.append(bin_starts, bin_starts[n-1]+step)
                n += 1
            bins[chrom] = np.vstack((bin_starts,bin_ends,bin_ends-bin_starts)).transpose()
            nbin += n
    else:
        chrom,start,end = parseRegion(region, chrom_size)
        offsets[chrom] = nbin
        bin_ends = np.arange(start+width, end+0.1, step).astype(int)
        bin_starts = bin_ends - width
        n = len(bin_ends)
        if bin_ends[n-1] < end:
            bin_ends = np.append(bin_ends, end)
            bin_starts = np.append(bin_starts, bin_starts[n-1]+step)
            n += 1
        bins[chrom] = np.vstack((bin_starts,bin_ends,bin_ends-bin_starts)).transpose()
        nbin += n
    logging.info('{} bins made'.format(nbin))
    return bins,offsets,nbin
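A minimal usage sketch; the chromosome names and sizes are made up for illustration (with region=None every chromosome is tiled):

chrom_size = ordict([('chr1', 1000), ('chr2', 500)])
bins, offsets, nbin = makeBins(None, chrom_size, width=200, step=100)
# bins['chr1'] is an (n, 3) array of [start, end, length] rows,
# offsets['chr2'] == 9 is the global index of chr2's first bin, and nbin == 13.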
Example #2
def mapping_vector_name(mapping, name):
    # use an OrderedDict so keys are checked in the order they were listed
    # http://stackoverflow.com/questions/5629023/key-order-in-python-dicionaries
    if not isinstance(mapping, list):
        return name
    mapping = ordict(mapping)
    for key in mapping:
        if key in name:
            return mapping[key]
    return name
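A hedged usage sketch with made-up mapping entries, assuming the usual ordict = collections.OrderedDict alias:

mapping = [('temp', 'temperature'), ('hum', 'humidity')]
mapping_vector_name(mapping, 'temp_sensor_3')  # -> 'temperature' (first matching key wins)
mapping_vector_name(mapping, 'pressure_1')     # -> 'pressure_1' (no key matches)
mapping_vector_name({'temp': 'x'}, 'temp_1')   # -> 'temp_1' (mapping is not a list)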
Example #4
 def get_env_info(env):
     assert isinstance(env, Env)
     distri = env.getModelDistri()
     info = ordict()
     info['max'] = distri['max']
     info['avg'] = distri['avg']
     info['min'] = distri['min']
     return info
Example #5
 def read_fai(filename):
     '''extract an OrderedDict of {chrom:size} from a genome.fa.fai'''
     chrom_size = ordict()
     with open(filename) as f:
         for line in f:
             chrom,size,offset,line_size,line_byte = line.rstrip().split()
             size = int(size)
             chrom_size[chrom] = size
     return chrom_size
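For context, each line of a samtools .fai index has five whitespace-separated fields (sequence name, length, byte offset, bases per line, bytes per line); only the first two are kept. A hedged usage sketch with an illustrative index file:

# genome.fa.fai (illustrative contents):
# chr1    248956422    112          70    71
# chr2    242193529    252513167    70    71
chrom_size = read_fai('genome.fa.fai')
# -> OrderedDict([('chr1', 248956422), ('chr2', 242193529)])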
Example #6
 def get_agent_info(env, agent):
     assert isinstance(env, Env) and isinstance(agent, Agent)
     info = ordict()
     info['value'] = env.getValue(agent.state_now)
     if agent.a_plan is None:
         info['plan_goal'] = None
     else:
         info['plan_goal'] = agent.a_plan.goal_value
     return info
Example #7
    def prepare(self, strings):
        counts = collections.Counter(self.tokenize(" ".join(strings)))
        tokens = list(reversed(sorted(counts.keys(), key=counts.__getitem__)))
        rare_tokens = set(it.takewhile(lambda token: counts[token] < 5,
                                       tokens))
        common_tokens = set(tokens) - rare_tokens

        tokenmap = ordict(
            (token, code) for code, token in enumerate(common_tokens))
        inverse_tokenmap = ordict(
            (code, token) for code, token in enumerate(common_tokens))

        # map rare tokens to a common <UNK> code
        tokenmap.update((token, len(common_tokens)) for token in rare_tokens)
        inverse_tokenmap[len(common_tokens)] = "<UNK>"

        self.tokenmap = tokenmap
        self.inverse_tokenmap = inverse_tokenmap
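The same rare-token handling in a self-contained hedged sketch; the whitespace tokenizer and cutoff of 2 below are stand-ins for the snippet's self.tokenize and cutoff of 5:

import collections
from collections import OrderedDict as ordict

words = "the cat sat on the mat the cat".split()
counts = collections.Counter(words)
common = [w for w in counts if counts[w] >= 2]             # ['the', 'cat']
tokenmap = ordict((w, i) for i, w in enumerate(common))
unk_code = len(common)                                     # every rare word maps to this code
tokenmap.update((w, unk_code) for w in counts if counts[w] < 2)
codes = [tokenmap[w] for w in words]                       # [0, 1, 2, 2, 0, 2, 0, 1]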
Example #8
def readChromLen(filename):
    '''extract an OrderedDict of {chrom:size} from a genome.fa.fai'''
    chrom_size = ordict()
    with open(filename) as f:
        for line in f:
            fields = line.rstrip().split()
            chrom = fields[0]
            size = int(fields[1])
            chrom_size[chrom] = size
    return chrom_size
Example #9
 def make_bins(self):
     '''generate bins of a given resolution, with the constraint of region/chromosome sizes'''
     if self.regions is None:
         raise Exception('regions is None, this should not happen')
     bins = ordict()
     offsets = ordict()
     nbin = 0
     for region in self.regions:
         reg = '{}:{}-{}'.format(*region)
         if self.resolution is not None:
             bin_starts = np.arange(region[1], region[2], self.resolution, dtype=int)
             bin_ends = np.append(bin_starts[1:], region[2])
         else:
             bin_starts = np.array([region[1]], dtype=int)
             bin_ends = np.array([region[2]], dtype=int)
         offsets[reg] = nbin
         bins[reg] = np.vstack((bin_starts,bin_ends,bin_ends-bin_starts)).transpose()
         nbin += len(bins[reg])
     self.bins = bins
     self.offsets = offsets
     self.nbin = nbin
Example #10
 def add_env_record(self, env, T, up_info=None):
     assert isinstance(env, Env)
     if up_info is None:
         info = Record.get_env_info(env)
         # logging.info(info)
     else:
         info = ordict()
         info['max'] = up_info['nkinfo']['max']
         info['avg'] = up_info['nkinfo']['avg']
         info['min'] = up_info['nkinfo']['min']
         # logging.info(info)
     self.env.append((T, info))
     return info
Example #11
def main():
  datasource = datasets.MscocoNP(H(data_dir="/Tmp/cooijmat/mscoco", hp=H(caption=H(token="character"))))

  tfrecord_dir = os.environ["MSCOCO_TFRECORD_DIR"]
  tf.gfile.MakeDirs(tfrecord_dir)

  tokenizers = ordict((token, datasource.get_tokenizer(token))
                      for token in "character word".split())

  for token, tokenizer in tokenizers.items():
    tokenmap_path = os.path.join(tfrecord_dir, "tokenmap_%s.pkl" % token)
    print "writing to", tokenmap_path
    pkl.dump(tokenizer.tokenmap, open(tokenmap_path, "wb"))
    print "done"

  def _to_sequence_example(image_path):
    identifier = os.path.splitext(os.path.basename(image_path))[0]
    caption_words = tokenizers["word"].process(datasource.get_caption_string(identifier))
    caption_characters = tokenizers["character"].process(datasource.get_caption_string(identifier))
    with tf.gfile.FastGFile(image_path, "rb") as f:
      jpeg = f.read()
    return tf.train.SequenceExample(
      context=tf.train.Features(feature={
        "image/identifier": _bytes_feature(identifier),
        "image/data": _bytes_feature(jpeg),
       }),
      feature_lists=tf.train.FeatureLists(feature_list={
        "image/caption_characters": _int64_feature_list(caption_characters),
        "image/caption_words": _int64_feature_list(caption_words),
       }))

  for fold in "train valid".split():
    output_path = os.path.join(tfrecord_dir, fold + ".tfrecords")
    print "writing to", output_path
    writer = tf.python_io.TFRecordWriter(output_path)
    for filename in datasource.get_filenames(fold):
      example = _to_sequence_example(filename)
      writer.write(example.SerializeToString())
    writer.close()
Example #12
def dictionary_init(option):
    if option == "nodes":
        return {'cadejos-0': 0, 'cadejos-1': 0, 'cadejos-2': 0, 'cadejos-3': 0,
                'cadejos-4': 0, 'zarate-0a': 0, 'zarate-0b': 0, 'zarate-0c': 0,
                'zarate-0d': 0, 'zarate-1a': 0, 'zarate-1b': 0, 'zarate-1c': 0,
                'zarate-1d': 0, 'zarate-2a': 0, 'zarate-2b': 0, 'zarate-2c': 0,
                'zarate-2d': 0, 'zarate-3a': 0, 'zarate-3b': 0, 'zarate-3c': 0,
                'zarate-3d': 0, 'zarate-4a': 0, 'zarate-4b': 0, 'zarate-4c': 0,
                'zarate-4d': 0, 'tule-00': 0, 'tule-01': 0, 'tule-02': 0,
                'tule-03': 0}
    elif option == "queues":
        return {'total': 0, 'cadejos': 0, 'zarate': 0, 'tule': 0,
                'cpu-n4h24': 0, 'cpu-n3h72': 0, 'cpu-n5h24': 0, 'gpu-n2h24': 0,
                'gpu-n1h72': 0, 'phi-debug': 0, 'phi-n2h72': 0, 'phi-n3h24': 0,
                'phi-n5h24': 0, 'phi-n6h24': 0, 'phi-n1h72': 0, 'phi-n6h96': 0,
                'phi-n18h72': 0, 'debug': 0, 'k40': 0, 'cpu-debug': 0,
                'gpu-debug': 0}
    elif option == "days":
        d = ordict()
        d["Monday"] = 0
        d["Tuesday"] = 0
        d["Wednesday"] = 0
        d["Thursday"] = 0
        d["Friday"] = 0
        d["Saturday"] = 0
        d["Sunday"] = 0
        return d
    elif option == "hours":
        return {'00:00 - 01:00': 0, '01:00 - 02:00': 0, '02:00 - 03:00': 0,
                '03:00 - 04:00': 0, '04:00 - 05:00': 0, '05:00 - 06:00': 0,
                '06:00 - 07:00': 0, '07:00 - 08:00': 0, '08:00 - 09:00': 0,
                '09:00 - 10:00': 0, '10:00 - 11:00': 0, '11:00 - 12:00': 0,
                '12:00 - 13:00': 0, '13:00 - 14:00': 0, '14:00 - 15:00': 0,
                '15:00 - 16:00': 0, '16:00 - 17:00': 0, '17:00 - 18:00': 0,
                '18:00 - 19:00': 0, '19:00 - 20:00': 0, '20:00 - 21:00': 0,
                '21:00 - 22:00': 0, '22:00 - 23:00': 0, '23:00 - 24:00': 0}
    else:
        return {}
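A short hedged usage sketch (assuming the usual ordict = collections.OrderedDict alias); the 'days' branch uses an OrderedDict so that iterating the counters always walks Monday through Sunday:

days = dictionary_init('days')
days['Wednesday'] += 1
list(days)  # ['Monday', 'Tuesday', ..., 'Sunday'], in insertion order regardless of the counts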
Example #13
def histogram(Y, cluster_sizes, link):  
    ''' Plot histogram of cluster sizes and integrated standard deviation.'''
    
    from collections import OrderedDict as ordict
    from mpl_toolkits.axes_grid1 import host_subplot
    import mpl_toolkits.axisartist as AA
    
    cl = ordict(sorted(cluster_sizes.iteritems(), key=lambda x: x[1]))
    #Y  = ordict(sorted(Y.iteritems(), key=lambda x: cl[x[0]]))
    #X  = ordict(zip( range(len(cluster_sizes)), cl.itervalues() ))

    fig = plt.figure(figsize=(15,8),dpi=80)
    #ax  = fig.add_subplot(1,1,1)

    host = host_subplot(111, axes_class=AA.Axes)
    plt.subplots_adjust(right=0.75)
    test = host.twinx()
    
    bottom = zeros(len(cl.keys()))

    size = host.bar(cl.keys(), cl.values(), align='center', width=0.4, color='black', label="Cluster Size", bottom=bottom)
    var  = test.plot(Y.keys(), Y.values(), color="red", marker='o', linestyle='-', linewidth=2, label="Cluster $\sigma$")

    host.set_xlabel("Identified Cluster")
    host.set_ylabel("Cluster Size")
    test.set_ylabel("Integrated Cluster $\sigma$")
    #test.set_ylim(0.0, 1.0)
    host.set_yscale('log') 
    
    #host.set_axisbelow(True)
    #host.set_xticks(X.keys(), cl.keys())

    host.legend(title=link)

    plt.savefig(pathjoin("TESTS",link,"plots",'hist_'+link+'.png'),format='png', dpi=90)
    #plt.show()
    plt.close("all")    
Example #14
    nhidden = 80
    batch_size = 100

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--num-epochs", type=int, default=200)
    parser.add_argument("--batch-size", type=int, default=100)
    parser.add_argument("--learning-rate", type=float, default=1e-3)
    parser.add_argument("--skips", nargs="+", type=int, default=[])
    parser.add_argument("--instance-dependent", action="store_true")
    args = parser.parse_args()

    np.random.seed(args.seed)

    inputs = ordict(features=T.matrix("features"),
                    targets=T.ivector("targets"))
    x, y = inputs["features"], inputs["targets"]

    theano.config.compute_test_value = "warn"
    x.tag.test_value = np.random.random((11, 784)).astype(theano.config.floatX)
    y.tag.test_value = np.random.random_integers(low=0, high=9, size=(11,)).astype(np.int32)

    # move time axis before batch axis
    x = x.T

    # (time, batch, features)
    x = x.reshape((x.shape[0], x.shape[1], 1))

    Wx = theano.shared(util.orthogonal((1, nhidden)), name="Wx")
    bx = theano.shared(np.zeros((nhidden,), dtype=theano.config.floatX), name="bx")
    Wy = theano.shared(util.orthogonal((nhidden, nclasses)), name="Wy")
Example #15
            (opts.folderName, POI))
        os.system(
            "cp CMS-HGG_mva_13TeV_datacard.perProc.root SystematicsTable%s/%s/."
            % (opts.folderName, POI))
        for ng in nuisance_groups:
            writeJobFileAndSubmit(
                "%s/SystematicsTable%s/%s" %
                (os.getcwd(), opts.folderName, POI), ng, POI)

if opts.makeTable:
    overall_array = {}
    sumInQuad = {}
    for POI in POIs:
        sumInQuad[POI] = 0.
        #poi_array = {}
        poi_array = ordict()
        directory = "%s/SystematicsTable%s/%s" % (os.getcwd(), opts.folderName,
                                                  POI)
        nominalValues = values = getUpDownUncertainties(directory, "none", POI)
        for ng in nuisance_groups:
            values = getUpDownUncertainties(directory, ng, POI)
            #print ng, nominalValues, values
            thisUpWrtCentral = (abs(nominalValues[0]**2 - values[0]**2))**(0.5)
            thisDownWrtCentral = (abs(nominalValues[1]**2 -
                                      values[1]**2))**(0.5)
            thisSymmWrtCentral = (abs(nominalValues[2]**2 -
                                      values[2]**2))**(0.5)
            poi_array[ng] = [
                thisUpWrtCentral, thisDownWrtCentral, thisSymmWrtCentral
            ]
            if ng != "all":
Example #16
 def prepare(self, strings):
     tokens = set(self.tokenize("".join(strings)))
     self.tokenmap = ordict(
         (token, code) for code, token in enumerate(tokens))
     self.inverse_tokenmap = ordict(
         (code, token) for code, token in enumerate(tokens))
Example #17
fs = open("lang/specials.txt", "r")

specials = fs.read().splitlines()
specials = [x.strip() for x in specials]
specials = [x for x in specials if x != ""]
specials = [x.split() for x in specials if not x.startswith("#")]

specialTexts = [x[0] for x in specials]
specialNames = [x[1] for x in specials]

fs = open("lang/grammar.txt", "r")

grammarTxt = fs.read()
grammarTxt = [x.strip() for x in grammarTxt.splitlines()]
grammarTxt = filter(None, grammarTxt)
grammar = ordict()
nonterm = None

for line in grammarTxt:

    line = line.split()

    if line[0] == "|" and nonterm:
        if nonterm not in grammar:
            grammar[nonterm] = [line[1:]]
        else:
            grammar[nonterm].append(line[1:])
    else:
        nonterm = line[0]

for i, nonterm in enumerate(grammar):
Example #18
def order_key(queue):
    return ordict(sorted(queue.items(), key=lambda t: t[0], reverse=False))
Example #19
def order_dict(queue):
    return ordict(sorted(queue.items(), key=lambda t: t[1], reverse=True))
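A hedged usage sketch covering both helpers above, with a made-up queue tally:

queue = {'cadejos': 2, 'zarate': 9, 'tule': 5}
order_key(queue)   # OrderedDict([('cadejos', 2), ('tule', 5), ('zarate', 9)])  -- sorted by key
order_dict(queue)  # OrderedDict([('zarate', 9), ('tule', 5), ('cadejos', 2)])  -- sorted by value, descending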
Example #20
import numpy as np
from collections import OrderedDict as ordict

def batches(dataset, batch_size=100):
    '''Yield shuffled minibatches from a dict of equal-length numpy arrays keyed by source name.'''
    indices = list(range(len(dataset["features"])))
    np.random.shuffle(indices)
    for offset in range(0, len(indices), batch_size):
        batch_indices = indices[offset:offset + batch_size]
        yield ordict((source, value[batch_indices]) for source, value in dataset.items())
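A hedged usage sketch with a tiny random dataset; numpy arrays are assumed, since value[batch_indices] relies on fancy indexing:

dataset = ordict(features=np.random.rand(10, 3),
                 targets=np.arange(10))
for batch in batches(dataset, batch_size=4):
    print(batch['features'].shape, batch['targets'].shape)  # (4, 3) (4,), (4, 3) (4,), (2, 3) (2,)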
Example #21
 def call(fn, **input_values):
     return ordict(zip(outputs, fn(**input_values)))
Example #22
             #ECH_Rf_A5 = [Float, 'W7X_ECH_Rf_A5', Average, 1],
             #ECH_Rf_B5 = [Float, 'W7X_ECH_Rf_B5', Average, 1],
             #ECH_Rf_C5 = [Float, 'W7X_ECH_Rf_C5', Average, 1],
             #ECH_Rf_D5 = [Float, 'W7X_ECH_Rf_D5', Average, 1],
             )
# Scraper study
diags = ordict(iA = [Float, 'W7X_IPlanar_A', Average, 1],
               iB = [Float, 'W7X_IPlanar_B', Average, 1],
               i1 = [Float, 'W7X_INonPlanar_1', Average, 1],
               i2 = [Float, 'W7X_INonPlanar_2', Average, 1],
               i3 = [Float, 'W7X_INonPlanar_3', Average, 1],
               NBI4_I = [Float, 'W7X_NBI4_I', fPeak, 1],
               NBI4_U = [Float, 'W7X_NBI4_U', fPeak, 1],
               scr_1 = [Float, 'W7X_STDU_LP01_I', Std, 3],
               scr_2 = [Float, 'W7X_STDU_LP02_I', Std, 3],
               scr_3 = [Float, 'W7X_STDU_LP03_I', Std, 3],
               scr_4 = [Float, 'W7X_STDU_LP04_I', Std, 3],
               scr_5 = [Float, 'W7X_STDU_LP05_I', Std, 3],
               scr_6 = [Float, 'W7X_STDU_LP06_I', Std, 3],
               scr_7 = [Float, 'W7X_STDU_LP07_I', Std, 3],
               scr_8 = [Float, 'W7X_STDU_LP08_I', Std, 3],
               TotECH = [Float, 'W7X_TotECH', Peak, 1],
               WDIA_TRI = [Float, 'W7X_WDIA_TRI', Peak, 1],
               IP_CONT = [Float, 'W7X_ROG_CONT', Peak, 1],
               neL = [Float, 'W7X_neL', Peak, 1],
)
# for MHD study
diags = ordict()

diags.update(dict(PkrSPD4124time = [Float, 'W7X_MIR_4124', rawPeakSPDtime, 8]))

diags.update(dict(PkrSPD4124 = [Float, 'W7X_MIR_4124', rawPeakSPD, 3]))
Example #23
 def AsDict(self):
     return ordict(self.Items())
Example #24
 def __init__(self, items=(), **kwargs):
     self.Data = ordict()
     self.Update(items)
     self.Update(kwargs)
Example #25
 def get_socl_net_info(socl_net):
     assert isinstance(socl_net, SoclNet)
     info = ordict()
     info['net'] = socl_net
     return info