Example #1
def newload(filename, verbose=1):
    """ Intended to replace load() in numpy
    """
    from numpy import load as loadz
    from numpy import cumsum
    dic = loadz(filename)
    #    if dic['version'] != None:
    #    if len((dic.files=='version').nonzero())>0:
    if len(dic.files) > 3:
        if verbose > 2: print("local v%d " % (dic['version'])),
    else:
        if verbose > 2: print("local v0: simple "),
        return (dic)  # quick, minimal return

    if verbose > 2: print(' contains %s' % dic.files)
    signalexpr = dic['signalexpr']
    timebaseexpr = dic['timebaseexpr']
    # savez saves ARRAYS always, so have to turn array back into scalar
    exec(signalexpr.tolist())
    exec(timebaseexpr.tolist())
    return ({
        "signal": signal,
        "timebase": timebase,
        "parent_element": dic['parent_element']
    })
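The "savez saves ARRAYS always" comment is the key detail here: np.savez wraps every value, scalars included, in an ndarray, so on reload a saved scalar comes back as a 0-d array and has to be converted back (hence the .tolist() calls). A minimal sketch of that round trip, using a throwaway file name:

import numpy as np

np.savez('scalar_demo.npz', version=103)   # a plain int goes in...
dic = np.load('scalar_demo.npz')
print(type(dic['version']))                # ...and a 0-d ndarray comes out
print(dic['version'].tolist())             # .tolist() (or .item()) recovers the int 103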
Example #2
def load_knowledge(net, filepath):
    """
		Load the weights and biasses for an already network from disk.
	"""
    reloaded = loadz(filepath)
    knowledge = [(name, reloaded[name]) for name in sorted(reloaded.keys())]
    set_knowledge(net, knowledge)
Example #3
def test_compress(file=None, verbose=0, eps=0, debug=False, maxcount=0):
    """ Used in developing the save compress routines.  Not tested since then
    >>> test_compress()
    Looks like it only saves the time series, not the rest.
    """
    import os
    from numpy import load as loadz, diff
    # savez and discretise_array are assumed to come from the surrounding save-compress module
    print("Testing %s" % file)
    if file is None: file='18993_densitymediaIR.npz'
    test=loadz(file)
    stat=os.stat(file)

    if verbose > 0: print("=========== testing signal compression ===========")
    sig=discretise_array(test['signal'],eps=eps,verbose=verbose,maxcount=maxcount)
    if verbose > 0: print("=========== testing timebase compression===========")
    tim=discretise_array(test['timebase'],eps=eps,verbose=verbose)
    print('  File length %d bytes, %d samples, %.3g bytes/sample' % (
            stat.st_size ,len(sig['iarr']),
            float(stat.st_size)/len(sig['iarr'])))
    temp='temp.npz'
    savez(temp,sig['iarr'])
    print("    compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp,diff(sig['iarr']))
    print("      differences compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp,diff(diff(sig['iarr'])))
    print("      double differences compressed to %d bytes" % os.stat(temp).st_size)
    print("    time compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp,diff(tim['iarr']))
    print("      difference compressed to %d" % os.stat(temp).st_size)
    savez(temp,diff(diff(tim['iarr'])))
    print("      double difference compressed to %d" % os.stat(temp).st_size)
    if debug: xx=1/0
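The diff / double-diff savez calls above illustrate why the save-compress routines difference the data before writing: a regularly sampled integer timebase becomes (nearly) constant after np.diff, which compresses well when written with a compressing writer (np.savez_compressed, for example), and the original can be rebuilt with cumsum provided the first element is kept. A minimal sketch, independent of the pyfusion routines:

import numpy as np

tb = np.arange(0, 100000, 2, dtype=np.int32)              # regularly spaced integer timebase
d = np.diff(tb)                                            # all 2s: trivially compressible
rebuilt = np.concatenate(([tb[0]], tb[0] + np.cumsum(d)))  # first element plus cumulative sums
assert np.array_equal(rebuilt, tb)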
Example #4
def deserialize(filename, axes):
    # np.loadz does not exist in numpy; np.load reads the .npz archive
    xydata = np.load(filename)
    ax_lines_xydata = {}
    for key, val in xydata.items():
        _, istr, _, jstr, xy = key.split("_")
        i, j = int(istr), int(jstr)
        ax_lines_xydata.setdefault(i, {}).setdefault(j, {})[xy] = val
    return ax_lines_xydata
Example #5
    def load_fft_cache(self, fname):

        a = np.load(fname)

        self._aps_dd = a['dd']
        self._aps_dv = a['dv']
        self._aps_vv = a['vv']

        self._aps_cache = True
Example #6
def show_npz(path):

    data = np.load(path)  # NpzFile: a dict-like collection of arrays

    # if a KeyError occurs, print data.keys() first to see the stored names
    first = data["arr_0"]
    second = data["arr_1"]
    third = data["arr_2"]

    show_img(first)
    show_img(second)
    show_img(third)
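The arr_0 / arr_1 / arr_2 names are not chosen by the caller: np.savez generates them automatically for positional (unnamed) arguments, which is exactly when the KeyError hint in the comment matters. A minimal sketch:

import numpy as np

np.savez('imgs_demo.npz', np.zeros((2, 2)), np.ones(3))   # two positional arrays
data = np.load('imgs_demo.npz')
print(list(data.keys()))                                   # ['arr_0', 'arr_1']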
Example #7
def newload(filename, verbose=1):
    """ Intended to replace load() in numpy
    """
    from numpy import load as loadz
    from numpy import cumsum
    dic=loadz(filename)
#    if dic['version'] != None:
#    if len((dic.files=='version').nonzero())>0:
    if len(dic.files)>3:
        if verbose>2: print ("local v%d " % (dic['version'])),
    else: 
        if verbose>2: print("local v0: simple "),
        return(dic)  # quick, minimal return

    if verbose>2: print(' contains %s' % dic.files)
    signalexpr=dic['signalexpr']
    timebaseexpr=dic['timebaseexpr']
# savez saves ARRAYS always, so have to turn array back into scalar    
    exec(signalexpr.tolist())
    exec(timebaseexpr.tolist())
    return({"signal":signal, "timebase":timebase, "parent_element": dic['parent_element']})
Example #8
def test_compress(file=None, verbose=0, eps=0, debug=False, maxcount=0):
    """ Used in developing the save compress routines.  Not tested since then

    >>> test_compress()

    Looks like it only saves the time series, not the rest.
    """
    import os
    from numpy import load as loadz, diff
    # savez and discretise_array are assumed to come from the surrounding save-compress module
    print("Testing %s" % file)
    if file is None: file = '18993_densitymediaIR.npz'  # on drive-n-go
    test = loadz(file)
    stat = os.stat(file)

    if verbose > 0: print("=========== testing signal compression ===========")
    sig = discretise_array(test['signal'],
                           eps=eps,
                           verbose=verbose,
                           maxcount=maxcount)
    if verbose > 0:
        print("=========== testing timebase compression===========")
    tim = discretise_array(test['timebase'], eps=eps, verbose=verbose)
    print('  File length %d bytes, %d samples, %.3g bytes/sample' %
          (stat.st_size, len(sig['iarr']),
           float(stat.st_size) / len(sig['iarr'])))
    temp = 'temp.npz'
    savez(temp, sig['iarr'])
    print("    compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp, diff(sig['iarr']))
    print("      differences compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp, diff(diff(sig['iarr'])))
    print("      double differences compressed to %d bytes" %
          os.stat(temp).st_size)
    print("    time compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp, diff(tim['iarr']))
    print("      difference compressed to %d" % os.stat(temp).st_size)
    savez(temp, diff(diff(tim['iarr'])))
    print("      double difference compressed to %d" % os.stat(temp).st_size)
    if debug: xx = 1 / 0
Example #9
def newloadv3(filename, verbose=1):
    """ This the the version that works in python 3, but can't handle Nans
    Intended to replace load() in numpy
    counterpart is data/savez_compress.py
    """
    from numpy import load as loadz
    from numpy import cumsum, array
    dic=loadz(filename)
#    if dic['version'] != None:
#    if len((dic.files=='version').nonzero())>0:
    if len(dic.files)>3:
        if verbose>2: print("local v%d " % (dic['version']), end='')
    else: 
        if verbose>2: print("local v0: simple ", end='')
        return(dic)  # quick, minimal return

    if verbose>2: print(' contains %s' % dic.files)
    signalexpr=dic['signalexpr']
    timebaseexpr=dic['timebaseexpr']
    if 'time_unit_in_seconds' in dic:
        timeunitexpr = dic['time_unit_in_seconds']
    else:
        timeunitexpr = array(1)

    # savez saves ARRAYS always, so have to turn array back into scalar    
    # exec(signalexpr.tolist())
    # Changed exec to eval for Python 3, otherwise the target names were not
    #   defined - they could only be reached via e.g. locals()['signal']
    # retdic = {"signal":locals()['signal'], "timebase":locals()['timebase'],
    #           "parent_element": dic['parent_element']}
    # Success using eval instead of exec
    signal = eval(signalexpr.tolist().split(b'=')[1])
    time_unit_in_seconds = timeunitexpr.tolist()
    timebase = time_unit_in_seconds * eval(timebaseexpr.tolist().split(b'=')[1])
    retdic = {"signal":signal, "timebase":timebase, "parent_element":
              dic['parent_element'], "params": dic['params'].tolist()}
    return(retdic)
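The eval-instead-of-exec change in newloadv3 reflects a real Python 2 to 3 difference: exec of an assignment inside a function no longer creates a local you can then reference by name, whereas eval of just the right-hand side hands the value straight back. A minimal sketch of the difference (not pyfusion code):

def with_exec():
    exec("x = 1 + 1")        # runs, but in Python 3 'x' is not visible as a local afterwards
    try:
        return x             # NameError in Python 3 (this pattern did work in Python 2)
    except NameError:
        return None

def with_eval(expr="x = 1 + 1"):
    return eval(expr.split("=")[1])   # evaluate only the right-hand side: 2

print(with_exec(), with_eval())       # None 2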
Example #10
def main():
    argument_parser = ArgumentParser()
    argument_parser.add_argument("--matrix", type=str, required=True)
    argument_parser.add_argument(
        "--method",
        type=str,
        required=False,
        choices=['zca', 'pca', 'cholesky', 'zca_cor', 'pca_cor'],
        default='zca')

    args = argument_parser.parse_args()

    matrix_path: str
    matrix_path = args.matrix
    matrix_whitened = None
    if matrix_path.endswith(".npy"):
        matrix_whitened = whiten(np.load(matrix_path), method=args.method)
    elif matrix_path.endswith(".npz"):
        # the .toarray() call implies the .npz holds a scipy sparse matrix,
        # so scipy.sparse.load_npz is assumed here (numpy has no loadz)
        matrix_whitened = whiten(sparse.load_npz(matrix_path).toarray(),
                                 method=args.method)

    output_path = matrix_path[:-4] + f"_whitened-{args.method}.npy"
    np.save(output_path, matrix_whitened)
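For reference, a hypothetical invocation (the script name is not given in the snippet) would be python whiten_matrix.py --matrix features.npz --method zca, which would write the result to features_whitened-zca.npy next to the input.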
Example #11
def newload(filename, verbose=verbose):
    """ Intended to replace load() in numpy
    This is being used with nan data.  The version in data/base.py is closer to
    python 3 compatible, but can't deal with the nans yet.
    """
    from numpy import load as loadz
    from numpy import cumsum
    dic=loadz(filename)
#    if dic['version'] != None:
#    if len((dic.files=='version').nonzero())>0:
    if len(dic.files)>3:
        if verbose>2: print ("local v%d " % (dic['version'])),
    else: 
        if verbose>2: print("local v0: simple "),
        return(dic)  # quick, minimal return

    if verbose>2: print(' contains %s' % dic.files)
    # savez saves ARRAYS always, so have to turn array back into scalar    
    signalexpr=dic['signalexpr'].tolist()
    timebaseexpr=dic['timebaseexpr'].tolist()
    # fixup for files written with np.nan removal and cumsum
    if ('cumsum' in timebaseexpr) and ('np.nan' in timebaseexpr):
        print('!!!!!!!!!!!! faking a fixup of nans with cumsum !!!!!!!!!!!!!!!!!!')
        timebaseexpr = timebaseexpr.replace("timebase=",
                             "temp=").replace("*2e-06","\ntimebase=temp*2e-06")
        timebaseexpr = timebaseexpr.replace("== dic['rawtimebase']","== temp")

    exec(signalexpr)
    if dic['version'] <= 103 and timebaseexpr.startswith('timebase=0+'):  # bdb103
        timebaseexpr = timebaseexpr.replace('timebase=0+','timebase=0.+')

    exec(timebaseexpr)
    retdic = {"signal":signal, "timebase":timebase, "parent_element":
              dic['parent_element']}

    if 'params' in dic: retdic.update({"params": dic['params'].tolist()})
    return(retdic)
Example #12
def newload(filename, verbose=verbose):
    """ Intended to replace load() in numpy
    This is being used with nan data.  The version in data/base.py is closer to
    python 3 compatible, but can't deal with the nans yet.
    """
    from numpy import load as loadz
    from numpy import cumsum
    dic = loadz(filename, allow_pickle=True)
    #    if dic['version'] != None:
    #    if len((dic.files=='version').nonzero())>0:
    if len(dic.files) > 3:
        if verbose > 2: print("local v%d " % (dic['version'])),
    else:
        if verbose > 2: print("local v0: simple "),
        return (dic)  # quick, minimal return

    if verbose > 2: print(' contains %s' % dic.files)
    # savez saves ARRAYS always, so have to turn array back into scalar
    signalexpr = dic['signalexpr'].tolist()
    timebaseexpr = dic['timebaseexpr'].tolist()

    exec(signalexpr)

    # fixup for (old) files written with both np.nan removal and cumsum
    # e.g. "timebase=0+dic['rawtimebase']*0.0064514\nmaxint = np.iinfo(dic['rawtimebase'].dtype).max\nwnan = np.where(maxint == dic['rawtimebase'])[0]\nif len(wnan)>0:\n    timebase[wnan]=np.nan"
    # the problem here is that the maxint value is scaled BEFORE comparison.
    # the fix is to scale after comparison
    #
    # A correct solution is (without delta_encode_time)
    # perhaps the only files with this error also have a cumsum?
    #   timebase=0+dic['rawtimebase']*0.0064514
    #   maxint = np.iinfo(dic['rawtimebase'].dtype).max
    #   wnan = np.where(maxint == dic['rawtimebase'])[0]
    #   if len(wnan)>0:
    #      timebase[wnan]=np.nan

    # *Without* care to avoid nans in cumsum
    if ('cumsum' in timebaseexpr) and ('np.nan' in timebaseexpr) and dic['version'] < 105:
        print(
            'newload: !!! kludging a fixup of nans in a timebase with cumsum !!'
        )
        if "*2e-06" not in timebaseexpr:
            raise LookupError(
                '*2e-06 expected in timebaseexpr:\n' + timebaseexpr +
                "\n Delete this line in save compress.py to try a good fixup")
            dt_embedded = timebaseexpr.split("'rawtimebase'])*")[-1]
            try:
                testing = float(dt_embedded)
            except Exception as reason:
                raise ValueError('newload: failed to find a factor in cumsum ',
                                 str(reason))
            timebaseexpr = timebaseexpr.replace("timebase=", "temp=").replace(
                '*' + dt_embedded, "\ntimebase=temp*" + dt_embedded)

        else:
            timebaseexpr = timebaseexpr.replace("timebase=", "temp=").replace(
                "*2e-06", "\ntimebase=temp*2e-06")
        timebaseexpr = timebaseexpr.replace("== dic['rawtimebase']", "== temp")

    # check that one doesn't sneak through - the old fix was temp, the new (106) one is detectable by iinfo
    if ('cumsum' in timebaseexpr) and ('np.nan' in timebaseexpr) and not (
        ('temp' in timebaseexpr) or ('iinfo' in timebaseexpr)):
        raise ValueError(
            'timebaseexpr has a potential issue with maxint and nans\n' +
            timebaseexpr)

    if dic['version'] <= 104 and timebaseexpr.startswith(
            'timebase=0+'):  # bdb103 - needed for 104 too
        timebaseexpr = timebaseexpr.replace('timebase=0+', 'timebase=0.+')

    exec(timebaseexpr)
    retdic = {
        "signal": signal,
        "timebase": timebase,
        "parent_element": dic['parent_element']
    }

    if not dic.allow_pickle:  # should be able to remove this, just a backstop
        pyfusion.utils.warn('resetting allow_pickle')
        dic.allow_pickle = True

    if 'params' in dic: retdic.update({"params": dic['params'].tolist()})
    if (len(signal) > 5e5) and (verbose > 0):
        print('Need to call with verbose>0 to see type of large data files')
    else:
        if 'rawsignal' in dic:
            retdic.update(dict(signaltype=type(dic['rawsignal'][0])))
        if 'rawtimebase' in dic:
            retdic.update(dict(timebasetype=type(dic['rawtimebase'][0])))

    return (retdic)
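The maxint bookkeeping that the long comment above describes is a sentinel scheme: an integer array cannot hold NaN, so the writer stores the integer type's maximum value in those positions, and the reader must compare the raw integers (before any scaling) against that maximum and only then write NaN into the scaled float result. A minimal sketch of the decode step, independent of the stored pyfusion expressions:

import numpy as np

raw = np.array([0, 1, 2, np.iinfo(np.int32).max, 4], dtype=np.int32)  # maxint marks a missing sample
dt = 2e-06                                              # scale factor, as in the expressions above

timebase = 0. + raw * dt                                # scale to a float array first
wnan = np.where(raw == np.iinfo(raw.dtype).max)[0]      # compare the RAW integers, not the scaled values
timebase[wnan] = np.nan                                 # only now substitute NaN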
Example #13
def extract_features(feature_extractor,
                     dataset,
                     batch_size,
                     loader_params,
                     average=True,
                     num_gpus=1,
                     average_num=None,
                     preemption_protection=False,
                     is_leader=True):

    sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=False)\
            if num_gpus > 1 else None
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         sampler=sampler,
                                         num_workers=loader_params.num_workers,
                                         pin_memory=loader_params.pin_memory,
                                         drop_last=False)

    features = None
    count = 0
    starting_iter = -1
    if preemption_protection and os.path.exists('feature_extraction.tmp.npz'):
        data = np.load('feature_extraction.tmp.npz')
        features = torch.Tensor(data['features']).cuda()
        count = data['count']
        starting_iter = data['curr_iter']
    for curr_iter, (inputs, labels) in enumerate(loader):
        if preemption_protection and curr_iter <= starting_iter:
            continue
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        curr_features = feature_extractor.extract(inputs)
        if average and average_num is None:
            curr_features = torch.sum(curr_features, dim=0)
            if num_gpus > 1:
                torch.distributed.all_reduce(curr_features)
            features = (
                features + curr_features.detach().cpu()
            ) if features is not None else curr_features.detach().cpu()
        elif average:
            num_features = len(dataset) // average_num
            if num_gpus > 1:
                curr_features = distributed_gather_features(
                    curr_features, batch_size, num_gpus)
            if features is None:
                features = torch.zeros(num_features, curr_features.size(-1))
            if count + curr_features.size(0) > num_features:
                remainder = count + curr_features.size(0) - num_features
                features[count:, :] += curr_features[:num_features -
                                                     count, :].detach().cpu()
                offset = 0
                while remainder > num_features:
                    features += curr_features[offset + num_features -
                                              count:2 * num_features - count +
                                              offset].detach().cpu()
                    offset += num_features
                    remainder -= num_features
                features[:remainder, :] += curr_features[
                    offset + num_features - count:, :].detach().cpu()
                count = remainder
            else:
                features[
                    count:count +
                    curr_features.size(0), :] += curr_features.detach().cpu()
                count += curr_features.size(0)
                count = count % num_features

        else:
            if num_gpus > 1:
                curr_features = distributed_gather_features(
                    curr_features, batch_size, num_gpus)

            if features is None:
                features = torch.zeros(len(dataset), curr_features.size(-1))
            features[count:count +
                     curr_features.size(0), :] = curr_features.detach().cpu()
            count += curr_features.size(0)

        if preemption_protection and curr_iter % 5000 == 0 and is_leader:
            np.savez('feature_extraction.tmp.npz',
                     features=features.detach().cpu().numpy(),
                     count=count,
                     curr_iter=curr_iter)
    if average and average_num is None:
        features /= len(dataset)
    elif average:
        features /= average_num

    return features.detach().cpu().numpy()
Example #14
    parser = argparse.ArgumentParser(
        description='train an event2vec model on events in context')

    parser.add_argument('eventfile', help='the file containing the events')
    parser.add_argument('--dimension', type=int, default=100)
    parser.add_argument('--relations', type=int, default=4)
    parser.add_argument('--window', type=int, default=2)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--embeddings')
    # args.size and args.num_epochs are used below but their add_argument calls
    # are missing from this snippet; the defaults here are assumptions
    parser.add_argument('--size', type=int, default=10000)
    parser.add_argument('--num_epochs', type=int, default=10)

    args = parser.parse_args()

    ec = EventContextData(args.eventfile)

    if args.embeddings:
        embeddings = np.load(args.embeddings)
        size = embeddings.shape[0]
    else:
        embeddings = None
        size = args.size

    er = EventContextRNN(args.dimension,
                         size,
                         args.relations,
                         embeddings=embeddings)

    eventDistribution = ec.eventProbs

    for i in range(args.num_epochs):
        print('epoch {}'.format(i))
        g = ec.iterEventContext(args.window, shuffle=True)
Example #15
    gen_sen = []
    for x in gen_sentences:
        gen_sen.append(x[0])

    #gen_sen.append(numpy.zeros(30).tolist())

    print(len(gen_sen))
    print(len(orig_sen))

    gen_s = pad_list(gen_sen)
    print(gen_s.shape)

    # 5000 gen
    # 2144 orig

    orig_s = np.load('orig_s.npz')
    gen_s = np.load('gen_s.npz')

    print("compiling")

    d = discriminator(number_words=30000,
                      num_hidden=400,
                      seq_length=seq_length,
                      mb_size=64)

    print "training started"

    for i in range(0, 20):
        u = random.uniform(0, 1)
        indexGen = random.randint(0, 200 // 64)   # integer division (Python 3)
        indexOrig = random.randint(0, 200 // 64)
Example #16
def main():
    #import sklearn.linear_model as lm

    datasets = get_dataset.all_names()
    indx = textmenu(datasets)
    if indx is None:
        return
    dataset = datasets[indx]
    x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)

    d = np.load('/data/data/mnist.npz')
    X = d['X']
    Y = d['Y'].ravel()
    Xtest = d['Xtest']
    Ytest = d['Ytest'].ravel()
    print('data loaded')

    filter_size = [5, 5, 1, 200]
    stride = 2

    # sample patches to determine median of patch distance.

    # sample filters (W, B) to create RBF

    # apply filters

    # sample patches to determine patch mean.

    # PCA patches.

    #PCA first
    n_pca_dim = 50
    X_m = np.mean(X, axis=0)  # mean
    X_zm = X - X_m  # X with zero mean
    X_cov = X_zm.T.dot(X_zm)  # X covariance
    eigval, eigvec = la.eig(X_cov)
    eigvec = eigvec[:, :n_pca_dim]  # choose the dominant 50 dimensions
    Xp = X.dot(eigvec)  # projections of X,Xtest to these 50 dim.
    Xtestp = Xtest.dot(eigvec)

    # Compute kernel step size s (median of dist among points)
    n_trials = int(Xp.shape[0]**1.5)
    I = random.randint(0, Xp.shape[0], n_trials)
    deltI = random.randint(1, Xp.shape[0], n_trials)
    J = (I + deltI) % X.shape[0]
    dists = sorted(
        map(lambda i: la.norm(Xp[I[i], :] - Xp[J[i], :]), range(n_trials)))
    s = dists[n_trials // 2]  # median distance; integer index needed in Python 3

    # generate rbf params
    n_rbf = 4000
    W = random.randn(Xp.shape[1], n_rbf) / s / np.sqrt(2)
    B = random.uniform(0, 2 * np.pi, n_rbf)

    #Xf = np.cos(Xp.dot(W)+ B)
    #Xtestf = np.cos(Xtestp.dot(W)+B)

    np.savez('mnist_pca_rbf_param.npz', P=eigvec, W=W, B=B)
    np.savez('hw2_mnist.npz',
             X=X,
             Y=Y,
             Xtest=Xtest,
             Ytest=Ytest,
             P=eigvec,
             W=W,
             B=B)
    d2 = np.load('hw2_mnist.npz')
    scipy.io.savemat('hw2_mnist.mat', d2)
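For completeness, a sketch of how the parameters saved to mnist_pca_rbf_param.npz would be applied to new data later, mirroring the commented-out Xf/Xtestf lines above (P projects onto the PCA directions, then W and B form the random-Fourier RBF features; X_new is a placeholder for whatever new data is to be transformed):

import numpy as np

params = np.load('mnist_pca_rbf_param.npz')      # written by the np.savez call above
P, W, B = params['P'], params['W'], params['B']
X_new = np.random.randn(10, P.shape[0])          # placeholder data with the same feature width
X_new_p = X_new.dot(P)                           # project onto the 50 PCA directions
X_new_f = np.cos(X_new_p.dot(W) + B)             # 4000-dimensional RBF random features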