Example #1
def read(input_fname, gold_fname, output_fname=None, with_confidence=False,
         max_sent_size=_max_sent_size):
    inp = read_system_input(input_fname, max_sent_size=max_sent_size)
    gold = read_gold_standard(gold_fname)
    if output_fname:
        out = read_system_output(output_fname, with_confidence)
        return merge_arrays((inp, gold, out), flatten=True)
    else:
        return merge_arrays((inp, gold), flatten=True)
Example #2
    def test_solo_w_flatten(self):
        # Test merge_arrays on a single array w & w/o flattening
        w = self.data[0]
        test = merge_arrays(w, flatten=False)
        assert_equal(test, w)

        test = merge_arrays(w, flatten=True)
        control = np.array([(1, 2, 3.0), (4, 5, 6.0)], dtype=[("a", int), ("ba", float), ("bb", int)])
        assert_equal(test, control)
Example #3
 def test_wmasked_arrays(self):
     # Test merge_arrays masked arrays
     (_, x, _, _) = self.data
     mx = ma.array([1, 2, 3], mask=[1, 0, 0])
     test = merge_arrays((x, mx), usemask=True)
     control = ma.array([(1, 1), (2, 2), (-1, 3)], mask=[(0, 1), (0, 0), (1, 0)], dtype=[("f0", int), ("f1", int)])
     assert_equal(test, control)
     test = merge_arrays((x, mx), usemask=True, asrecarray=True)
     assert_equal(test, control)
     assert_(isinstance(test, MaskedRecords))
Example #4
    def test_flatten(self):
        # Test standard & flexible
        (_, x, _, z) = self.data
        test = merge_arrays((x, z), flatten=True)
        control = np.array([(1, "A", 1.0), (2, "B", 2.0)], dtype=[("f0", int), ("A", "|S3"), ("B", float)])
        assert_equal(test, control)

        test = merge_arrays((x, z), flatten=False)
        control = np.array(
            [(1, ("A", 1.0)), (2, ("B", 2.0))], dtype=[("f0", int), ("f1", [("A", "|S3"), ("B", float)])]
        )
        assert_equal(test, control)
Example #5
    def test_flatten(self):
        # Test standard & flexible
        (_, x, _, z) = self.data
        test = merge_arrays((x, z), flatten=True)
        control = np.array([(1, 'A', 1.), (2, 'B', 2.)],
                           dtype=[('f0', int), ('A', '|S3'), ('B', float)])
        assert_equal(test, control)

        test = merge_arrays((x, z), flatten=False)
        control = np.array([(1, ('A', 1.)), (2, ('B', 2.))],
                           dtype=[('f0', int),
                                  ('f1', [('A', '|S3'), ('B', float)])])
        assert_equal(test, control)
Example #6
    def test_flatten_wflexible(self):
        # Test flatten standard & nested
        (w, x, _, _) = self.data
        test = merge_arrays((x, w), flatten=True)
        control = np.array(
            [(1, 1, 2, 3.0), (2, 4, 5, 6.0)], dtype=[("f0", int), ("a", int), ("ba", float), ("bb", int)]
        )
        assert_equal(test, control)

        test = merge_arrays((x, w), flatten=False)
        controldtype = [("f0", int), ("f1", [("a", int), ("b", [("ba", float), ("bb", int)])])]
        control = np.array([(1.0, (1, (2, 3.0))), (2, (4, (5, 6.0)))], dtype=controldtype)
        assert_equal(test, control)
Example #7
    def test_w_shorter_flex(self):
        # Test merge_arrays w/ a shorter flexndarray.
        z = self.data[-1]

        # Fixme, this test looks incomplete and broken
        # test = merge_arrays((z, np.array([10, 20, 30]).view([('C', int)])))
        # control = np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)],
        #                   dtype=[('A', '|S3'), ('B', float), ('C', int)])
        # assert_equal(test, control)

        # Hack to avoid pyflakes warnings about unused variables
        merge_arrays((z, np.array([10, 20, 30]).view([("C", int)])))
        np.array([("A", 1.0, 10), ("B", 2.0, 20), ("-1", -1, 20)], dtype=[("A", "|S3"), ("B", float), ("C", int)])
Example #8
    def test_standard(self):
        # Test standard & standard
        # Test merge arrays
        (_, x, y, _) = self.data
        test = merge_arrays((x, y), usemask=False)
        control = np.array([(1, 10), (2, 20), (-1, 30)], dtype=[("f0", int), ("f1", int)])
        assert_equal(test, control)

        test = merge_arrays((x, y), usemask=True)
        control = ma.array(
            [(1, 10), (2, 20), (-1, 30)], mask=[(0, 0), (0, 0), (1, 0)], dtype=[("f0", int), ("f1", int)]
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)
Example #9
    def test_solo(self):
        # Test merge_arrays on a single array.
        (_, x, _, z) = self.data

        test = merge_arrays(x)
        control = np.array([(1,), (2,)], dtype=[('f0', int)])
        assert_equal(test, control)
        test = merge_arrays((x,))
        assert_equal(test, control)

        test = merge_arrays(z, flatten=False)
        assert_equal(test, z)
        test = merge_arrays(z, flatten=True)
        assert_equal(test, z)
Example #10
    def test_flatten(self):
        # Test standard & flexible
        (_, x, _, z) = self.data
        test = merge_arrays((x, z), flatten=True)
        control = np.array(
            [(1, "A", 1.0), (2, "B", 2.0)],
            dtype=[("f0", int), ("A", "|S3"), ("B", float)],
        )
        assert_equal(test, control)

        test = merge_arrays((x, z), flatten=False)
        control = np.array(
            [(1, ("A", 1.0)), (2, ("B", 2.0))],
            dtype=[("f0", int), ("f1", [("A", "|S3"), ("B", float)])],
        )
        assert_equal(test, control)
Example #11
    def computeFunction(self, data):
        # If we haven't specified a domain
        if self._domain is None or self._range is None:
            # We look for a domain in our parents
            up = self._parent
            while up is not None:
                if isinstance(up, FilterModule):
                if up._domain is not None and up._range is not None:
                        self._domain = up._domain
                        self._range = up._range
                up = up._parent

        if self._domain is not None and self._range is not None:
            if self._range == "Custom":
                try:
                    #print "custom = " + self._customFunction
                    exec("custom = " + self._customFunction)
                except:
                    custom = np.zeros(data.shape[0], dtype=[('Custom', 'f4')])
                func = merge_arrays((data[self._domain], custom),
                                    asrecarray=True,
                                    flatten=True).copy().view(HDFunction)
                func.normalize(norm=self._norm)
                #print func.dtype.names,func.dtype.names[:-1] + ('Custom',)
                func.dtype.names = func.dtype.names[:-1] + ('Custom', )
                return func
            else:
                return data.function(self._domain + [self._range],
                                     norm=self._norm)
        else:
            return None
Example #12
 def readHSTsample(self):
     """
     # read the HST galaxy training sample
     """
     if os.path.isfile(self.finName):
         catfinal = fitsio.read(self.finName)
     else:
         cosmos_cat = galsim.COSMOSCatalog(self.catName,
                                           dir=self.directory)
         # used index
         index_use = cosmos_cat.orig_index
         # used catalog
         paracat = cosmos_cat.param_cat[index_use]
         # parametric catalog
         oricat = fitsio.read(
             cosmos_cat.real_cat.getFileName())[index_use]
         ra = oricat['RA']
         dec = oricat['DEC']
         indexNew = np.arange(len(ra), dtype=int)
         __tmp = np.stack([ra, dec, indexNew]).T
         radec = np.array([tuple(__t) for __t in __tmp],
                          dtype=[('ra', '>f8'), ('dec', '>f8'),
                                 ('index', 'i8')])
         catfinal = rfn.merge_arrays([paracat, radec],
                                     flatten=True,
                                     usemask=False)
         fitsio.write(self.finName, catfinal)
     self.catused = catfinal
     return
Example #13
    def write_tree(self):
        array = []

        all_branches = default_branches + ["label", "train_id"]
        for aux in all_branches:
            print aux
            a = self.append_arrays(aux)
            if aux == "label" or aux == "train_id":
                tree_name = aux
            elif "evt_weight" in aux:
                tree_name = "weight"
            else:
                tree_name = aux[:-1]
            a_ = self.create_structured_array(a, tree_name)
            array.append(a_)

        for mva in self.mva_helpers.keys():
            a = [
                y for x in [
                    self.mva_helpers[mva]["prediction"]["train"],
                    self.mva_helpers[mva]["prediction"]["test"],
                    self.mva_helpers[mva]["prediction"]["data"]
                ] for y in x
            ]
            a_ = self.create_structured_array(a, mva)
            array.append(a_)

        merged_array = rfn.merge_arrays(array, flatten=True, usemask=False)

        self.output_root = self.output + ".root"
        os.system("rm %s" % self.output_root)
        root_numpy.array2root(merged_array, self.output_root, treename="t")
Example #14
  def chromosome_index(self):
    snps     = self.snps
    n        = len(snps)
    index    = np.arange(n).astype([('index',int)])
    chroms   = snps['chromosome'].astype('S10')
    chromset = set(chroms)
    chroms   = chroms.astype([('chromosome','S10')])
    locs     = snps['location'].astype(int).astype([('location',int)])
    snps     = rfn.merge_arrays([chroms,locs,index])

    snps.sort()

    index = {}
    for name in chromset:
      mask        = snps['chromosome']==name
      indices     = snps['index'][mask]
      pos         = snps['location'][mask]

      if name.startswith('chr'):
        name = name[3:]
      if name.upper()=='MT':
        name = 'M'

      index[name] = pos,indices

    return index
Example #15
 def load_as_recarr(self, filename, fields=None, formatmarker='# format:'):
     """Warning: if there's missing value, it will be filled with default value.
         See: https://docs.scipy.org/doc/numpy/user/basics.rec.html
         numpy.lib.recfunctions.merge_arrays()
             -1 for integers
             -1.0 for floating point numbers
             '-' for characters
             '-1' for strings
             True for boolean values
     """
     cols = self.load(filename, fields=fields, formatmarker=formatmarker)
     _fields, cols_data = cols.keys(), cols.values()  # a None `fields` argument is filled in during self.load
     cols_type = [
         self.name2fmter[field_name]._type_ for field_name in _fields
     ]
     #for name,x,t in zip(fields, cols_data, cols_type):
     #    print '***', name,x,t
     cols_nparr = [
         np.array(x, dtype=t)
         for name, x, t in zip(_fields, cols_data, cols_type)
     ]
     unnamed_recarr = rfn.merge_arrays(cols_nparr,
                                       flatten=True,
                                       usemask=False).view(np.recarray)
     return rfn.rename_fields(
         unnamed_recarr, dict(zip(unnamed_recarr.dtype.names, _fields)))
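As a side note, the default fill values quoted in the docstring above can be seen directly: when the merged arrays have unequal lengths and usemask=False, merge_arrays pads the shorter field with those defaults. A minimal standalone sketch with hypothetical arrays (not part of the loader above):

import numpy as np
from numpy.lib.recfunctions import merge_arrays

# The shorter integer field is padded with the default fill value -1
# when usemask=False; with usemask=True the padded entry is masked instead.
a = np.array([1, 2], dtype=[('a', int)])
b = np.array([10.0, 20.0, 30.0])
merged = merge_arrays((a, b), flatten=True, usemask=False)
# merged['a']  -> array([ 1,  2, -1])
# merged['f1'] -> array([10., 20., 30.])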
Example #16
    def __label_ner(self, ners_offset, ners_label, tokens_start, tokens_end):

        # Add BIO sublabels to the NER labels: the first token of an entity gets a B- prefix and following tokens get I-. Continuation spans of a non-consecutive entity (i > 0 below) get I- on every token.

        # ners info: label, start and end chars positions.
        ners_lbl = []
        ners_st_id = []
        ners_end_id = []

        for offset, label in zip(ners_offset, ners_label):
            # get ner start/stop chars ids (from xml)
            pos_ = [p.split("-") for p in offset.split(";")]

            # if the NER spans multiple non-consecutive token runs, then len(pos_) > 1, i.e. i > 0 in the loop below
            for i, pos in enumerate(pos_):
                # i>0; multiple non-consecutive tokens ner
                sublabels = ["I-", "I-"] if i > 0 else ["B-", "I-"]

                # We have start/stop indices for all tokens in the sentence; now find the NER's start/stop indices within those lists (relate tokens to the NER).
                # Find entity's start/stop chars ids (from xml) in tokens start/stop ids lists. i.e. search "pos" in  "tokens_start" and "tokens_end" lists
                # if the ner consists of multiple (consecutive) tokens, both of the start and the stop index will have the same location in "tokens_start" and "tokens_end" lists.
                # accounts for start sequence // add 9 to ids
                ner_st_id = tokens_start.index(int(pos[0]) + 9)
                # this handles the dataset issue discussed in README.md, section 1.3.2 (Plural Entity)
                if int(pos[1]) + 1 + 9 in tokens_end:
                    ner_end_id = tokens_end.index(int(pos[1]) + 1 + 9)
                elif int(pos[1]) + 1 + 1 + 9 in tokens_end:
                    ner_end_id = tokens_end.index(int(pos[1]) + 1 + 1 + 9)

                # multiple (non-consecutive or consecutive) tokens ner, sublabel depends on the value of i
                if ner_st_id != ner_end_id:
                    labels = [label] * (ner_end_id - ner_st_id + 1)
                    labels = [sublabels[0] + labels[0]] + \
                        [sublabels[1] + lb for lb in labels[1:]]

                # single token ner; non-consecutive ner
                elif i > 0:
                    labels = ["I-" + label]

                # single token ner
                else:
                    labels = ["B-" + label]

                # store and return ner info
                ners_lbl.extend(labels)
                ners_st_id.extend(tokens_start[ner_st_id:ner_end_id + 1])
                ners_end_id.extend(tokens_end[ner_st_id:ner_end_id + 1])

        ner_data = rfn.merge_arrays((ners_st_id, ners_end_id, ners_lbl))
        # token could have multiple tag, see README.MD.
        ner_data = np.unique(ner_data, axis=0)

        ner_start = ner_data[ner_data.dtype.names[0]]
        ner_end = ner_data[ner_data.dtype.names[1]]
        ner_labels = ner_data[ner_data.dtype.names[2]]
        # check that there is no overlap in start/stop ids
        # temp = np.unique(np.column_stack((ner_start, ner_end)).flatten())
        # assert np.all(temp[:-1] <= temp[1:])

        return ner_start, ner_end, ner_labels
Example #17
def write_html(r, render_to='table.html'):
    types = numpy.zeros(len(r), dtype=[
        ('Type', 'S60'),
    ])

    for i, row in enumerate(r[type_fields]):
        for j, k in enumerate(row):
            if k == True:
                types[i][0] += type_fields[j] + ' | '

    r = rfn.merge_arrays([r, types], flatten=True)[used_columns]

    x = PrettyTable(r.dtype.names)
    for row in r:

        x.add_row(row)

    with open('index.html', 'r') as template:
        with open(render_to, 'w') as output:
            s = template.read()
            s = s.replace(
                "{{ table }}",
                x.get_html_string(
                    attributes={"class": "table table-hover table-striped"}))

            output.write(s)
Example #18
    def test_w_shorter_flex(self):
        # Test merge_arrays w/ a shorter flexndarray.
        z = self.data[-1]

        # Fixme, this test looks incomplete and broken
        # test = merge_arrays((z, np.array([10, 20, 30]).view([('C', int)])))
        # control = np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)],
        #                   dtype=[('A', '|S3'), ('B', float), ('C', int)])
        # assert_equal(test, control)

        # Hack to avoid pyflakes warnings about unused variables
        merge_arrays((z, np.array([10, 20, 30]).view([("C", int)])))
        np.array(
            [("A", 1.0, 10), ("B", 2.0, 20), ("-1", -1, 20)],
            dtype=[("A", "|S3"), ("B", float), ("C", int)],
        )
Example #19
def findMovingAverage(outputCollection, collectionName, column, windowSize):
    """Find the average of column based on a window size
    
    Parameters
    ----------
    outputCollection : str
        The collection name we store the final result as
    collectionName : str
        The collection name we use to take average
    column : str
        The column name we want the average of
    windowSize : int
        The number of rows we average at a moment
    """
    wholeColumn = allCollections[collectionName][column]
    movingAverage = []
    sumTillHere = 0
    for rowNum in range(len(wholeColumn)):
        if rowNum < windowSize:
            sumTillHere += wholeColumn[rowNum]
            movingAverage.append(sumTillHere / (rowNum + 1))
        else:
            sumTillHere -= wholeColumn[rowNum - windowSize]
            sumTillHere += wholeColumn[rowNum]
            movingAverage.append(sumTillHere / windowSize)

    movAvgTable = np.array(movingAverage,
                           dtype=[('movavg(' + column + ')', 'float_')])
    allCollections[outputCollection] = rfn.merge_arrays(
        (allCollections[collectionName], movAvgTable),
        flatten=True,
        usemask=False)
    printTable(allCollections[outputCollection])
    outputToFile(outputCollection, 'allOperations', 'a+')
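For reference, the running-sum window logic in the loop above can be sketched on plain numbers (hypothetical values, independent of the allCollections registry the function reads from):

# Standalone sketch of the moving-average window arithmetic used above.
values = [2.0, 4.0, 6.0, 8.0]
window = 2
out, running = [], 0.0
for i, v in enumerate(values):
    if i < window:
        running += v
        out.append(running / (i + 1))      # average of the first i+1 values
    else:
        running += v - values[i - window]  # slide the window forward
        out.append(running / window)
# out == [2.0, 3.0, 5.0, 7.0]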
Example #20
    def produceBinWeighter(self, filenames):
        weighter = self.make_empty_weighter()
        branches = [self.weightbranchX, self.weightbranchY]
        showprog = ShowProgress(5, len(filenames))
        counter = 0
        if self.remove or self.weight:
            for fname in filenames:
                # Read truths
                truth_array = self.readTreeFromRootToTuple(
                    fname, branches=self.truthclasses)
                # Use defined reduced truths
                reduced_array = self.reduceTruth(truth_array)
                dtype = zip(self.reducedtruthclasses,
                            [col.dtype for col in reduced_array.transpose()])
                reduced_array = reduced_array.ravel().view(dtype)
                # Read variables to weight along
                weight_along_array = self.readTreeFromRootToTuple(
                    fname, branches=branches)
                # Merge two arrays
                import numpy.lib.recfunctions as rfn
                nparray = rfn.merge_arrays([reduced_array, weight_along_array],
                                           flatten=True,
                                           usemask=False)

                weighter.addDistributions(nparray,
                                          referenceclass=self.referenceclass)
                del nparray
                del truth_array
                del weight_along_array
                del reduced_array
                showprog.show(counter)
                counter = counter + 1
            weighter.createRemoveProbabilitiesAndWeights(self.referenceclass)
        return weighter
Example #21
    def restrict(self, restriction):
        restriction = np.asarray(restriction, bool)
        base = rsgame.empty_copy(self).restrict(restriction)

        size_mask = restriction.repeat(self._sizes)
        sizes = self._sizes[restriction]
        profiles = self._profiles[size_mask]
        lengths = self._lengths[restriction]
        zeros = (profiles[:, ~restriction] /
                 lengths[:, ~restriction].repeat(sizes, 0))
        removed = np.exp(-np.einsum('ij,ij->i', zeros, zeros) / 2)  # pylint: disable=invalid-unary-operand-type
        uprofs, inds = np.unique(recfunctions.merge_arrays([
            np.arange(restriction.sum()).repeat(sizes).view([('s', int)]),
            utils.axis_to_elem(profiles[:, restriction])
        ],
                                                           flatten=True),
                                 return_inverse=True)
        new_alpha = np.bincount(inds, removed * self._alpha[size_mask])
        new_sizes = np.diff(
            np.concatenate([[-1],
                            np.flatnonzero(np.diff(uprofs['s'])),
                            [new_alpha.size - 1]]))

        return _RbfGpGame(base.role_names, base.strat_names,
                          base.num_role_players, self._offset[restriction],
                          self._coefs[restriction], lengths[:, restriction],
                          new_sizes, uprofs['axis'], new_alpha)
Example #22
    def update_phase(self):
        """Perform all the updates on the modules.

        The update phase occurs N times, based on the user configuration for
        the experiment. This function loops over the instruments and
        post-processing modules (based on their priority) and calls their
        update method.

        One file will be written for each update.
        """
        for update_number in range(self.config['updates']):
            current_data = np.array([(np.datetime64('now'), )],
                                    dtype=[('time', 'datetime64[us]')])

            for module in self.modules:
                class_ = module.__class__
                print("...{}: updating {}...".format(
                    update_number, module.__class__.__name__))
                if issubclass(class_, Instrument):
                    try:
                        module_data = module.update(update_number)
                    except RuntimeError:
                        self.cleanup_phase(abort=True)
                        raise
                    if module_data is not None:
                        current_data = rfn.merge_arrays(
                            [current_data, module_data], flatten=True)
                elif issubclass(class_, PostProcessing):
                    current_data = module.update(update_number,
                                                 current_data.copy())
            filename = '{}/scan_data_{:03d}.npy'.format(
                self.config['directory'], update_number)
            with open(filename, 'xb') as data_file:
                np.save(data_file, current_data.copy(), allow_pickle=False)
Example #23
    def test_flatten_wflexible(self):
        # Test flatten standard & nested
        (w, x, _, _) = self.data
        test = merge_arrays((x, w), flatten=True)
        control = np.array([(1, 1, 2, 3.0), (2, 4, 5, 6.0)],
                           dtype=[('f0', int), ('a', int), ('ba', float),
                                  ('bb', int)])
        assert_equal(test, control)

        test = merge_arrays((x, w), flatten=False)
        controldtype = [('f0', int),
                        ('f1', [('a', int), ('b', [('ba', float),
                                                   ('bb', int)])])]
        control = np.array([(1., (1, (2, 3.0))), (2, (4, (5, 6.0)))],
                           dtype=controldtype)
        assert_equal(test, control)
Example #24
 def run(self, outputs_requested, **kwargs):
     arrays = [kwargs[input_key].to_np() for input_key in 
               self.__input_keys]
     # http://stackoverflow.com/questions/15815854/how-to-add-column-to-numpy-array
     out = UObject(UObjectPhase.Write)
     out.from_np(merge_arrays(arrays, flatten=True))
     return {'output': out}
Example #25
    def restrict(self, restriction):
        restriction = np.asarray(restriction, bool)
        base = rsgame.empty_copy(self).restrict(restriction)

        size_mask = restriction.repeat(self._sizes)
        sizes = self._sizes[restriction]
        profiles = self._profiles[size_mask]
        lengths = self._lengths[restriction]
        zeros = (profiles[:, ~restriction] /
                 lengths[:, ~restriction].repeat(sizes, 0))
        removed = np.exp(-np.einsum('ij,ij->i', zeros, zeros) / 2) # pylint: disable=invalid-unary-operand-type
        uprofs, inds = np.unique(
            recfunctions.merge_arrays([
                np.arange(restriction.sum()).repeat(sizes).view([('s', int)]),
                utils.axis_to_elem(profiles[:, restriction])], flatten=True),
            return_inverse=True)
        new_alpha = np.bincount(inds, removed * self._alpha[size_mask])
        new_sizes = np.diff(np.concatenate([
            [-1], np.flatnonzero(np.diff(uprofs['s'])),
            [new_alpha.size - 1]]))

        return _RbfGpGame(
            base.role_names, base.strat_names, base.num_role_players,
            self._offset[restriction], self._coefs[restriction],
            lengths[:, restriction], new_sizes, uprofs['axis'], new_alpha)
Example #26
 def parseLogs(self, infile):
   pprint("Parsing file");
   # data is reconstructed as array of tuples
   self.data = np.genfromtxt(infile, delimiter=" ", comments="#",
     #dtype="float,float,int,int,int,int,int,int,int,int",
     #names=["ts", "ots", "cpu", "fid", "st0", "st1", "len", "count", "tot", "dlt"]
     dtype="float,float,int,int,int,int,int,int,int,int,int,int,int",
     names=["ts", "ots", "cpu", "fid", "st0", "st1", "len", "count", "tot", "dlt", "wrdlt", "wbps", "rbps"]
   )
   # iterate; create and append new columns
   if 0: # (note, takes a while, so avoiding)
     pprint("Got %d entries; processing" % (len(self.data)) )
     i = 0; rdtot=0; wrtot=0; rbps = 0; wbps = 0; wt=[]; #wd = []; wa =[]; ra=[];
     for ix in self.data:
       if ix['fid'] == 1:
         wrtot = ix['tot']
         wbps=wrtot/ix['ts'];
       if ix['fid'] == 2:
         rdtot = ix['tot']
         rbps = rdtot/ix['ts']
       #wd.append( wrtot-rdtot)
       #wa.append( wbps );
       #ra.append( rbps );
       wt.append ( (wrtot-rdtot, wbps, rbps) )
     #my_appended_array = append_fields( self.data, names=['wrdlt','wbps','rbps'], dtypes="int,int,int", data=[wd, wa, ra] ) # problem; "expected a readable buffer"
     self.data = merge_arrays([self.data, np.array(wt, dtype=[('wrdlt', int), ('wbps', int), ('rbps', int)]) ], flatten=True) # cast to int works from here
   #end if 0
   pprint("Done.") #(self.data)
Example #27
 def test_singlerecord(self):
     (_, x, y, z) = self.data
     test = merge_arrays((x[0], y[0], z[0]), usemask=False)
     control = np.array([(1, 10, ('A', 1))],
                        dtype=[('f0', int), ('f1', int),
                               ('f2', [('A', '|S3'), ('B', float)])])
     assert_equal(test, control)
Example #28
 def test_w_singlefield(self):
     # Test single field
     test = merge_arrays((np.array([1, 2]).view([("a", int)]), np.array([10.0, 20.0, 30.0])))
     control = ma.array(
         [(1, 10.0), (2, 20.0), (-1, 30.0)], mask=[(0, 0), (0, 0), (1, 0)], dtype=[("a", int), ("f1", float)]
     )
     assert_equal(test, control)
Example #29
def create_pointcloud(data, mode=None, groups=None, batch_columns=None):
    if mode is None:
        mode = Mode.STANDARD
    if groups is None:
        groups = {}
    groups = merge_dicts(DEFAULT_GROUPS, groups)
    if batch_columns is None:
        batch_columns = []

    columns = []

    def add(name, data):
        if name == 'position':
            columns.append(PositionColumn(name, data, mode))
        elif name.lower() in FeatureColumn.TYPES:
            columns.append(FeatureColumn(name.lower(), data))
        else:
            columns.append(BatchColumn(name, data))

    # Remap columns based off their groupings
    grouped = set()
    for name, selection in groups.iteritems():
        add(name, data[selection])
        grouped.update(
            selection if isinstance(selection, list) else [selection])
    for column in (set(data.dtype.names) - grouped):
        add(column, data[column])

    # Find all mapped values and replace it with their mapping
    if len(batch_columns) > 0:
        r_columns, r_names = [], []
        for column in batch_columns:
            if column not in columns:
                raise Exception('Column %s does not exist' % column)
            r_column = columns[columns.index(column)]
            r_columns.append(r_column)
            r_names.extend(r_column.names())

        merged_data = merge_arrays([x.data() for x in r_columns],
                                   flatten=True,
                                   usemask=False)
        merged_data.dtype.names = r_names
        batch_groups, batch_ids = np.unique(merged_data,
                                            axis=0,
                                            return_inverse=True)
        batch_ids = batch_ids.astype(np.uint16)

        idx_offset = 0
        for column in r_columns:
            column.is_instanced = True
            d_names = column.names()
            selector = d_names[0] if column.count() == 1 else d_names
            column._data = batch_groups[selector]

        columns.append(
            FeatureColumn('batch_id', batch_ids,
                          {'BATCH_LENGTH': len(batch_groups)}))

    return PointcloudTile(columns)
Example #30
 def test_w_singlefield(self):
     # Test single field
     test = merge_arrays(
         (np.array([1, 2]).view([('a', int)]), np.array([10., 20., 30.])), )
     control = ma.array([(1, 10.), (2, 20.), (-1, 30.)],
                        mask=[(0, 0), (0, 0), (1, 0)],
                        dtype=[('a', int), ('f1', float)])
     assert_equal(test, control)
Example #31
 def __init__(self, size=default_size, freq="1m"):
     super(ArrayManager, self).__init__(size=size)
     dt_int = np.array([(0, )] * size,
                       dtype=np.dtype([('datetimeint', np.int64)]))
     self.array = rfn.merge_arrays([dt_int, self.array],
                                   flatten=True,
                                   usemask=False)
     self._freq = freq
Example #32
def fix_hj_fracgood_colmns(objects):
    nrows = objects.size
    dtypes = np.dtype([('FRACGOOD', '>f4')])
    dummy = np.empty(nrows, dtype=dtypes)
    dummy[:] = 1.0
    obj = rfn.merge_arrays([objects,dummy], flatten=True, usemask=False)
    #
    return obj
Example #33
    def test_standard(self):
        # Test standard & standard
        # Test merge arrays
        (_, x, y, _) = self.data
        test = merge_arrays((x, y), usemask=False)
        control = np.array([(1, 10), (2, 20), (-1, 30)],
                           dtype=[("f0", int), ("f1", int)])
        assert_equal(test, control)

        test = merge_arrays((x, y), usemask=True)
        control = ma.array(
            [(1, 10), (2, 20), (-1, 30)],
            mask=[(0, 0), (0, 0), (1, 0)],
            dtype=[("f0", int), ("f1", int)],
        )
        assert_equal(test, control)
        assert_equal(test.mask, control.mask)
Example #34
 def test_w_singlefield(self):
     # Test single field
     test = merge_arrays((np.array([1, 2]).view([('a', int)]),
                          np.array([10., 20., 30.])),)
     control = ma.array([(1, 10.), (2, 20.), (-1, 30.)],
                        mask=[(0, 0), (0, 0), (1, 0)],
                        dtype=[('a', int), ('f1', float)])
     assert_equal(test, control)
Example #35
 def test_singlerecord(self):
     (_, x, y, z) = self.data
     test = merge_arrays((x[0], y[0], z[0]), usemask=False)
     control = np.array([(1, 10, ('A', 1))],
                        dtype=[('f0', int),
                               ('f1', int),
                               ('f2', [('A', '|S3'), ('B', float)])])
     assert_equal(test, control)
Example #36
 def test_singlerecord(self):
     (_, x, y, z) = self.data
     test = merge_arrays((x[0], y[0], z[0]), usemask=False)
     control = np.array(
         [(1, 10, ("A", 1))],
         dtype=[("f0", int), ("f1", int),
                ("f2", [("A", "|S3"), ("B", float)])],
     )
     assert_equal(test, control)
Example #37
    def finalizeObjects(self, objects):
        objs = numpy.recarray(len(objects),
                              dtype=[('NAME','S24'),
                                     ('TS','f4'),
                                     ('GLON','f4'),
                                     ('GLAT','f4'),
                                     ('RA','f4'),
                                     ('DEC','f4'),
                                     ('MODULUS','f4'),
                                     ('DISTANCE','f4'),
                                     ('RICHNESS','f4'),
                                     ('MASS','f4'),
                                     ('NANNULUS','i4'),
                                     ('NINTERIOR','i4'),
                                     ])
        
        objs['TS'] = self.values[objects['IDX_MAX'],objects['ZIDX_MAX']]
        lon,lat = objects['X_MAX'],objects['Y_MAX']

        coordsys = self.config['coords']['coordsys']
        if coordsys.lower() == 'gal':
            print("GAL coordintes")
            objs['GLON'],objs['GLAT'] = lon,lat
            objs['RA'],objs['DEC'] = gal2cel(lon,lat)
        else:
            print("CEL coordintes")
            objs['RA'],objs['DEC'] = lon,lat
            objs['GLON'],objs['GLAT'] = cel2gal(lon,lat)

        modulus = objects['Z_MAX']
        objs['MODULUS'] = modulus
        objs['DISTANCE'] = mod2dist(modulus)

        nside = healpy.npix2nside(len(self.nannulus))
        pix = ang2pix(nside,lon,lat)

        richness = self.richness[objects['IDX_MAX'],objects['ZIDX_MAX']]
        objs['RICHNESS'] = richness
        objs['MASS'] = richness * self.stellar[pix]

        objs['NANNULUS']  = self.nannulus[pix].astype(int)
        objs['NINTERIOR'] = self.ninterior[pix].astype(int)

        # Default name formatting
        # http://cdsarc.u-strasbg.fr/ftp/pub/iau/
        # http://cds.u-strasbg.fr/vizier/Dic/iau-spec.htx
        fmt = "J%(hour)02i%(hmin)04.1f%(deg)+03i%(dmin)02i"
        for obj,_ra,_dec in zip(objs,objs['RA'],objs['DEC']):
            hms = dec2hms(_ra); dms = dec2dms(_dec)
            params = dict(hour=hms[0],hmin=hms[1]+hms[2]/60.,
                          deg=dms[0],dmin=dms[1]+dms[2]/60.)
            obj['NAME'] = fmt%params

        out = recfuncs.merge_arrays([objs,objects],usemask=False,
                                    asrecarray=True,flatten=True)

        return out
Example #38
    def test_flatten_wflexible(self):
        # Test flatten standard & nested
        (w, x, _, _) = self.data
        test = merge_arrays((x, w), flatten=True)
        control = np.array(
            [(1, 1, 2, 3.0), (2, 4, 5, 6.0)],
            dtype=[("f0", int), ("a", int), ("ba", float), ("bb", int)],
        )
        assert_equal(test, control)

        test = merge_arrays((x, w), flatten=False)
        controldtype = [
            ("f0", int),
            ("f1", [("a", int), ("b", [("ba", float), ("bb", int)])]),
        ]
        control = np.array([(1.0, (1, (2, 3.0))), (2, (4, (5, 6.0)))],
                           dtype=controldtype)
        assert_equal(test, control)
Example #39
def get_series(data, series_name):
  desired = data['calibration_%s_record_desired_pose' % series_name]
  desired,_ = transform_from_start(desired)
  d_i = time_cluster(desired)
  desired = desired[numpy.hstack([numpy.diff(d_i)>0,[True]])]
  desired = rfn.merge_arrays([desired,quat_to_rpq(desired)],flatten=True)
  estimated = data['calibration_%s_record_estimated_pose' % series_name]
  estimated,_ = transform_from_start(estimated)
  e_i = time_cluster(estimated)
  return desired, estimated, e_i
Example #40
 def extend(data, const, duplicate):
     if len(duplicate) == 0: return data
     new_vals = tuple([const[f][0] for f in duplicate])
     new_fields = [gen_name(field, data.dtype.names) for field in duplicate]
     dtype = [(name, np.dtype(type(val)))
              for (name, val) in zip(new_fields, new_vals)]
     extension = np.empty(len(data), dtype=dtype)
     extension.fill(new_vals)
     data = merge_arrays((data, extension), usemask=False, flatten=True)
     return data
Example #41
 def test_w_singlefield(self):
     # Test single field
     test = merge_arrays((np.array([1, 2]).view(
         [("a", int)]), np.array([10.0, 20.0, 30.0])), )
     control = ma.array(
         [(1, 10.0), (2, 20.0), (-1, 30.0)],
         mask=[(0, 0), (0, 0), (1, 0)],
         dtype=[("a", int), ("f1", float)],
     )
     assert_equal(test, control)
Example #42
    def test_sql(self):

        # Make sure we don't accidentally corrupt our test database
        db_path, db_file_name = self._tmp_files.tmp_copy(path_of_data(
            'small.db'))
        db_url = 'sqlite:///{}'.format(db_path)
        
        q_sel_employees = 'CREATE TABLE {tmp_emp} AS SELECT * FROM employees;'
        # We have to be careful about the datetime type in sqlite3. It will
        # forget if we don't keep reminding it, and if it forgets sqlalchemy
        # will be unhappy. Hence, we can't use CREATE TABLE AS if our table
        # has a DATETIME
        q_sel_hours = ('CREATE TABLE {tmp_hrs} '
                       '(id INT, employee_id INT, time DATETIME, '
                       '    event_type TEXT); '
                       'INSERT INTO {tmp_hrs} SELECT * FROM hours;')
        q_join = ('CREATE TABLE {joined} '
                  '(id INT, last_name TEXT, salary REAL, time DATETIME, '
                  '    event_type TEXT); '
                  'INSERT INTO {joined} '
                  'SELECT {tmp_emp}.id, last_name, salary, time, event_type '
                  'FROM {tmp_emp} JOIN {tmp_hrs} ON '
                  '{tmp_emp}.id = {tmp_hrs}.employee_id;')

        p = Pipeline()
        get_emp = p.add(RunSQL(db_url, q_sel_employees, [], ['tmp_emp'], {}))
        get_hrs = p.add(RunSQL(db_url, q_sel_hours, [], ['tmp_hrs'], {}))
        join = p.add(RunSQL(db_url, q_join, ['tmp_emp', 'tmp_hrs'], ['joined'],
                            {}))
        csv_out = p.add(CSVWrite(self._tmp_files('out.csv')))

        get_emp['tmp_emp'] > join['tmp_emp']
        get_hrs['tmp_hrs'] > join['tmp_hrs']
        join['joined'] > csv_out['input']

        self.run_pipeline(p)

        ctrl = csv_read(path_of_data('test_transform_test_sql_ctrl.csv'))
        result = self._tmp_files.csv_read('out.csv')
        # Because Numpy insists on printing times with local offsets, but
        # not every computer has the same offset, we have to force it back
        # into UTC
        for i, dt in enumerate(result['time']):
            # .item() makes a datetime, which we can format correctly later
            # http://stackoverflow.com/questions/25134639/how-to-force-python-print-numpy-datetime64-with-specified-timezone
            result['time'][i] = np.datetime64(dt).item().strftime(
                    '%Y-%m-%dT%H:%M:%S')
        # Then we have to make the string field smaller
        new_cols = []
        for col in result.dtype.names:
            new_cols.append(result[col].astype(ctrl.dtype[col]))
        result = merge_arrays(new_cols, flatten=True) 
        result.dtype.names = ctrl.dtype.names

        self.assertTrue(np.array_equal(result, ctrl))
Example #43
    def test_sql(self):

        # Make sure we don't accidentally corrupt our test database
        db_path, db_file_name = self._tmp_files.tmp_copy(
            path_of_data('small.db'))
        db_url = 'sqlite:///{}'.format(db_path)

        q_sel_employees = 'CREATE TABLE {tmp_emp} AS SELECT * FROM employees;'
        # We have to be careful about the datetime type in sqlite3. It will
        # forget if we don't keep reminding it, and if it forgets sqlalchemy
        # will be unhappy. Hence, we can't use CREATE TABLE AS if our table
        # has a DATETIME
        q_sel_hours = ('CREATE TABLE {tmp_hrs} '
                       '(id INT, employee_id INT, time DATETIME, '
                       '    event_type TEXT); '
                       'INSERT INTO {tmp_hrs} SELECT * FROM hours;')
        q_join = ('CREATE TABLE {joined} '
                  '(id INT, last_name TEXT, salary REAL, time DATETIME, '
                  '    event_type TEXT); '
                  'INSERT INTO {joined} '
                  'SELECT {tmp_emp}.id, last_name, salary, time, event_type '
                  'FROM {tmp_emp} JOIN {tmp_hrs} ON '
                  '{tmp_emp}.id = {tmp_hrs}.employee_id;')

        p = Pipeline()
        get_emp = p.add(RunSQL(db_url, q_sel_employees, [], ['tmp_emp'], {}))
        get_hrs = p.add(RunSQL(db_url, q_sel_hours, [], ['tmp_hrs'], {}))
        join = p.add(
            RunSQL(db_url, q_join, ['tmp_emp', 'tmp_hrs'], ['joined'], {}))
        csv_out = p.add(CSVWrite(self._tmp_files('out.csv')))

        get_emp['tmp_emp'] > join['tmp_emp']
        get_hrs['tmp_hrs'] > join['tmp_hrs']
        join['joined'] > csv_out['input']

        self.run_pipeline(p)

        ctrl = csv_read(path_of_data('test_transform_test_sql_ctrl.csv'))
        result = self._tmp_files.csv_read('out.csv')
        # Because Numpy insists on printing times with local offsets, but
        # not every computer has the same offset, we have to force it back
        # into UTC
        for i, dt in enumerate(result['time']):
            # .item() makes a datetime, which we can format correctly later
            # http://stackoverflow.com/questions/25134639/how-to-force-python-print-numpy-datetime64-with-specified-timezone
            result['time'][i] = np.datetime64(dt).item().strftime(
                '%Y-%m-%dT%H:%M:%S')
        # Then we have to make the string field smaller
        new_cols = []
        for col in result.dtype.names:
            new_cols.append(result[col].astype(ctrl.dtype[col]))
        result = merge_arrays(new_cols, flatten=True)
        result.dtype.names = ctrl.dtype.names

        self.assertTrue(np.array_equal(result, ctrl))
Example #44
    def finalizeObjects(self, objects):
        objs = numpy.recarray(len(objects),
                              dtype=[('NAME','S24'),
                                     ('TS','f4'),
                                     ('GLON','f4'),
                                     ('GLAT','f4'),
                                     ('RA','f4'),
                                     ('DEC','f4'),
                                     ('MODULUS','f4'),
                                     ('DISTANCE','f4'),
                                     ('RICHNESS','f4'),
                                     ('MASS','f4'),
                                     ('NANNULUS','i4'),
                                     ('NINTERIOR','i4'),
                                     ])
        
        objs['TS'] = self.values[objects['IDX_MAX'],objects['ZIDX_MAX']]

        glon,glat = objects['X_MAX'],objects['Y_MAX']
        objs['GLON'],objs['GLAT'] = glon,glat
        
        ra,dec    = gal2cel(glon,glat)
        objs['RA'],objs['DEC'] = ra,dec

        modulus = objects['Z_MAX']
        objs['MODULUS'] = modulus
        objs['DISTANCE'] = mod2dist(modulus)

        #ninterior = ugali.utils.skymap.readSparseHealpixMap(self.roifile,'NINSIDE')
        #nannulus = ugali.utils.skymap.readSparseHealpixMap(self.roifile,'NANNULUS')
        #stellar = ugali.utils.skymap.readSparseHealpixMap(self.roifile,'STELLAR')

        nside = healpy.npix2nside(len(self.nannulus))
        pix = ang2pix(nside,glon,glat)

        richness = self.richness[objects['IDX_MAX'],objects['ZIDX_MAX']]
        objs['RICHNESS'] = richness
        objs['MASS'] = richness * self.stellar[pix]

        objs['NANNULUS']  = self.nannulus[pix].astype(int)
        objs['NINTERIOR'] = self.ninterior[pix].astype(int)

        # Default name formatting
        # http://cdsarc.u-strasbg.fr/ftp/pub/iau/
        # http://cds.u-strasbg.fr/vizier/Dic/iau-spec.htx
        fmt = "J%(hour)02i%(hmin)04.1f%(deg)+03i%(dmin)02i"
        for obj,_ra,_dec in zip(objs,ra,dec):
            hms = dec2hms(_ra); dms = dec2dms(_dec)
            params = dict(hour=hms[0],hmin=hms[1]+hms[2]/60.,
                          deg=dms[0],dmin=dms[1]+dms[2]/60.)
            obj['NAME'] = fmt%params

        out = recfuncs.merge_arrays([objs,objects],usemask=False,asrecarray=True,flatten=True)
        # This is safer than viewing as FITS_rec
        return pyfits.new_table(out).data
Example #45
def list_shank_units(bird, sess, shank, shank_file=None, sorted=False):
    meta_dt = np.dtype([('sess', 'S32', 1), ('shank', np.int, 1), ('is_good', 'b', 1)])
    kwik_file = et.open_kwik(bird, sess) if shank_file is None else et.open_kwik(bird, sess, shank_file)
    group = int(shank)
    all_units = kwf.list_units(kwik_file, group=group, sorted=False)
    n_units = all_units.size
    all_meta = np.recarray(n_units, dtype=meta_dt)
    all_meta['sess'] = sess
    all_meta['shank'] = shank
    all_meta['is_good'] = True
    return rfn.merge_arrays((all_meta, all_units), asrecarray=True, flatten=True)
Example #46
def classify_line(filename, classifier):
    """ Use `classifier` to classify data stored in `filename`

    Args:
      filename (str): filename of stored results
      classifier (sklearn classifier): pre-trained classifier

    """
    z = np.load(filename)
    rec = z['record']

    if rec.shape[0] == 0:
        logger.debug('No records in {f}. Continuing'.format(f=filename))
        return

    # Rescale intercept term
    coef = rec['coef'].copy()  # copy so we don't transform npz coef
    coef[:, 0, :] = (coef[:, 0, :] + coef[:, 1, :] *
                     ((rec['start'] + rec['end']) / 2.0)[:, np.newaxis])

    # Include RMSE for full X matrix
    newdim = (coef.shape[0], coef.shape[1] * coef.shape[2])
    X = np.hstack((coef.reshape(newdim), rec['rmse']))

    # Create output and classify
    classes = classifier.classes_
    classified = np.zeros(rec.shape[0], dtype=[
        ('class', 'u2'),
        ('class_proba', 'float32', classes.size)
    ])
    classified['class'] = classifier.predict(X)
    classified['class_proba'] = classifier.predict_proba(X)

    # Replace with new classification if exists, or add by merging
    if ('class' in rec.dtype.names and 'class_proba' in rec.dtype.names and
            rec['class_proba'].shape[1] == classes.size):
        rec['class'] = classified['class']
        rec['class_proba'] = classified['class_proba']
    else:
        # Drop incompatible classified results if needed
        # e.g., if the number of classes changed
        if 'class' in rec.dtype.names and 'class_proba' in rec.dtype.names:
            rec = nprfn.drop_fields(rec, ['class', 'class_proba'])
        rec = nprfn.merge_arrays((rec, classified), flatten=True)

    # Create dict for re-saving `npz` file (only way to append)
    out = {}
    for k, v in z.iteritems():
        out[k] = v
    out['classes'] = classes
    out['record'] = rec

    np.savez(filename, **out)
Example #47
 def __init__(self,
                 ax,
                 map_fig, 
                 zplot_fig,
                 cmdplot_fig,
                 ccdplot_fig,
                 avplot_fig,
                 cmd_ax,
                 ccd_ax,
                 avhist_ax,
                 zhist_ax,
                 data,
                 point_list = {}):
     self.previous_point = []
     self.start_point = []
     self.end_point = []
     self.line = None    
     if point_list == {}:
         self.point_list = point_list
     else:
         self.point_list = pickle.load(open(point_list,'rb'))
     self.map_fig = map_fig
     self.map_fig.canvas.draw()
     self.map_fig.canvas.set_window_title('Spatial Map')
     self.data = data
     self.zplot_fig = zplot_fig
     self.zplot_fig.canvas.set_window_title('Metallicity Histogram')
     self.avplot_fig = avplot_fig
     self.avplot_fig.canvas.set_window_title('Av Histogram')
     self.cmdplot_fig = cmdplot_fig
     self.cmdplot_fig.canvas.set_window_title('Color-Magnitude Diagram')
     self.ccdplot_fig = ccdplot_fig
     self.ccdplot_fig.canvas.set_window_title('Color-Color Diagram')
     self.cmd_ax = cmd_ax
     self.ccd_ax = ccd_ax
     self.avhist_ax = avhist_ax
     self.zhist_ax = zhist_ax
     self.ax = ax
     self.star_arr = np.zeros((len(self.data),1),dtype=[('REGION_NUM',int)])
     self.star_arr['REGION_NUM'] += 999
     self.region_arr = rfn.merge_arrays([self.data,self.star_arr],flatten = True)
     self.region_arr = np.ma.masked_array(self.region_arr,np.isnan(self.region_arr['Z']))
     
     self.region_counter = 0
     
     self.draw_spatial()
     
     self.color_list = ['red','blue','green','purple','black']
     self.color_num = 0
     self.color_mod = 0
     
     self.region_data = 0
     self.region_check = False
Example #48
    def __init__(self, *arrays):
        """Constructs DataObject from arbitrary number of ndarrays.

        Each ndarray can have an arbitrary number of fields. Field
        names should all be capitalized and words in multi-word field
        names should be separated by underscores if necessary. ndarrays
        have a 'size' property---their sizes should all be equivalent.

        Args:
            arrays (numpy.ndarray): ndarrays with equivalent sizes.
        """
        self._ndarray = merge_arrays(arrays, flatten=True)
Example #49
    def _build_nodes(cls, matching_problem: MatchingProblem) -> np.ndarray:
        """Method to build the nodes based on a matching problem"""

        courier_ids, route_ids = cls._get_entities_ids(matching_problem)

        route_demands = np.array(-1 * np.ones(route_ids.shape),
                                 dtype=[('demand', '<i8')])
        courier_demands = np.zeros(courier_ids.shape,
                                   dtype=[('demand', '<i8')])

        courier_nodes = rfn.merge_arrays([courier_ids, courier_demands],
                                         flatten=True,
                                         usemask=False)
        route_nodes = rfn.merge_arrays([route_ids, route_demands],
                                       flatten=True,
                                       usemask=False)
        supply_node = np.array([('supply', len(route_ids))],
                               dtype=[('id', '<U100'), ('demand', '<i8')])

        return np.concatenate((courier_nodes, route_nodes, supply_node),
                              axis=0)
Example #50
def classify_line(filename, classifier):
    """ Use `classifier` to classify data stored in `filename`

    Args:
      filename (str): filename of stored results
      classifier (sklearn classifier): pre-trained classifier

    """
    z = np.load(filename)
    rec = z['record']

    if rec.shape[0] == 0:
        logger.debug('No records in {f}. Continuing'.format(f=filename))
        return

    # Rescale intercept term
    coef = rec['coef'].copy()  # copy so we don't transform npz coef
    coef[:, 0, :] = (coef[:, 0, :] + coef[:, 1, :] *
                     ((rec['start'] + rec['end']) / 2.0)[:, np.newaxis])

    # Include RMSE for full X matrix
    newdim = (coef.shape[0], coef.shape[1] * coef.shape[2])
    X = np.hstack((coef.reshape(newdim), rec['rmse']))

    # Create output and classify
    classes = classifier.classes_
    classified = np.zeros(rec.shape[0], dtype=[
        ('class', 'u2'),
        ('class_proba', 'float32', classes.size)
    ])
    classified['class'] = classifier.predict(X)
    classified['class_proba'] = classifier.predict_proba(X)

    # Replace with new classification if exists, or add by merging
    if ('class' in rec.dtype.names and 'class_proba' in rec.dtype.names and
            rec['class_proba'].shape[1] == classes.size):
        rec['class'] = classified['class']
        rec['class_proba'] = classified['class_proba']
    else:
        # Drop incompatible classified results if needed
        # e.g., if the number of classes changed
        if 'class' in rec.dtype.names and 'class_proba' in rec.dtype.names:
            rec = nprfn.drop_fields(rec, ['class', 'class_proba'])
        rec = nprfn.merge_arrays((rec, classified), flatten=True)

    # Create dict for re-saving `npz` file (only way to append)
    out = {}
    for k, v in z.iteritems():
        out[k] = v
    out['classes'] = classes
    out['record'] = rec

    np.savez(filename, **out)
Example #51
def extend_list(runlist, *newcol_name):
    newlist = runlist
    for index, value in enumerate(newcol_name):
        newcol = np.zeros(np.size(runlist),
                          dtype={
                              'names': [value],
                              'formats': ['f8']
                          })
        newlist = rfn.merge_arrays((newlist, newcol),
                                   flatten=True,
                                   usemask=False)
    return newlist
Example #52
def create_pointcloud(data, mode=None, groups=None, batch_columns=None):
  if mode is None:
    mode = Mode.STANDARD
  if groups is None:
    groups = {}
  groups = merge_dicts(DEFAULT_GROUPS, groups)
  if batch_columns is None:
    batch_columns = []

  columns = []
  def add(name, data):
    if name == 'position':
      columns.append(PositionColumn(name, data, mode))
    elif name.lower() in FeatureColumn.TYPES:
      columns.append(FeatureColumn(name.lower(), data))
    else:
      columns.append(BatchColumn(name, data))


  # Remap columns based off their groupings
  grouped = set()
  for name, selection in groups.iteritems():
    add(name, data[selection])
    grouped.update(selection if isinstance(selection, list) else [selection])
  for column in (set(data.dtype.names) - grouped):
    add(column, data[column])

  # Find all mapped values and replace it with their mapping
  if len(batch_columns) > 0:
    r_columns, r_names = [], []
    for column in batch_columns:
      if column not in columns:
        raise Exception('Column %s does not exist' % column)
      r_column = columns[columns.index(column)]
      r_columns.append(r_column)
      r_names.extend(r_column.names())

    merged_data = merge_arrays([ x.data() for x in r_columns ], flatten=True, usemask=False)
    merged_data.dtype.names = r_names
    batch_groups, batch_ids = np.unique(merged_data, axis=0, return_inverse=True)
    batch_ids = batch_ids.astype(np.uint16)

    idx_offset = 0
    for column in r_columns:
      column.is_instanced = True
      d_names = column.names()
      selector = d_names[0] if column.count() == 1 else d_names
      column._data = batch_groups[selector]

    columns.append(FeatureColumn('batch_id', batch_ids, { 'BATCH_LENGTH': len(batch_groups) }))

  return PointcloudTile(columns)
Example #53
def addTermToMatrix(term,ndMat):
	rows = len(ndMat)
	if (rows == 0):
		rows=1
	newfield = np.zeros((rows,1),dtype=[(term,'i8')])
	newarr = merge_arrays([ndMat,newfield],flatten=True)
	if ('f0' in newarr.dtype.names): 
		#this clears up the first column if the array started as 0x0
		#newarr.dtype.names is a tuple, needs to be a list for remove
		names=list(newarr.dtype.names)
		names.remove('f0')
		newarr=newarr[names]
	return newarr
Example #54
  def read_oneproc(self,i):
    f = open(datadir+'proc'+str(i)+'/'+inputfilename,'rb')
    procdata = []
    ndump = 0
    ndump_recorded = 0
    nstalk_tot_thisproc = 0
    while f.read(1):
      f.seek(-1,1)
      ndump = ndump +1
      header=  np.fromfile(f,dtype=headertype,count=1)
      #print('header',header)
      nstalk_loc = header['nstalk_loc'][0]
      if (nstalk_loc>0):
        pad = np.fromfile(f,dtype=np.int32,count=1)
        ipar_stalk  = np.fromfile(f,dtype=np.dtype([('ipar',np.int32)]),count=nstalk_loc)
        #read off after-padding
        pad = np.fromfile(f,dtype=np.int32,count=1)

        pad = np.fromfile(f,dtype=np.int32,count=1)
        if not(pad==nstalk_loc*stalktype.itemsize):
          print('proc',i,'dump',ndump,'padding at data ',pad,' while nstalk_loc*stalktype.itemsize ', nstalk_loc*stalktype.itemsize)
          exit()
        values_stalk  = np.fromfile(f,dtype=stalktype,count=nstalk_loc)
        pad = np.fromfile(f,dtype=np.int32,count=1)
    
       
        if ( header['tstalk'][0] >= min_time_to_record ): 
          merged = rfn.merge_arrays( [ipar_stalk, values_stalk], flatten = True , usemask = False, asrecarray=True)
          merged = rfn.append_fields( merged, 'tstalk', data = header['tstalk'][0]*np.ones(nstalk_loc) )
          #compress on max ipar condition
          merged = merged.compress(merged['ipar'] < max_ipar_to_record)
          if (ndump==1):
            nstalk_tot_thisproc = len(merged)

          ndump_recorded = ndump_recorded + 1
          procdata.append([header['tstalk'][0],merged])
          if (len(procdata) >= MAX_QUEUE_CHUNK):
            self.result_queue.put(procdata)
            procdata = []
        elif (ndump==1):
          #compress on max ipar condition
          nstalk_tot_thisproc = len(ipar_stalk.compress(ipar_stalk['ipar'] < max_ipar_to_record))
          continue
      elif (ndump ==1): 
        #nothing to read from this proc on first dump, still need to count particles recorded
        nstalk_tot_thisproc = 0

    f.close()
    if (len(procdata) >0):
      self.result_queue.put(procdata)
    self.result_queue2.put([nstalk_tot_thisproc, ndump_recorded])
Example #55
def test_measure_latency():
    data = numpy.ndarray(shape=(NUM_SAMPLES), dtype=RECORD)

    for n, (frame, _) in enumerate(stbt.frames(50)):
        if n >= NUM_SAMPLES:
            break
        stbt_receive_time = numpy.array(
            [(frame.time, time.time())], dtype=OWN_TIMESTAMPS)
        timestamps = read_timestamps(frame)
        data[n] = merge_arrays([timestamps, stbt_receive_time], flatten=True,
                               usemask=False)

    # Sometimes we'll lose a row but no biggie
    numpy.savetxt("latency-test.txt", data[:n])
Example #56
def updateArray(data):
    ''' Adds the results containers to the data product. '''

    newData = np.zeros(data.size)
    data = rfns.append_fields(data, ['ML_pred_1d', 'ML_pred_2d',
        'ML_pred_3d'],
            [newData, newData, newData], dtypes='>f4',
            usemask=False)

    newnewData = np.zeros(data.size, dtype=[('ML_pred_1d_err', '>f4', (2,)),
        ('ML_pred_2d_err', '>f4', (2,)), ('ML_pred_3d_err', '>f4', (2,)),])
    data = rfns.merge_arrays((data, newnewData), usemask=False,
            asrecarray=False, flatten=True)

    return data
Example #57
def visitOffsets(visits, zpOff=1.):
    """
    Convert an opsim array to have more generic names and
    add any extra columns we may want
    """

    dnames = visits.dtype.names
    dnames = ['ra' if (x=='fieldRA' or x=='ditheredRA') else x for x in dnames]
    dnames = ['dec' if (x=='fieldDec' or x=='ditheredDec') else x for x in dnames]
    dnames = ['visitID' if (x=='obsHistID') else x for x in dnames]
    
    visits.dtype.names = dnames
    
    zp = (np.random.uniform(size=visits.size)*zpOff).astype(zip(['zpOff'],[float]))
    visits = rfn.merge_arrays([visits, zp],  flatten=True, usemask=False)
    return visits
Example #58
def makeDataValues(size=100, min=0., max=1., nd=3, random=True):
    """Generate a simple array of numbers, evenly arranged between min/max, in nd dimensions, but (optional)
    random order."""
    data = []
    for d in range(nd):
        datavalues = np.arange(0, size, dtype='float')
        datavalues *= (float(max) - float(min)) / (datavalues.max() - datavalues.min())
        datavalues += min
        if random:
            randorder = np.random.rand(size)
            randind = np.argsort(randorder)
            datavalues = datavalues[randind]
        datavalues = np.array(zip(datavalues), dtype=[('testdata'+ '%d' %(d), 'float')])
        data.append(datavalues)
    data = rfn.merge_arrays(data, flatten=True, usemask=False)
    return data
Example #59
 def getInfo(self,catalog=None):
     """ Returns info for the source from either a specified catalog or all of the catalogs that contain the object
         Arguments:
             catalog: optional pointer to catalog object that contains the source. If this field is missing the 
                  function will return information from every catalog containing the object
         Returns:
             Structured np.array
     """
     # to get the name of each field use self.data.dtype.names
     myInfo=np.array(self.data,copy=True)
     if catalog is None:
         for n,cat in enumerate(self.catalogs):
             catalog=openCatalog(cat)
              objects = np.sort(catalog['objects'], order='id')
              myInfo = rfn.merge_arrays([myInfo, objects])
     return myInfo
Example #60
 def _run_plugin_update(self, plugin, update_number, data):
     """Run the update phase on one PLACE plugin"""
     class_ = plugin.__class__
     elm_name = plugin.elm_module_name
     try:
         if issubclass(class_, Instrument):
             new_data = plugin.update(
                 update_number, self.progress.experiment['plugins'][elm_name]['progress'])
             if new_data is not None:
                 data = rfn.merge_arrays([data, new_data], flatten=True)
         elif issubclass(class_, PostProcessing):
             data = plugin.update(update_number, data.copy())
     except RuntimeError as err:
         self.progress.message = str(err)
         self.cleanup_phase(abort=True)
         raise
     return data