    def _compare_odict_and_omddict(self, d, omd):
        assert len(d) == len(omd)  # __len__().

        # __contains__(), has_key(), get(), and setdefault().
        for dkey, omdkey in zip(d, omd):
            assert dkey == omdkey and dkey in d and omdkey in omd
            assert dkey in d and omdkey in omd
            assert d.get(dkey) == omd.get(omdkey)
            d.setdefault(dkey, _unique)
            omd.setdefault(omdkey, _unique)
            assert d.get(dkey) == omd.get(omdkey) and d.get(dkey) != _unique
        for nonkey in self.nonkeys:
            assert d.get(nonkey) == omd.get(nonkey) is None
            d.setdefault(nonkey, _unique)
            omd.setdefault(nonkey, _unique)
            assert d.get(nonkey) == omd.get(nonkey) == _unique

        # items(), keys(), values(), iteritems(), iterkeys(), and itervalues().
        iterators = [
            zip(d.items(), omd.items(), d.keys(), omd.keys(), d.values(), omd.values()),
            zip(
                six.iteritems(d),
                six.iteritems(omd),
                six.iterkeys(d),
                six.iterkeys(omd),
                six.itervalues(d),
                six.itervalues(omd),
            ),
        ]
        for iterator in iterators:
            for ditem, omditem, dkey, omdkey, dvalue, omdvalue in iterator:
                assert dkey == omdkey
                assert ditem == omditem
                assert dvalue == omdvalue

        # pop().
        dcopy, omdcopy = d.copy(), omd.copy()
        while dcopy and omdcopy:
            dpop = dcopy.pop(list(dcopy.keys())[0])
            omdpop = omdcopy.pop(list(omdcopy.keys())[0])
            assert dpop == omdpop
        # popitem().
        dcopy, omdcopy = d.copy(), omd.copy()
        while dcopy and omdcopy:
            assert dcopy.popitem() == omdcopy.popitem()

        # __getitem__().
        for dkey, omdkey in zip(six.iterkeys(d), six.iterkeys(omd)):
            assert d[dkey] == omd[omdkey]
        # __setitem__().
        for dkey, omdkey in zip(d, omd):
            d[dkey] = _unique
            omd[omdkey] = _unique
            assert dkey == omdkey and d[dkey] == omd[omdkey]
        # __delitem__().
        while d and omd:
            dkey, omdkey = list(d.keys())[0], list(omd.keys())[0]
            del d[dkey]
            del omd[omdkey]
            assert dkey == omdkey and dkey not in d and omdkey not in omd
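A minimal standalone sketch of the same parity idea, assuming the orderedmultidict package (which provides omdict); the driver data here is hypothetical and not part of the test class above:

from orderedmultidict import omdict  # assumed: pip install orderedmultidict

items = [(1, 'a'), (2, 'b'), (3, 'c')]
d, omd = dict(items), omdict(items)
assert len(d) == len(omd)
for (dkey, dvalue), (omdkey, omdvalue) in zip(d.items(), omd.items()):
    assert dkey == omdkey and dvalue == omdvalue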
Example No. 2
    def _write_sort2_as_sort2(self, f, page_num, page_stamp, header, words):
        nodes = self.node_gridtype[:, 0]
        gridtypes = self.node_gridtype[:, 1]
        times = self._times
        for inode, (node_id, gridtypei) in enumerate(zip(nodes, gridtypes)):
            t1 = self.data[inode, :, 0]
            t2 = self.data[inode, :, 1]
            t3 = self.data[inode, :, 2]
            r1 = self.data[inode, :, 3]
            r2 = self.data[inode, :, 4]
            r3 = self.data[inode, :, 5]

            header[1] = ' POINT-ID = %10i\n' % node_id
            f.write(''.join(header + words))
            for dt, t1i, t2i, t3i, r1i, r2i, r3i in zip(times, t1, t2, t3, r1, r2, r3):
                sgridtype = self.recast_gridtype_as_string(gridtypei)
                vals = [t1i, t2i, t3i, r1i, r2i, r3i]
                vals2 = write_floats_13e(vals)
                (dx, dy, dz, rx, ry, rz) = vals2
                if sgridtype == 'G':
                    f.write('%14s %6s     %-13s  %-13s  %-13s  %-13s  %-13s  %s\n' % (
                        write_float_12e(dt), sgridtype, dx, dy, dz, rx, ry, rz))
                elif sgridtype == 'S':
                    f.write('%14s %6s     %s\n' % (node_id, sgridtype, dx))
                elif sgridtype == 'H':
                    f.write('%14s %6s     %-13s  %-13s  %-13s  %-13s  %-13s  %s\n' % (
                        write_float_12e(dt), sgridtype, dx, dy, dz, rx, ry, rz))
                elif sgridtype == 'L':
                    f.write('%14s %6s     %-13s  %-13s  %-13s  %-13s  %-13s  %s\n' % (
                        write_float_12e(dt), sgridtype, dx, dy, dz, rx, ry, rz))
                else:
                    raise NotImplementedError(sgridtype)
            f.write(page_stamp % page_num)
            page_num += 1
        return page_num
Example No. 3
def get_contributor_name_string(ibs, contrib_rowid_list, include_tag=False):
    r"""
    Returns:
        contrib_name_list (list):  a contributor's full name

    RESTful:
        Method: GET
        URL:    /api/contributor/name_string/
    """
    first_list = ibs.get_contributor_first_name(contrib_rowid_list)
    last_list = ibs.get_contributor_last_name(contrib_rowid_list)
    if include_tag:
        tag_list = ibs.get_contributor_tag(contrib_rowid_list)
        name_list = zip(first_list, last_list, tag_list)
        contrib_name_list = [
            "%s %s (%s)" % (first, last, tag)
            for first, last, tag in name_list
        ]
    else:
        name_list = zip(first_list, last_list)
        contrib_name_list = [
            "%s %s" % (first, last)
            for first, last in name_list
        ]

    return contrib_name_list
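For reference, a self-contained sketch of the zip-based formatting above, with hypothetical name lists standing in for the ibs controller lookups:

first_list = ['Ada', 'Alan']
last_list = ['Lovelace', 'Turing']
tag_list = ['math', 'cs']
contrib_name_list = [
    "%s %s (%s)" % (first, last, tag)
    for first, last, tag in zip(first_list, last_list, tag_list)
]
# ['Ada Lovelace (math)', 'Alan Turing (cs)']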
Example No. 4
    def __to_dp_matrix_mt(self, value_matrix):
        from concurrent import futures

        col_data_map = {}

        try:
            with futures.ProcessPoolExecutor(self.max_workers) as executor:
                future_list = [
                    executor.submit(
                        _to_dp_list_helper,
                        self,
                        col_idx,
                        values,
                        self.__get_col_type_hint(col_idx),
                        self.strip_str_value,
                    )
                    for col_idx, values in enumerate(zip(*value_matrix))
                ]

                for future in futures.as_completed(future_list):
                    col_idx, value_dp_list = future.result()
                    col_data_map[col_idx] = value_dp_list
        finally:
            logger.debug("shutdown ProcessPoolExecutor: workers={}".format(self.max_workers))
            executor.shutdown()

        return list(zip(*[col_data_map[col_idx] for col_idx in sorted(col_data_map)]))
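A minimal sketch of the column-wise pattern used above, using only the standard library: zip(*matrix) yields columns, each column is processed in a ProcessPoolExecutor, and the per-column results are zipped back into rows. square_column is a hypothetical stand-in for _to_dp_list_helper:

from concurrent import futures


def square_column(col_idx, values):
    # Process one column; return its index so results can be reassembled in order.
    return col_idx, [v * v for v in values]


if __name__ == '__main__':
    value_matrix = [[1, 2, 3], [4, 5, 6]]
    col_results = {}
    with futures.ProcessPoolExecutor(max_workers=2) as executor:
        future_list = [executor.submit(square_column, col_idx, values)
                       for col_idx, values in enumerate(zip(*value_matrix))]
        for future in futures.as_completed(future_list):
            col_idx, squared = future.result()
            col_results[col_idx] = squared
    rows = list(zip(*[col_results[col_idx] for col_idx in sorted(col_results)]))
    # rows == [(1, 4, 9), (16, 25, 36)]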
Example No. 5
    def _write_atoms(self, atoms):
        self.f.write('\n')
        self.f.write('Atoms\n')
        self.f.write('\n')

        try:
            charges = atoms.charges
        except (NoDataError, AttributeError):
            has_charges = False
        else:
            has_charges = True

        indices = atoms.indices + 1
        types = atoms.types.astype(np.int32)

        if self.convert_units:
            coordinates = self.convert_pos_to_native(atoms.positions, inplace=False)
        else:
            coordinates = atoms.positions

        if has_charges:
            for index, atype, charge, coords in zip(indices, types, charges,
                    coordinates):
                self.f.write('{i:d} 0 {t:d} {c:f} {x:f} {y:f} {z:f}\n'.format(
                             i=index, t=atype, c=charge, x=coords[0],
                             y=coords[1], z=coords[2]))
        else:
            for index, atype, coords in zip(indices, types, coordinates):
                self.f.write('{i:d} 0 {t:d} {x:f} {y:f} {z:f}\n'.format(
                             i=index, t=atype, x=coords[0], y=coords[1],
                             z=coords[2]))
Example No. 6
    def add_lines(self, levels, colors, linewidths, erase=True):
        '''
        Draw lines on the colorbar.

        *colors* and *linewidths* must be scalars or
        sequences the same length as *levels*.

        Set *erase* to False to add lines without first
        removing any previously added lines.
        '''
        y = self._locate(levels)
        igood = (y < 1.001) & (y > -0.001)
        y = y[igood]
        if cbook.iterable(colors):
            colors = np.asarray(colors)[igood]
        if cbook.iterable(linewidths):
            linewidths = np.asarray(linewidths)[igood]
        N = len(y)
        x = np.array([0.0, 1.0])
        X, Y = np.meshgrid(x, y)
        if self.orientation == 'vertical':
            xy = [list(zip(X[i], Y[i])) for i in xrange(N)]
        else:
            xy = [list(zip(Y[i], X[i])) for i in xrange(N)]
        col = collections.LineCollection(xy, linewidths=linewidths)

        if erase and self.lines:
            for lc in self.lines:
                lc.remove()
            self.lines = []
        self.lines.append(col)
        col.set_color(colors)
        self.ax.add_collection(col)
        self.stale = True
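A hedged usage sketch for the method above; modern Matplotlib exposes the same entry point as Colorbar.add_lines(levels, colors, linewidths) on an existing colorbar:

import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
im = ax.imshow(np.random.rand(10, 10), vmin=0.0, vmax=1.0)
cbar = fig.colorbar(im, ax=ax)
# Draw marker lines across the colorbar at three data levels.
cbar.add_lines([0.25, 0.5, 0.75], ['k', 'r', 'k'], [1, 2, 1])
plt.show()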
Example No. 7
def reverse_points_if_backwards(xy, xy_next):
    """
    This function aligns xy_next so that it is in the same direction as xy.
    Nothing occurs if they are already aligned.

    inputs:
    xy, xy_next - list of tuples [(x1, y1), (x2, y2) ...]
    xy and xy_next are separated by one timestep.
    The function returns the (possibly reversed) spine and a flag indicating
    whether it was reversed.
    """

    x, y = zip(*xy)
    xnext, ynext = zip(*xy_next)
    xnext_rev = xnext[::-1]
    ynext_rev = ynext[::-1]

    distance_original = 0.
    distance_rev = 0.
    for k in range(len(x)):
        distance_original += (x[k] - xnext[k]) ** 2 + (y[k] - ynext[k]) ** 2
        distance_rev += (x[k] - xnext_rev[k]) ** 2 + (y[k] - ynext_rev[k]) ** 2

    # Compare only after accumulating the distance over the whole spine;
    # deciding inside the loop would return after the first point.
    if distance_original > distance_rev:
        newxy = list(zip(xnext_rev, ynext_rev))
        return (newxy, True)
    else:
        return (xy_next, False)
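A quick check of the helper above, using a hypothetical spine whose second observation was recorded in reverse order:

xy = [(0, 0), (1, 0), (2, 0)]
xy_next = [(2, 1), (1, 1), (0, 1)]  # same spine one timestep later, reversed
aligned, was_reversed = reverse_points_if_backwards(xy, xy_next)
# aligned == [(0, 1), (1, 1), (2, 1)], was_reversed == True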
Example No. 8
    def from_arrays(cls, arrays, column_names=None, **kwargs):
        """Produce :class:`ColumnDataSource` from array-like data.

        Returns:
            :class:`ColumnDataSource`

        """
        # handle list of arrays
        if any(cls.is_list_arrays(array) for array in arrays):
            list_of_arrays = copy(arrays)
            arrays = list(chain.from_iterable(arrays))
            column_names = column_names or gen_column_names(len(arrays))
            cols = copy(column_names)
            dims = kwargs.get('dims', DEFAULT_DIMS)

            # derive column selections
            for dim, list_of_array in zip(dims, list_of_arrays):
                sel = [cols.pop(0) for _ in list_of_array]
                kwargs[dim] = sel
        else:
            column_names = column_names or gen_column_names(len(arrays))

        # try to replace auto names with Series names
        for i, array in enumerate(arrays):
            if isinstance(array, pd.Series):
                name = array.name
                if name not in column_names and name is not None:
                    column_names[i] = name

        table = {column_name: array for column_name, array in zip(column_names, arrays)}
        return cls(df=pd.DataFrame.from_dict(data=table), **kwargs)
Example No. 9
File: folding.py Project: Afey/rep
    def staged_predict_proba(self, X, vote_function=None):
        """
        Predict probabilities on each stage. To get unbiased predictions, you can pass training dataset
        (with same order of events) and vote_function=None.

        :param X: pandas.DataFrame of shape [n_samples, n_features]
        :param vote_function: function to combine prediction of folds' estimators.
            If None then self.vote_function is used.
        :type vote_function: None or function

        :return: iterator for numpy.array of shape [n_samples, n_classes] with probabilities
        """
        if vote_function is not None:
            print('Using voting KFold prediction')
            X = self._get_train_features(X)
            iterators = [estimator.staged_predict_proba(X) for estimator in self.estimators]
            for fold_prob in zip(*iterators):
                probabilities = numpy.array(fold_prob)
                yield vote_function(probabilities)
        else:
            print('Default prediction')
            X = self._get_train_features(X)
            folds_column = self._get_folds_column(len(X))
            iterators = [self.estimators[fold].staged_predict_proba(X.iloc[folds_column == fold, :])
                         for fold in range(self.n_folds)]
            for fold_prob in zip(*iterators):
                probabilities = numpy.zeros(shape=(len(X), 2))
                for fold in range(self.n_folds):
                    probabilities[folds_column == fold] = fold_prob[fold]
                yield probabilities
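The zip(*iterators) trick above can be exercised outside REP; a hedged sketch with plain scikit-learn boosted models, averaging the staged probabilities across estimators (the dataset and estimator choices here are illustrative only):

import numpy
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(n_samples=200, random_state=0)
estimators = [GradientBoostingClassifier(n_estimators=5, random_state=seed).fit(X, y)
              for seed in range(3)]
iterators = [estimator.staged_predict_proba(X) for estimator in estimators]
for fold_prob in zip(*iterators):
    probabilities = numpy.array(fold_prob).mean(axis=0)  # shape [n_samples, 2]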
Example No. 10
def evaluate_srl_classify(no_repeat=False, gold_file=None):
    """Evaluates the performance of the network on the SRL classifying task."""
    # load data
    md = Metadata.load_from_file('srl_classify')
    nn = taggers.load_network(md)
    r = taggers.create_reader(md, gold_file)
    r.create_converter()
    
    r.codify_sentences()
    hits = 0
    total_args = 0
    
    for sentence, tags, predicates, args in zip(r.sentences, r.tags,
                                                r.predicates, r.arg_limits):
        
        # the answer includes all predicates
        answer = nn.tag_sentence(sentence, predicates, args,
                                 allow_repeats=not no_repeat)
        
        for pred_answer, pred_gold in zip(answer, tags):
        
            for net_tag, gold_tag in zip(pred_answer, pred_gold):
                if net_tag == gold_tag:
                    hits += 1
            
            total_args += len(pred_gold)
    
    print('Accuracy: %f' % (float(hits) / total_args))
Example No. 11
def save_weights_to_hdf5_group(f, layers):
  from tensorflow.python.keras._impl.keras import __version__ as keras_version  # pylint: disable=g-import-not-at-top

  save_attributes_to_hdf5_group(
      f, 'layer_names', [layer.name.encode('utf8') for layer in layers])
  f.attrs['backend'] = K.backend().encode('utf8')
  f.attrs['keras_version'] = str(keras_version).encode('utf8')

  for layer in layers:
    g = f.create_group(layer.name)
    symbolic_weights = layer.weights
    weight_values = K.batch_get_value(symbolic_weights)
    weight_names = []
    for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)):
      if hasattr(w, 'name') and w.name:
        name = str(w.name)
      else:
        name = 'param_' + str(i)
      weight_names.append(name.encode('utf8'))
    save_attributes_to_hdf5_group(g, 'weight_names', weight_names)
    for name, val in zip(weight_names, weight_values):
      param_dset = g.create_dataset(name, val.shape, dtype=val.dtype)
      if not val.shape:
        # scalar
        param_dset[()] = val
      else:
        param_dset[:] = val
Example No. 12
    def verify_cloned_plan(self, original_plan, cloned_plan,
                           link_cases=True, copy_cases=None,
                           maintain_case_orignal_author=None,
                           keep_case_default_tester=None):
        self.assertEqual('Copy of {}'.format(original_plan.name), cloned_plan.name)
        self.assertEqual(Product.objects.get(pk=self.product.pk), cloned_plan.product)
        self.assertEqual(Version.objects.get(pk=self.version.pk), cloned_plan.product_version)

        # Verify option set_parent
        self.assertEqual(TestPlan.objects.get(pk=original_plan.pk), cloned_plan.parent)

        # Verify option copy_texts
        self.assertEqual(cloned_plan.text.count(), original_plan.text.count())
        for copied_text, original_text in zip(cloned_plan.text.all(),
                                              original_plan.text.all()):
            self.assertEqual(copied_text.plan_text_version, original_text.plan_text_version)
            self.assertEqual(copied_text.author, original_text.author)
            self.assertEqual(copied_text.create_date, original_text.create_date)
            self.assertEqual(copied_text.plan_text, original_text.plan_text)

        # Verify option copy_attachments
        for attachment in original_plan.attachment.all():
            added = TestPlanAttachment.objects.filter(
                plan=cloned_plan, attachment=attachment).exists()
            self.assertTrue(added)

        # Verify option copy_environment_groups
        for env_group in original_plan.env_group.all():
            added = TCMSEnvPlanMap.objects.filter(plan=cloned_plan, group=env_group).exists()
            self.assertTrue(added)

        # Verify options link_testcases and copy_testcases
        if link_cases and not copy_cases:
            for case in original_plan.case.all():
                is_case_linked = TestCasePlan.objects.filter(plan=cloned_plan, case=case).exists()
                self.assertTrue(is_case_linked)

        if link_cases and copy_cases:
            # Ensure cases of original plan are not linked to cloned plan
            for case in original_plan.case.all():
                original_case_not_linked_to_cloned_plan = TestCasePlan.objects.filter(
                    plan=cloned_plan, case=case).exists()
                self.assertFalse(original_case_not_linked_to_cloned_plan)

            self.assertEqual(cloned_plan.case.count(), original_plan.case.count())

            # Verify that each copied case's author and default tester are set properly
            for original_case, copied_case in zip(original_plan.case.all(),
                                                  cloned_plan.case.all()):
                if maintain_case_orignal_author:
                    self.assertEqual(original_case.author, copied_case.author)
                else:
                    me = self.plan_tester
                    self.assertEqual(me, copied_case.author)

                if keep_case_default_tester:
                    self.assertEqual(original_case.default_tester, copied_case.default_tester)
                else:
                    me = self.plan_tester
                    self.assertEqual(me, copied_case.default_tester)
Example No. 13
def make_factor_text(factor, name):
    collapse_uniform = True
    if collapse_uniform and ut.almost_allsame(factor.values):
        # Reduce uniform text
        ftext = name + ':\nuniform(%.3f)' % (factor.values[0],)
    else:
        values = factor.values
        try:
            rowstrs = ['p(%s)=%.3f' % (','.join(n), v,)
                       for n, v in zip(zip(*factor.statenames), values)]
        except Exception:
            rowstrs = ['p(%s)=%.3f' % (','.join(n), v,)
                       for n, v in zip(factor._row_labels(False), values)]
        idxs = ut.list_argmaxima(values)
        for idx in idxs:
            rowstrs[idx] += '*'
        thresh = 4
        always_sort = True
        if len(rowstrs) > thresh:
            sortx = factor.values.argsort()[::-1]
            rowstrs = ut.take(rowstrs, sortx[0:(thresh - 1)])
            rowstrs += ['... %d more' % ((len(values) - len(rowstrs)),)]
        elif always_sort:
            sortx = factor.values.argsort()[::-1]
            rowstrs = ut.take(rowstrs, sortx)
        ftext = name + ': \n' + '\n'.join(rowstrs)
    return ftext
Example No. 14
    def raw_method(self):
        list_fields = []
        if self.method in ['HESS', 'INV']:
            for (alphaA, omegaA, alphaB, omegaB, Lj, NEj, NDj) in zip(
                    self.alphaAjs, self.omegaAjs, self.alphaBjs, self.omegaBjs,
                    self.LJs, self.NEJs, self.NDJs):
                alphaA = set_blank_if_default(alphaA, 0.0)
                omegaA = set_blank_if_default(omegaA, 0.0)
                alphaB = set_blank_if_default(alphaB, 0.0)
                omegaB = set_blank_if_default(omegaB, 0.0)
                list_fields += [alphaA, omegaA, alphaB, omegaB, Lj, NEj, NDj, None]

        elif self.method == 'CLAN':
            assert len(self.alphaAjs) == len(self.omegaAjs)
            assert len(self.alphaAjs) == len(self.mblkszs)
            assert len(self.alphaAjs) == len(self.iblkszs)
            assert len(self.alphaAjs) == len(self.ksteps)
            assert len(self.alphaAjs) == len(self.NJIs)
            for (alphaA, omegaA, mblksz, iblksz, kstep, Nj) in zip(
                    self.alphaAjs, self.omegaAjs, self.mblkszs, self.iblkszs,
                    self.ksteps, self.NJIs):
                alphaA = set_blank_if_default(alphaA, 0.0)
                omegaA = set_blank_if_default(omegaA, 0.0)
                mblksz = set_blank_if_default(mblksz, 7)
                iblksz = set_blank_if_default(iblksz, 2)
                kstep = set_blank_if_default(kstep, 5)

                list_fields += [alphaA, omegaA, mblksz, iblksz,
                                kstep, None, Nj, None]
        else:
            msg = 'invalid EIGC method...method=%r' % self.method
            raise RuntimeError(msg)
        return list_fields
Example No. 15
    def _reduce(results, dataset_out, data_name, dtype, shuffle, rng):
        if len(results) > 0 and (len(data_name) != len(results[0]) or
                                 len(dtype) != len(results[0])):
            raise ValueError('Returned [{}] results but only given [{}] name and'
                             ' [{}] dtype'.format(
                                 len(results[0]), len(data_name), len(dtype)))

        final = [[] for i in range(len(results[0]))]
        for res in results:
            for i, j in zip(res, final):
                j.append(i)
        final = [np.vstack(i)
                 if isinstance(i[0], np.ndarray)
                 else np.asarray(reduce(lambda x, y: x + y, i))
                 for i in final]
        # shuffle features
        if shuffle > 2:
            permutation = rng.permutation(final[0].shape[0])
            final = [i[permutation] for i in final]
        # save to dataset
        for i, name, dt in zip(final, data_name, dtype):
            shape = i.shape
            dt = np.dtype(dt)
            x = dataset_out.get_data(name, dtype=dt, shape=shape, value=i)
            x.flush()
        return None
Example No. 16
def data_index_integrity(ibs, qreq):
    print('checking qreq.data_index integrity')

    aid_list = ibs.get_valid_aids()
    desc_list = ibs.get_annot_vecs(aid_list)
    fid_list = ibs.get_annot_feat_rowids(aid_list)
    desc_list2 = ibs.get_feat_vecs(fid_list)

    assert all([np.all(desc1 == desc2) for desc1, desc2 in zip(desc_list, desc_list2)])

    dx2_data = qreq.data_index.dx2_data
    check_sift_desc(dx2_data)
    dx2_aid  = qreq.data_index.dx2_aid
    dx2_fx   = qreq.data_index.dx2_fx

    # For each descriptor create a (aid, fx) pair indicating its
    # chip id and the feature index in that chip id.
    nFeat_list = list(map(len, desc_list))
    _dx2_aid = [[aid] * nFeat for (aid, nFeat) in zip(aid_list, nFeat_list)]
    _dx2_fx = [list(range(nFeat)) for nFeat in nFeat_list]

    assert len(_dx2_fx) == len(aid_list)
    assert len(_dx2_aid) == len(aid_list)
    print('... loop checks')

    for count in range(len(aid_list)):
        aid = aid_list[count]
        assert np.all(np.array(_dx2_aid[count]) == aid)
        assert len(_dx2_fx[count]) == desc_list[count].shape[0]
        dx_list = np.where(dx2_aid == aid)[0]
        assert np.all(dx2_data[dx_list] == desc_list[count])
        assert np.all(dx2_fx[dx_list] == np.arange(len(dx_list)))
    print('... seems ok')
Example No. 17
def setup_plan(plan):
  """Sets up a TensorFlow Fold plan for MNIST.

  The inputs are 28 x 28 images represented as 784-dimensional float32
  vectors (scaled to [0, 1]) and categorical digit labels in [0, 9].

  The training loss is softmax cross-entropy. There is only one
  metric, accuracy. In inference mode, the output is a class label.

  Dropout is applied before every layer (including on the inputs).

  Args:
    plan: A TensorFlow Fold plan to set up.
  """
  # Convert the input NumPy array into a tensor.
  model_block = td.Vector(INPUT_LENGTH)

  # Create a placeholder for dropout, if we are in train mode.
  keep_prob = (tf.placeholder_with_default(1.0, [], name='keep_prob')
               if plan.mode == plan.mode_keys.TRAIN else None)

  # Add the fully connected hidden layers.
  for _ in xrange(FLAGS.num_layers):
    model_block >>= td.FC(FLAGS.num_units, input_keep_prob=keep_prob)

  # Add the linear output layer.
  model_block >>= td.FC(NUM_LABELS, activation=None, input_keep_prob=keep_prob)

  if plan.mode == plan.mode_keys.INFER:
    # In inference mode, we run the model directly on images.
    plan.compiler = td.Compiler.create(model_block)
    logits, = plan.compiler.output_tensors
  else:
    # In training/eval mode, we run the model on (image, label) pairs.
    plan.compiler = td.Compiler.create(
        td.Record((model_block, td.Scalar(tf.int64))))
    logits, y_ = plan.compiler.output_tensors

  y = tf.argmax(logits, 1)  # create the predicted output tensor

  datasets = tf.contrib.learn.datasets.mnist.load_mnist(FLAGS.logdir_base)
  if plan.mode == plan.mode_keys.INFER:
    plan.examples = datasets.test.images
    plan.outputs = [y]
  else:
    # Create loss and accuracy tensors, and add them to the plan.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y_)
    plan.losses['cross_entropy'] = loss
    accuracy = tf.reduce_mean(tf.cast(tf.equal(y, y_), tf.float32))
    plan.metrics['accuracy'] = accuracy
    if plan.mode == plan.mode_keys.TRAIN:
      plan.examples = zip(datasets.train.images, datasets.train.labels)
      plan.dev_examples = zip(datasets.validation.images,
                              datasets.validation.labels)
      # Turn dropout on for training, off for validation.
      plan.train_feeds[keep_prob] = FLAGS.keep_prob
    else:
      assert plan.mode == plan.mode_keys.EVAL
      plan.examples = zip(datasets.test.images, datasets.test.labels)
Example No. 18
    def raw_fields(self):
        list_fields = ['RBE3', self.eid, None, self.refgrid, self.refc]
        for (wt, ci, Gij) in self.WtCG_groups:
            #print('wt=%s ci=%s Gij=%s' % (wt, ci, Gij))
            list_fields += [wt, ci] + Gij
        nSpaces = 8 - (len(list_fields) - 1) % 8  # puts UM onto next line

        if nSpaces < 8:
            list_fields += [None] * nSpaces

        if self.Gmi and 0:
            fields2 = ['UM']
            for (gmi, cmi) in zip(self.Gmi, self.Cmi):
                fields2 += [gmi, cmi]
            list_fields += build_table_lines(fields2, i=1, j=1)  ## ..todo:: what's going on here with the arguments???

        if self.Gmi:
            list_fields += ['UM']
        if self.Gmi:
            #print("Gmi = %s" % self.Gmi)
            #print("Cmi = %s" % self.Cmi)
            for (gmi, cmi) in zip(self.Gmi, self.Cmi):
                list_fields += [gmi, cmi]

        nSpaces = 8 - (len(list_fields) - 1) % 8  # puts ALPHA onto next line
        if nSpaces < 8:
            list_fields += [None] * nSpaces

        if self.alpha > 0.:  # handles the default value
            list_fields += ['ALPHA', self.alpha]
        return list_fields
Example No. 19
    def _write_sort2_as_sort2(self, f, page_num, page_stamp, header, words):
        nodes = self.node_gridtype[:, 0]
        gridtypes = self.node_gridtype[:, 1]
        times = self._times
        for inode, (node_id, gridtypei) in enumerate(zip(nodes, gridtypes)):
            t1 = self.data[inode, :, 0]

            header[1] = ' POINT-ID = %10i\n' % node_id
            f.write(''.join(header + words))
            for dt, t1i in zip(times, t1):
                sgridtype = self.recast_gridtype_as_string(gridtypei)
                vals = [t1i]
                vals2 = write_floats_13e(vals)
                dx = vals2[0]
                if sgridtype == 'G':
                    f.write('%14s %6s     %s\n' % (write_float_12e(dt), sgridtype, dx))
                elif sgridtype == 'S':
                    f.write('%14s %6s     %s\n' % (node_id, sgridtype, dx))
                elif sgridtype == 'H':
                    f.write('%14s %6s     %s\n' % (write_float_12e(dt), sgridtype, dx))
                elif sgridtype == 'L':
                    f.write('%14s %6s     %s\n' % (write_float_12e(dt), sgridtype, dx))
                else:
                    raise NotImplementedError(sgridtype)
            f.write(page_stamp % page_num)
            page_num += 1
        return page_num
Example No. 20
def compile_gem(return_variables, expressions, prefix_ordering, remove_zeros=False):
    """Compiles GEM to Impero.

    :arg return_variables: return variables for each root (type: GEM expressions)
    :arg expressions: multi-root expression DAG (type: GEM expressions)
    :arg prefix_ordering: outermost loop indices
    :arg remove_zeros: remove zero assignment to return variables
    """
    expressions = optimise.remove_componenttensors(expressions)

    # Remove zeros
    if remove_zeros:
        rv = []
        es = []
        for var, expr in zip(return_variables, expressions):
            if not isinstance(expr, gem.Zero):
                rv.append(var)
                es.append(expr)
        return_variables, expressions = rv, es

    # Collect indices in a deterministic order
    indices = OrderedSet()
    for node in traversal(expressions):
        if isinstance(node, gem.Indexed):
            for index in node.multiindex:
                if isinstance(index, gem.Index):
                    indices.add(index)
        elif isinstance(node, gem.FlexiblyIndexed):
            for offset, idxs in node.dim2idxs:
                for index, stride in idxs:
                    if isinstance(index, gem.Index):
                        indices.add(index)

    # Build ordered index map
    index_ordering = make_prefix_ordering(indices, prefix_ordering)
    apply_ordering = make_index_orderer(index_ordering)

    get_indices = lambda expr: apply_ordering(expr.free_indices)

    # Build operation ordering
    ops = scheduling.emit_operations(list(zip(return_variables, expressions)), get_indices)

    # Empty kernel
    if len(ops) == 0:
        raise NoopError()

    # Drop unnecessary temporaries
    ops = inline_temporaries(expressions, ops)

    # Build Impero AST
    tree = make_loop_tree(ops, get_indices)

    # Collect temporaries
    temporaries = collect_temporaries(ops)

    # Determine declarations
    declare, indices = place_declarations(ops, tree, temporaries, get_indices)

    # Prepare ImperoC (Impero AST + other data for code generation)
    return ImperoC(tree, temporaries, declare, indices)
Example No. 21
    def raw_fields(self):
        list_fields = [self.type, self.eid]

        for (i, gn, cn) in zip(count(), self.Gni, self.Cni):
            #print('i=%r gn=%r cn=%r' % (i, gn, cn))
            list_fields += [gn, cn]
            if i > 0 and i % 3 == 0:
                #print('adding blank')
                list_fields += [None]

        nSpaces = 8 - (len(list_fields) - 1) % 8  # puts UM/ALPHA onto next line
        if nSpaces < 8:
            list_fields += [None] * nSpaces

        # overly complicated loop to print the UM section
        list_fields += ['UM']
        j = 1
        for (i, gm, cm) in zip(count(), self.Gmi, self.Cmi):
            #print "j=%s gmi=%s cmi=%s" %(j,gm,cm)
            list_fields += [gm, cm]
            if i > 0 and j % 3 == 0:
                list_fields += [None, None]
                #print "---"
                j -= 3
            j += 1

        if self.alpha > 0.:  # handles default alpha value
            nSpaces = 8 - (len(list_fields) - 1) % 8  # puts ALPHA onto next line
            if nSpaces == 1:
                list_fields += [None, None]
            list_fields += [self.alpha]
        return list_fields
Example No. 22
    def test_get_strain_state_dict(self):
        strain_inds = [(0,), (1,), (2,), (1, 3), (1, 2, 3)]
        vecs = {}
        strain_states = []
        for strain_ind in strain_inds:
            ss = np.zeros(6)
            np.put(ss, strain_ind, 1)
            strain_states.append(tuple(ss))
            vec = np.zeros((4, 6))
            rand_values = np.random.uniform(0.1, 1, 4)
            for i in strain_ind:
                vec[:, i] = rand_values
            vecs[strain_ind] = vec
        all_strains = [Strain.from_voigt(v).zeroed() for vec in vecs.values()
                       for v in vec]
        random.shuffle(all_strains)
        all_stresses = [Stress.from_voigt(np.random.random(6)).zeroed()
                        for s in all_strains]
        strain_dict = {k.tostring(): v for k, v in zip(all_strains, all_stresses)}
        ss_dict = get_strain_state_dict(all_strains, all_stresses, add_eq=False)
        # Check length of ss_dict
        self.assertEqual(len(strain_inds), len(ss_dict))
        # Check sets of strain states are correct
        self.assertEqual(set(strain_states), set(ss_dict.keys()))
        for strain_state, data in ss_dict.items():
            # Check correspondence of strains/stresses
            for strain, stress in zip(data["strains"], data["stresses"]):
                self.assertArrayAlmostEqual(Stress.from_voigt(stress),
                                            strain_dict[Strain.from_voigt(strain).tostring()])
Example No. 23
File: data.py Project: imito/odin
 def append(self, *arrays):
   if self.read_only:
     raise RuntimeError("This Data is set in read-only mode")
   accepted_arrays = []
   add_size = 0
   # ====== check if shape[1:] matching ====== #
   for a, d in zip(arrays, self._data):
     if hasattr(a, 'shape'):
       if a.shape[1:] == d.shape[1:]:
         accepted_arrays.append(a)
         add_size += a.shape[0]
     else:
       accepted_arrays.append(None)
   # ====== resize ====== #
   old_size = self.__len__()
   # special case, Mmap is init with temporary size = 1 (all zeros),
   # NOTE: risky to calculate sum of big array here
   if old_size == 1 and \
   sum(np.sum(np.abs(d[:])) for d in self._data) == 0.:
     old_size = 0
   # resize and append data
   self.resize(old_size + add_size) # resize only once will be faster
   # ====== update values ====== #
   for a, d in zip(accepted_arrays, self._data):
     if a is not None:
       d[old_size:old_size + a.shape[0]] = a
   return self
Example No. 24
    def write_card(self, size=8, is_double=False):
        msg = '\n$' + '-' * 80
        msg += '\n$ %s Matrix %s\n' % ('DMI', self.name)
        list_fields = ['DMI', self.name, 0, self.form, self.tin,
                       self.tout, None, self.nRows, self.nCols]
        if size == 8:
            msg += print_card_8(list_fields)
        #elif is_double:
            #msg += print_card_double(list_fields)
        else:
            msg += print_card_16(list_fields)
        #msg += self.print_card(list_fields,size=16,isD=False)

        if self.is_complex():
            for (gci, gcj, reali, imagi) in zip(self.GCi, self.GCj, self.Real, self.Complex):
                list_fields = ['DMI', self.name, gcj, gci, reali, imagi]
                if size == 8:
                    msg += print_card_8(list_fields)
                elif is_double:
                    msg += print_card_double(list_fields)
                else:
                    msg += print_card_16(list_fields)
        else:
            for (gci, gcj, reali) in zip(self.GCi, self.GCj, self.Real):
                list_fields = ['DMI', self.name, gcj, gci, reali]
                if size == 8:
                    msg += print_card_8(list_fields)
                elif is_double:
                    msg += print_card_double(list_fields)
                else:
                    msg += print_card_16(list_fields)
        return msg
Example No. 25
    def _write_sort1_as_sort2(self, f, page_num, page_stamp, header, words):
        element = self.element
        element_type = self.element_data_type
        times = self._times

        node_id = 0  ## TODO: fix the node id
        for inode, (eid, etypei) in enumerate(zip(element, element_type)):
            t1 = self.data[:, inode, 0].ravel()
            t2 = self.data[:, inode, 1].ravel()
            t3 = self.data[:, inode, 2].ravel()
            r1 = self.data[:, inode, 3].ravel()
            r2 = self.data[:, inode, 4].ravel()
            r3 = self.data[:, inode, 5].ravel()

            header[1] = ' POINT-ID = %10i\n' % node_id
            f.write(''.join(header + words))
            for dt, t1i, t2i, t3i, r1i, r2i, r3i in zip(times, t1, t2, t3, r1, r2, r3):
                vals = [t1i, t2i, t3i, r1i, r2i, r3i]
                vals2 = write_floats_13e(vals)
                (dx, dy, dz, rx, ry, rz) = vals2
                f.write('%14s %6s     %-13s  %-13s  %-13s  %-13s  %-13s  %s\n' % (
                    write_float_12E(dt), etypei, dx, dy, dz, rx, ry, rz))
            f.write(page_stamp % page_num)
            page_num += 1
        return page_num
Example No. 26
def reorder(outcomes, pmf, sample_space, index=None):
    """
    Helper function to reorder outcomes and pmf to match sample_space.

    """
    try:
        order = [(sample_space.index(outcome), i)
                 for i, outcome in enumerate(outcomes)]
    except ValueError:
        # Let's identify which outcomes were not in the sample space.
        bad = []
        for outcome in outcomes:
            try:
                sample_space.index(outcome)
            except ValueError:
                bad.append(outcome)
        if len(bad) == 1:
            single = True
        else:
            single = False
        raise InvalidOutcome(bad, single=single)

    order.sort()
    _, order = zip(*order)

    if index is None:
        index = dict(zip(outcomes, range(len(outcomes))))

    outcomes = [outcomes[i] for i in order]
    pmf = [pmf[i] for i in order]
    new_index = dict(zip(outcomes, range(len(outcomes))))
    return outcomes, pmf, new_index
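A small usage sketch for reorder, using a plain list as the sample space (anything that supports .index() works):

outcomes = ['b', 'a', 'c']
pmf = [0.2, 0.5, 0.3]
sample_space = ['a', 'b', 'c']
outcomes, pmf, new_index = reorder(outcomes, pmf, sample_space)
# outcomes == ['a', 'b', 'c'], pmf == [0.5, 0.2, 0.3]
# new_index == {'a': 0, 'b': 1, 'c': 2}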
Example No. 27
def detect_gid_list(ibs, gid_list, tree_path_list, downsample=True, **kwargs):
    """
    Args:
        gid_list (list of int): the list of IBEIS image_rowids that need detection
        tree_path_list (list of str): the list of trees to load for detection
        downsample (bool, optional): a flag to indicate if the original image
            sizes should be used; defaults to True

            True:  ibs.get_image_detectpaths() is used
            False: ibs.get_image_paths() is used

    Kwargs (optional): refer to the PyRF documentation for configuration settings

    Yields:
        results (list of dict)
    """
    # Get new gpaths if downsampling
    if downsample:
        gpath_list = ibs.get_image_detectpaths(gid_list)
        neww_list = [vt.open_image_size(gpath)[0] for gpath in gpath_list]
        oldw_list = [oldw for (oldw, oldh) in ibs.get_image_sizes(gid_list)]
        downsample_list = [oldw / neww for oldw, neww in zip(oldw_list, neww_list)]
    else:
        gpath_list = ibs.get_image_paths(gid_list)
        downsample_list = [None] * len(gpath_list)
    # Run detection
    results_iter = detect(ibs, gpath_list, tree_path_list, **kwargs)
    # Upscale the results
    for gid, downsample, (gpath, result_list) in zip(gid_list, downsample_list, results_iter):
        # Upscale the results back up to the original image size
        if downsample is not None and downsample != 1.0:
            for result in result_list:
                for key in ["centerx", "centery", "xtl", "ytl", "width", "height"]:
                    result[key] = int(result[key] * downsample)
        yield gid, gpath, result_list
Example No. 28
    def test_format_1_converter(self):
        filename = os.path.join(self.tempdir, 'svhn_format_1.hdf5')
        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        subparser = subparsers.add_parser('svhn')
        svhn.fill_subparser(subparser)
        subparser.set_defaults(directory=self.tempdir, output_file=filename)
        args = parser.parse_args(['svhn', '1'])
        args_dict = vars(args)
        func = args_dict.pop('func')
        func(**args_dict)
        h5file = h5py.File(filename, mode='r')

        expected_features = sum((self.f1_mock[split]['image']
                                 for split in ('train', 'test', 'extra')), [])
        for val, truth in zip(h5file['features'][...], expected_features):
            assert_equal(val, truth.transpose(2, 0, 1).flatten())

        expected_labels = sum((self.f1_mock[split]['label']
                               for split in ('train', 'test', 'extra')), [])
        for val, truth in zip(h5file['bbox_labels'][...], expected_labels):
            truth[truth == 10] = 0
            assert_equal(val, truth)

        expected_lefts = sum((self.f1_mock[split]['left']
                              for split in ('train', 'test', 'extra')), [])
        for val, truth in zip(h5file['bbox_lefts'][...], expected_lefts):
            assert_equal(val, truth)
Example No. 29
def pr_dict(dbpr):
    d = {dk: getattr(dbpr, sk) for sk, dk in zip(SRC_PR_KEYS, PR_KEYS)}
    dd = {dk: getattr(dbpr, sk) for sk, dk in zip(('name',), ('Label',))}

    d.update(dd)

    return d
Example No. 30
def sort_together(iterables, key_list=(0,), reverse=False):
    """Return the input iterables sorted together, with *key_list* as the
    priority for sorting. All iterables are trimmed to the length of the
    shortest one.

    This can be used like the sorting function in a spreadsheet. If each
    iterable represents a column of data, the key list determines which
    columns are used for sorting.

    By default, all iterables are sorted using the ``0``-th iterable::

        >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
        >>> sort_together(iterables)
        [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]

    Set a different key list to sort according to another iterable.
    Specifying multiple keys dictates how ties are broken::

        >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
        >>> sort_together(iterables, key_list=(1, 2))
        [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]

    Set *reverse* to ``True`` to sort in descending order.

        >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
        [(3, 2, 1), ('a', 'b', 'c')]

    """
    return list(zip(*sorted(zip(*iterables),
                            key=itemgetter(*key_list),
                            reverse=reverse)))
Example No. 31
def make_multisource_batch_pipeline(dataset_spec_list,
                                    split,
                                    batch_size,
                                    add_dataset_offset,
                                    pool=None,
                                    shuffle_buffer_size=None,
                                    read_buffer_size_bytes=None,
                                    num_prefetch=0,
                                    image_size=None,
                                    num_to_take=None):
    """Returns a pipeline emitting data from multiple source as Batches.

  Args:
    dataset_spec_list: A list of DatasetSpecification, one for each source.
    split: A learning_spec.Split object identifying the source split.
    batch_size: An int representing the max number of examples in each batch.
    add_dataset_offset: A Boolean, whether to add an offset to each dataset's
      targets, so that each target is unique across all datasets.
    pool: String (optional), for example-split datasets, which example split to
      use ('valid', or 'test'), used at meta-test time only.
    shuffle_buffer_size: int or None, number of examples in the buffer used for
      shuffling the examples from different classes, while they are mixed
      together. There is only one shuffling operation, not one per class.
    read_buffer_size_bytes: int or None, buffer size for each TFRecordDataset.
    num_prefetch: int, the number of examples to prefetch for each class of each
      dataset. Prefetching occurs just after the class-specific Dataset object
      is constructed. If < 1, no prefetching occurs.
    image_size: int, desired image size used during decoding.
    num_to_take: Optional, a list specifying for each dataset the number of
      examples per class to restrict to (for this given split). If provided, its
      length must be the same as len(dataset_spec). If None, no restrictions are
      applied to any dataset and all data per class is used.

  Returns:
    A Dataset instance that outputs decoded batches from all classes in the
    split.
  """
    if num_to_take is not None and len(num_to_take) != len(dataset_spec_list):
        raise ValueError('num_to_take does not have the same length as '
                         'dataset_spec_list.')
    if num_to_take is None:
        num_to_take = [-1] * len(dataset_spec_list)
    sources = []
    offset = 0
    for dataset_spec, num_to_take_for_dataset in zip(dataset_spec_list,
                                                     num_to_take):
        batch_reader = reader.BatchReader(dataset_spec, split,
                                          shuffle_buffer_size,
                                          read_buffer_size_bytes, num_prefetch,
                                          num_to_take_for_dataset)
        dataset = batch_reader.create_dataset_input_pipeline(
            batch_size=batch_size, pool=pool, offset=offset)
        sources.append(dataset)
        if add_dataset_offset:
            offset += len(dataset_spec.get_classes(split))

    # Sample uniformly among sources
    dataset = tf.data.experimental.sample_from_datasets(sources)

    map_fn = functools.partial(process_batch, image_size=image_size)
    dataset = dataset.map(map_fn)

    # Overlap episode processing and training.
    dataset = dataset.prefetch(1)
    return dataset
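A toy sketch of the final mixing step only, independent of the reader classes above: tf.data.experimental.sample_from_datasets interleaves examples uniformly from several source datasets before the shared map/prefetch stages (the toy datasets here are placeholders):

import tensorflow as tf

source_a = tf.data.Dataset.from_tensor_slices([0, 0, 0, 0])
source_b = tf.data.Dataset.from_tensor_slices([1, 1, 1, 1])
mixed = tf.data.experimental.sample_from_datasets([source_a, source_b])
mixed = mixed.map(lambda x: x * 10).prefetch(1)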
Example No. 32
def create_masked_lm_predictions(tokens, masked_lm_prob,
                                 max_predictions_per_seq, vocab_words, rng):
  """Creates the predictions for the masked LM objective."""

  cand_indexes = []
  # Note(mingdachen): We create a list for recording if the piece is
  # the starting piece of current token, where 1 means true, so that
  # on-the-fly whole word masking is possible.
  token_boundary = [0] * len(tokens)

  for (i, token) in enumerate(tokens):
    if token == "[CLS]" or token == "[SEP]":
      token_boundary[i] = 1
      continue
    # Whole Word Masking means that we mask all of the wordpieces
    # corresponding to an original word.
    #
    # Note that Whole Word Masking does *not* change the training code
    # at all -- we still predict each WordPiece independently, softmaxed
    # over the entire vocabulary.
    if (FLAGS.do_whole_word_mask and len(cand_indexes) >= 1 and
        not is_start_piece(token)):
      cand_indexes[-1].append(i)
    else:
      cand_indexes.append([i])
      if is_start_piece(token):
        token_boundary[i] = 1

  output_tokens = list(tokens)

  masked_lm_positions = []
  masked_lm_labels = []

  if masked_lm_prob == 0:
    return (output_tokens, masked_lm_positions,
            masked_lm_labels, token_boundary)

  num_to_predict = min(max_predictions_per_seq,
                       max(1, int(round(len(tokens) * masked_lm_prob))))

  # Note(mingdachen):
  # By default, we set the probabilities to favor shorter ngram sequences.
  ngrams = np.arange(1, FLAGS.ngram + 1, dtype=np.int64)
  pvals = 1. / np.arange(1, FLAGS.ngram + 1)
  pvals /= pvals.sum(keepdims=True)

  if not FLAGS.favor_shorter_ngram:
    pvals = pvals[::-1]

  ngram_indexes = []
  for idx in range(len(cand_indexes)):
    ngram_index = []
    for n in ngrams:
      ngram_index.append(cand_indexes[idx:idx+n])
    ngram_indexes.append(ngram_index)

  rng.shuffle(ngram_indexes)

  masked_lms = []
  covered_indexes = set()
  for cand_index_set in ngram_indexes:
    if len(masked_lms) >= num_to_predict:
      break
    if not cand_index_set:
      continue
    # Note(mingdachen):
    # Skip current piece if they are covered in lm masking or previous ngrams.
    for index_set in cand_index_set[0]:
      for index in index_set:
        if index in covered_indexes:
          continue

    n = np.random.choice(ngrams[:len(cand_index_set)],
                         p=pvals[:len(cand_index_set)] /
                         pvals[:len(cand_index_set)].sum(keepdims=True))
    index_set = sum(cand_index_set[n - 1], [])
    n -= 1
    # Note(mingdachen):
    # Repeatedly looking for a candidate that does not exceed the
    # maximum number of predictions by trying shorter ngrams.
    while len(masked_lms) + len(index_set) > num_to_predict:
      if n == 0:
        break
      index_set = sum(cand_index_set[n - 1], [])
      n -= 1
    # If adding a whole-word mask would exceed the maximum number of
    # predictions, then just skip this candidate.
    if len(masked_lms) + len(index_set) > num_to_predict:
      continue
    is_any_index_covered = False
    for index in index_set:
      if index in covered_indexes:
        is_any_index_covered = True
        break
    if is_any_index_covered:
      continue
    for index in index_set:
      covered_indexes.add(index)

      masked_token = None
      # 80% of the time, replace with [MASK]
      if rng.random() < 0.8:
        masked_token = "[MASK]"
      else:
        # 10% of the time, keep original
        if rng.random() < 0.5:
          masked_token = tokens[index]
        # 10% of the time, replace with random word
        else:
          masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]

      output_tokens[index] = masked_token

      masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
  assert len(masked_lms) <= num_to_predict

  rng.shuffle(ngram_indexes)

  select_indexes = set()
  if FLAGS.do_permutation:
    for cand_index_set in ngram_indexes:
      if len(select_indexes) >= num_to_predict:
        break
      if not cand_index_set:
        continue
      # Note(mingdachen):
      # Skip current piece if they are covered in lm masking or previous ngrams.
      for index_set in cand_index_set[0]:
        for index in index_set:
          if index in covered_indexes or index in select_indexes:
            continue

      n = np.random.choice(ngrams[:len(cand_index_set)],
                           p=pvals[:len(cand_index_set)] /
                           pvals[:len(cand_index_set)].sum(keepdims=True))
      index_set = sum(cand_index_set[n - 1], [])
      n -= 1

      while len(select_indexes) + len(index_set) > num_to_predict:
        if n == 0:
          break
        index_set = sum(cand_index_set[n - 1], [])
        n -= 1
      # If adding a whole-word mask would exceed the maximum number of
      # predictions, then just skip this candidate.
      if len(select_indexes) + len(index_set) > num_to_predict:
        continue
      is_any_index_covered = False
      for index in index_set:
        if index in covered_indexes or index in select_indexes:
          is_any_index_covered = True
          break
      if is_any_index_covered:
        continue
      for index in index_set:
        select_indexes.add(index)
    assert len(select_indexes) <= num_to_predict

    select_indexes = sorted(select_indexes)
    permute_indexes = list(select_indexes)
    rng.shuffle(permute_indexes)
    orig_token = list(output_tokens)

    for src_i, tgt_i in zip(select_indexes, permute_indexes):
      output_tokens[src_i] = orig_token[tgt_i]
      masked_lms.append(MaskedLmInstance(index=src_i, label=orig_token[src_i]))

  masked_lms = sorted(masked_lms, key=lambda x: x.index)

  for p in masked_lms:
    masked_lm_positions.append(p.index)
    masked_lm_labels.append(p.label)
  return (output_tokens, masked_lm_positions, masked_lm_labels, token_boundary)
Example No. 33
        def verschiebung(self, n):
            r"""
            Return the image of the symmetric function ``self`` under the
            `n`-th Verschiebung operator.

            The `n`-th Verschiebung operator `\mathbf{V}_n` is defined to be
            the unique algebra endomorphism `V` of the ring of symmetric
            functions that satisfies `V(h_r) = h_{r/n}` for every positive
            integer `r` divisible by `n`, and satisfies `V(h_r) = 0` for
            every positive integer `r` not divisible by `n`. This operator
            `\mathbf{V}_n` is a Hopf algebra endomorphism. For every
            nonnegative integer `r` with `n \mid r`, it satisfies

            .. MATH::

                \mathbf{V}_n(h_r) = h_{r/n},
                \quad \mathbf{V}_n(p_r) = n p_{r/n},
                \quad \mathbf{V}_n(e_r) = (-1)^{r - r/n} e_{r/n}

            (where `h` is the complete homogeneous basis, `p` is the
            powersum basis, and `e` is the elementary basis). For every
            nonnegative integer `r` with `n \nmid r`, it satisfies

            .. MATH::

                \mathbf{V}_n(h_r) = \mathbf{V}_n(p_r) = \mathbf{V}_n(e_r) = 0.

            The `n`-th Verschiebung operator is also called the `n`-th
            Verschiebung endomorphism. Its name derives from the Verschiebung
            (German for "shift") endomorphism of the Witt vectors.

            The `n`-th Verschiebung operator is adjoint to the `n`-th
            Frobenius operator (see :meth:`frobenius` for its definition)
            with respect to the Hall scalar product (:meth:`scalar`).

            The action of the `n`-th Verschiebung operator on the Schur basis
            can also be computed explicitly. The following (probably clumsier
            than necessary) description can be obtained by solving exercise
            7.61 in Stanley's [STA]_.

            Let `\lambda` be a partition. Let `n` be a positive integer. If
            the `n`-core of `\lambda` is nonempty, then
            `\mathbf{V}_n(s_\lambda) = 0`. Otherwise, the following method
            computes `\mathbf{V}_n(s_\lambda)`: Write the partition `\lambda`
            in the form `(\lambda_1, \lambda_2, \ldots, \lambda_{ns})` for some
            nonnegative integer `s`. (If `n` does not divide the length of
            `\lambda`, then this is achieved by adding trailing zeroes to
            `\lambda`.) Set `\beta_i = \lambda_i + ns - i` for every
            `i \in \{ 1, 2, \ldots, ns \}`. Then,
            `(\beta_1, \beta_2, \ldots, \beta_{ns})` is a strictly decreasing
            sequence of nonnegative integers. Stably sort the list
            `(1, 2, \ldots, ns)` in order of (weakly) increasing remainder of
            `-1 - \beta_i` modulo `n`. Let `\xi` be the sign of the
            permutation that is used for this sorting. Let `\psi` be the sign
            of the permutation that is used to stably sort the list
            `(1, 2, \ldots, ns)` in order of (weakly) increasing remainder of
            `i - 1` modulo `n`. (Notice that `\psi = (-1)^{n(n-1)s(s-1)/4}`.)
            Then, `\mathbf{V}_n(s_\lambda) = \xi \psi \prod_{i = 0}^{n - 1}
            s_{\lambda^{(i)}}`, where
            `(\lambda^{(0)}, \lambda^{(1)}, \ldots, \lambda^{(n - 1)})`
            is the `n`-quotient of `\lambda`.

            INPUT:

            - ``n`` -- a positive integer

            OUTPUT:

            The result of applying the `n`-th Verschiebung operator (on the ring of
            symmetric functions) to ``self``.

            EXAMPLES::

                sage: Sym = SymmetricFunctions(ZZ)
                sage: s = Sym.s()
                sage: s[5].verschiebung(2)
                0
                sage: s[6].verschiebung(6)
                s[1]
                sage: s[6,3].verschiebung(3)
                s[2, 1] + s[3]
                sage: s[6,3,1].verschiebung(2)
                -s[3, 2]
                sage: s[3,2,1].verschiebung(1)
                s[3, 2, 1]
                sage: s([]).verschiebung(1)
                s[]
                sage: s([]).verschiebung(4)
                s[]

            TESTS:

            Let us check that this method on the powersum basis gives the
            same result as the implementation in sfa.py on the monomial
            basis::

                sage: Sym = SymmetricFunctions(QQ)
                sage: s = Sym.s(); h = Sym.h()
                sage: all( h(s(lam)).verschiebung(3) == h(s(lam).verschiebung(3))
                ....:      for lam in Partitions(6) )
                True
                sage: all( s(h(lam)).verschiebung(2) == s(h(lam).verschiebung(2))
                ....:      for lam in Partitions(4) )
                True
                sage: all( s(h(lam)).verschiebung(5) == s(h(lam).verschiebung(5))
                ....:      for lam in Partitions(10) )
                True
                sage: all( s(h(lam)).verschiebung(2) == s(h(lam).verschiebung(2))
                ....:      for lam in Partitions(8) )
                True
                sage: all( s(h(lam)).verschiebung(3) == s(h(lam).verschiebung(3))
                ....:      for lam in Partitions(12) )
                True
                sage: all( s(h(lam)).verschiebung(3) == s(h(lam).verschiebung(3))
                ....:      for lam in Partitions(9) )
                True
            """
            # Extra hack for the n == 1 case, since lam.quotient(1)
            # (for lam being a partition) returns a partition rather than
            # a partition tuple.
            if n == 1:
                return self

            parent = self.parent()
            s_coords_of_self = self.monomial_coefficients().items()
            result = parent.zero()
            from sage.combinat.permutation import Permutation
            for (lam, coeff) in s_coords_of_self:
                if len(lam.core(n)) == 0:
                    quotient = lam.quotient(n)
                    quotient_prod = parent.prod(parent(part)
                                                for part in quotient)
                    # Now, compute the sign of quotient_prod in the
                    # n-th Verschiebung of lam.
                    len_lam = len(lam)
                    ns = len_lam + ((- len_lam) % n)
                    s = ns // n   # This is actually ns / n, as we have n | ns.
                    beta_list = lam.beta_numbers(ns)
                    zipped_beta_list = sorted(zip(beta_list, range(1, ns + 1)),
                                              key=lambda a: (-1 - a[0]) % n)
                    # We are using the fact that sort is a stable sort.
                    perm_list = [a[1] for a in zipped_beta_list]
                    if Permutation(perm_list).sign() == 1:
                        minus_sign = False
                    else:
                        minus_sign = True
                    if (n * s * (n-1) * (s-1)) % 8 == 4:
                        minus_sign = not minus_sign
                    if minus_sign:
                        result -= coeff * quotient_prod
                    else:
                        result += coeff * quotient_prod
            return result
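
A minimal plain-Python sketch (no Sage required) of the sign bookkeeping described in the docstring above; perm_sign and verschiebung_sign are illustrative helper names, not part of the Sage API.

def perm_sign(perm):
    # Sign of a permutation of distinct integers, via inversion count.
    inversions = sum(1
                     for i in range(len(perm))
                     for j in range(i + 1, len(perm))
                     if perm[i] > perm[j])
    return -1 if inversions % 2 else 1

def verschiebung_sign(lam, n):
    # Pad lam with trailing zeros so that its length ns is divisible by n.
    ns = len(lam) + (-len(lam)) % n
    padded = list(lam) + [0] * (ns - len(lam))
    beta = [padded[i - 1] + ns - i for i in range(1, ns + 1)]
    # xi: sign of the stable sort of (1, ..., ns) by (-1 - beta_i) mod n.
    xi_perm = [i for _, i in sorted(zip(beta, range(1, ns + 1)),
                                    key=lambda pair: (-1 - pair[0]) % n)]
    xi = perm_sign(xi_perm)
    # psi = (-1)**(n*(n-1)*s*(s-1)/4), with s = ns / n.
    s = ns // n
    psi = -1 if (n * (n - 1) * s * (s - 1)) % 8 == 4 else 1
    return xi * psi

print(verschiebung_sign((6, 3, 1), 2))  # -1, matching s[6,3,1].verschiebung(2) == -s[3, 2]
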
Example #34
import itertools
import numpy as np
from six.moves import zip
from collections import OrderedDict
from bokeh.plotting import *
from bokeh.objects import HoverTool

TOOLS="pan,wheel_zoom,box_zoom,reset,hover,previewsave"

xx, yy = np.meshgrid(range(0,101,4), range(0,101,4))
x = xx.flatten()
y = yy.flatten()
N = len(x)
inds = [str(i) for i in np.arange(N)]
radii = np.random.random(size=N)*0.4 + 1.7
colors = [
    "#%02x%02x%02x" % (int(r), int(g), 150)
    for r, g in zip(np.floor(50 + 2 * x), np.floor(30 + 2 * y))
]

foo = list(itertools.permutations("abcdef"))[:N]
bar = np.random.normal(size=N)

source = ColumnDataSource(
    data=dict(
        x=x,
        y=y,
        radius=radii,
        colors=colors,
        foo=foo,
        bar=bar,
    )
)
Example #35
    def rate(self,
             rating_groups,
             ranks=None,
             weights=None,
             min_delta=DELTA,
             partial_weights=None):
        """Recalculates ratings by the ranking table::

           env = TrueSkill()  # uses default settings
           # create ratings
           r1 = env.create_rating(42.222)
           r2 = env.create_rating(89.999)
           # calculate new ratings
           rating_groups = [(r1,), (r2,)]
           rated_rating_groups = env.rate(rating_groups, ranks=[0, 1])
           # save new ratings
           (r1,), (r2,) = rated_rating_groups

        ``rating_groups`` is a list of rating tuples or dictionaries, one per
        team in the match.  The result has the same structure as this
        argument.  Rating dictionaries are useful for looking up a specific
        player's new rating::

           # load players from the database
           p1 = load_player_from_database('Arpad Emrick Elo')
           p2 = load_player_from_database('Mark Glickman')
           p3 = load_player_from_database('Heungsub Lee')
           # calculate new ratings
           rating_groups = [{p1: p1.rating, p2: p2.rating}, {p3: p3.rating}]
           rated_rating_groups = env.rate(rating_groups, ranks=[0, 1])
           # save new ratings
           for player in [p1, p2, p3]:
               player.rating = rated_rating_groups[player.team][player]

        :param rating_groups: a list of tuples or dictionaries containing
                              :class:`Rating` objects.
        :param ranks: a ranking table.  By default, it is the same as the
                      order of ``rating_groups``.
        :param weights: weights of each player for "partial play".
        :param min_delta: each iteration checks the delta of changes, and the
                          loop stops once the delta is less than this argument.
        :returns: recalculated ratings in the same structure as
                  ``rating_groups``.
        :raises: :exc:`FloatingPointError` when the winners have a much lower
                 rating than the losers.  Higher floating-point precision could
                 solve this error; set the backend to "mpmath".

        .. versionadded:: 0.2

        """

        rating_groups, keys = self.validate_rating_groups(rating_groups)
        weights = self.validate_weights(weights, rating_groups, keys)
        group_size = len(rating_groups)
        if ranks is None:
            ranks = range(group_size)
        elif len(ranks) != group_size:
            raise ValueError('Wrong ranks')
        # sort rating groups by rank
        by_rank = lambda x: x[1][1]
        sorting = sorted(enumerate(zip(rating_groups, ranks, weights)),
                         key=by_rank)
        sorted_rating_groups, sorted_ranks, sorted_weights = [], [], []
        for x, (g, r, w) in sorting:
            sorted_rating_groups.append(g)
            sorted_ranks.append(r)
            # make weights to be greater than 0
            sorted_weights.append(max(min_delta, w_) for w_ in w)
        # build factor graph
        args = (sorted_rating_groups, sorted_ranks, sorted_weights)
        builders = self.factor_graph_builders(*args)
        args = builders + (min_delta, )
        layers = self.run_schedule(*args)
        # make result
        rating_layer, team_sizes = layers[0], _team_sizes(sorted_rating_groups)
        transformed_groups = []
        for start, end in zip([0] + team_sizes[:-1], team_sizes):
            group = []
            for f in rating_layer[start:end]:
                group.append(Rating(float(f.var.mu), float(f.var.sigma)))
            transformed_groups.append(tuple(group))
        by_hint = lambda x: x[0]
        unsorting = sorted(zip((x for x, __ in sorting), transformed_groups),
                           key=by_hint)
        if keys is None:
            return [g for x, g in unsorting]

        # restore the structure with input dictionary keys
        return [dict(zip(keys[x], g)) for x, g in unsorting]
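
A short, hedged usage sketch for the weights ("partial play") argument of the method above, assuming the public trueskill package API it belongs to:

from trueskill import TrueSkill

env = TrueSkill()
team_a = (env.create_rating(), env.create_rating())
team_b = (env.create_rating(),)
# The second player on team_a only played half of the match.
rated_a, rated_b = env.rate([team_a, team_b], ranks=[0, 1],
                            weights=[(1.0, 0.5), (1.0,)])
print(rated_a, rated_b)
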
Example #36
def build_perf_layer():
    for rating_var, perf_var in zip(rating_vars, perf_vars):
        yield LikelihoodFactor(rating_var, perf_var, self.beta**2)
Example #37
def build_rating_layer():
    for rating_var, rating in zip(rating_vars, flatten_ratings):
        yield PriorFactor(rating_var, rating, self.tau)
def multi_conv_res(x, padding, name, layers, hparams, mask=None, source=None):
    """A stack of separable convolution blocks with residual connections."""
    with tf.variable_scope(name):
        padding_bias = None
        if mask is not None:
            padding_bias = (1.0 -
                            mask) * -1e9  # Bias to not attend to padding.
            if padding == "LEFT":  # Do not mask anything when left-padding.
                mask = None
        if (hparams.kernel_scheme in _KERNEL_SCHEMES
                and hparams.dilation_scheme in _DILATION_SCHEMES):
            kernels = _KERNEL_SCHEMES[hparams.kernel_scheme]
            dilations = _DILATION_SCHEMES[hparams.dilation_scheme]
            dilations_and_kernels = list(zip(dilations, kernels))
            dilations_and_kernels1 = dilations_and_kernels[:2]
            dilations_and_kernels2 = dilations_and_kernels[2:]
        else:
            k = (hparams.kernel_height, hparams.kernel_width)
            k2 = (hparams.large_kernel_size, 1)
            dilations_and_kernels1 = [((1, 1), k), ((1, 1), k)]
            dilations_and_kernels2 = [((1, 1), k2), ((4, 4), k2)]
        separabilities1 = [hparams.separability, hparams.separability]
        separabilities2 = [hparams.separability] * len(dilations_and_kernels2)
        if hparams.separability < 0:
            separabilities1 = [hparams.separability - 1, hparams.separability]
            separabilities2 = [
                hparams.separability - i
                for i in reversed(range(len(dilations_and_kernels2)))
            ]

        def norm_fn(x, name):
            with tf.variable_scope(name, default_name="norm"):
                return common_layers.apply_norm(x, hparams.norm_type,
                                                hparams.hidden_size,
                                                hparams.norm_epsilon)

        for layer in xrange(layers):
            with tf.variable_scope("layer_%d" % layer):
                y = common_layers.subseparable_conv_block(
                    x,
                    hparams.hidden_size,
                    dilations_and_kernels1,
                    normalizer_fn=norm_fn,
                    padding=padding,
                    mask=mask,
                    separabilities=separabilities1,
                    name="residual1")
                x += common_layers.subseparable_conv_block(
                    x + y,
                    hparams.hidden_size,
                    dilations_and_kernels2,
                    normalizer_fn=norm_fn,
                    padding=padding,
                    mask=mask,
                    separabilities=separabilities2,
                    name="residual2") + y
                if source is not None and hparams.attention_type != "none":
                    x += attention(x,
                                   source,
                                   norm_fn,
                                   hparams,
                                   bias=padding_bias)
                if mask is not None:
                    x *= mask
        return tf.nn.dropout(x, 1.0 - hparams.dropout)
Example #39
def _check_shape(self, shape):
    for s1, s2 in zip(self.shape, shape):
        if s1 != s2 and s1 >= 0 and s2 >= 0:
            raise ValueError(
                "Shape mismatch. The shape defined in the data layer is {}, "
                "but received {}".format(self.shape, shape))
Example #40
def make_multisource_episode_pipeline(dataset_spec_list,
                                      use_dag_ontology_list,
                                      use_bilevel_ontology_list,
                                      split,
                                      episode_descr_config,
                                      pool=None,
                                      shuffle_buffer_size=None,
                                      read_buffer_size_bytes=None,
                                      num_prefetch=0,
                                      image_size=None,
                                      num_to_take=None):
    """Returns a pipeline emitting data from multiple sources as Episodes.

  Each episode contains data from only a single source. For each episode, its
  source is sampled uniformly across all sources.

  Args:
    dataset_spec_list: A list of DatasetSpecification, one for each source.
    use_dag_ontology_list: A list of Booleans, one for each source: whether to
      use that source's DAG-structured ontology to sample episode classes.
    use_bilevel_ontology_list: A list of Booleans, one for each source: whether
      to use that source's bi-level ontology to sample episode classes.
    split: A learning_spec.Split object identifying the sources split. It is the
      same for all datasets.
    episode_descr_config: An instance of EpisodeDescriptionConfig containing
      parameters relating to sampling shots and ways for episodes.
    pool: String (optional), for example-split datasets, which example split to
      use ('train', 'valid', or 'test'), used at meta-test time only.
    shuffle_buffer_size: int or None, shuffle buffer size for each Dataset.
    read_buffer_size_bytes: int or None, buffer size for each TFRecordDataset.
    num_prefetch: int, the number of examples to prefetch for each class of each
      dataset. Prefetching occurs just after the class-specific Dataset object
      is constructed. If < 1, no prefetching occurs.
    image_size: int, desired image size used during decoding.
    num_to_take: Optional, a list specifying for each dataset the number of
      examples per class to restrict to (for this given split). If provided, its
      length must be the same as len(dataset_spec_list). If None, no restrictions are
      applied to any dataset and all data per class is used.

  Returns:
    A Dataset instance that outputs fully-assembled and decoded episodes.
  """
    if pool is not None:
        if not data.POOL_SUPPORTED:
            raise NotImplementedError(
                'Example-level splits or pools not supported.')
    if num_to_take is not None and len(num_to_take) != len(dataset_spec_list):
        raise ValueError('num_to_take does not have the same length as '
                         'dataset_spec_list.')
    if num_to_take is None:
        num_to_take = [-1] * len(dataset_spec_list)
    sources = []
    for (dataset_spec, use_dag_ontology, use_bilevel_ontology,
         num_to_take_for_dataset) in zip(dataset_spec_list,
                                         use_dag_ontology_list,
                                         use_bilevel_ontology_list,
                                         num_to_take):
        episode_reader = reader.EpisodeReader(dataset_spec, split,
                                              shuffle_buffer_size,
                                              read_buffer_size_bytes,
                                              num_prefetch,
                                              num_to_take_for_dataset)
        sampler = sampling.EpisodeDescriptionSampler(
            episode_reader.dataset_spec,
            split,
            episode_descr_config,
            pool=pool,
            use_dag_hierarchy=use_dag_ontology,
            use_bilevel_hierarchy=use_bilevel_ontology)
        dataset = episode_reader.create_dataset_input_pipeline(sampler,
                                                               pool=pool)
        sources.append(dataset)

    # Sample uniformly among sources
    dataset = tf.data.experimental.sample_from_datasets(sources)

    # Episodes coming out of `dataset` contain flushed examples and are internally
    # padded with dummy examples. `process_episode` discards flushed examples,
    # splits the episode into support and query sets, removes the dummy examples
    # and decodes the example strings.
    chunk_sizes = sampler.compute_chunk_sizes()
    map_fn = functools.partial(process_episode,
                               chunk_sizes=chunk_sizes,
                               image_size=image_size)
    dataset = dataset.map(map_fn)

    # Overlap episode processing and training.
    dataset = dataset.prefetch(1)
    return dataset
    def _detect_number_from_words(self, number_list=None, original_list=None):
        """
        Detect numbers from number words, for example - "two thousand", "One hundred twenty two".
        How it works:
            First it splits the text on '-' or ':' if either is present, and passes each part
            to the number word detector, which returns the numeric value and the original word
            from which it was detected. It then checks for a unit in the suffix and prefix of
            the original string and updates the result if any unit is found.
        Args:
            number_list (list): list containing detected numeric text
            original_list (list): list containing original numeral text
        Returns:
            number_list (list): list containing updated detected numeric text
            original_list (list): list containing updated original numeral text

        Examples:
            [In]  >>  self.processed_text = "One hundred two"
            [In]  >>  _detect_number_from_words()
            [Out] >> ([{'value': '102', 'unit': None}], ['one hundred two'])

            [In]  >>  self.processed_text = "two hundred - three hundred"
            [In]  >>  _detect_number_from_words()
            [Out] >> ([{'value': '200', 'unit': None}, {'value': '300', 'unit': None}],
                      ['two hundred', 'three hundred'])

            [In]  >>  self.processed_text = "one two three"
            [In]  >>  _detect_number_from_words()
            [Out] >> ([{'value': '1', 'unit': None}, {'value': '2', 'unit': None}, {'value': '3', 'unit': None}],
                      ['one', 'two', 'three'])

            *Notes*
                Some Limitations:
                i) Cannot detect decimals without the integer part. E.g. .25, .5, etc
                ii) Cannot detect one with "a/an". E.g. I want an apple
                iii) Detects wrong for multiple scales mentioned consecutively E.g. three hundred thousand,
                     hundred thousand
        """
        number_list = number_list or []
        original_list = original_list or []

        # Split the text on "-" and ":".  For text like "two thousand-three thousand", simple whitespace
        # splitting would give [two, thousand-three, thousand] and make the number word detector return a
        # wrong result, so we first separate the parts into [two thousand, three thousand] using '-' or ':'
        # as the split characters.
        numeral_text_list = re.split(r'[\-\:]', self.processed_text)
        for numeral_text in numeral_text_list:
            numbers, original_texts = get_number_from_number_word(
                numeral_text, self.numbers_word_map)
            full_list = list(zip(numbers, original_texts))
            sorted_full_list = sorted(full_list,
                                      key=lambda kv: len(kv[1]),
                                      reverse=True)
            for number, original_text in sorted_full_list:
                unit = None
                if self.unit_type:
                    unit, original_text = self._get_unit_from_text(
                        original_text, numeral_text)
                _pattern = re.compile(self._SPAN_BOUNDARY_TEMPLATE.format(
                    re.escape(original_text)),
                                      flags=_re_flags)
                if _pattern.search(numeral_text):
                    numeral_text = _pattern.sub(self.tag, numeral_text, 1)
                    number_list.append({
                        NUMBER_DETECTION_RETURN_DICT_VALUE:
                        str(number),
                        NUMBER_DETECTION_RETURN_DICT_UNIT:
                        unit
                    })
                    original_list.append(original_text)
        return number_list, original_list
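
The '-'/':' splitting described in the comment above, shown in isolation:

import re

print(re.split(r'[\-\:]', "two thousand-three thousand"))
# ['two thousand', 'three thousand']
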
Example #42
def static_or_dynamic_map_fn(fn,
                             elems,
                             dtype=None,
                             parallel_iterations=32,
                             back_prop=True):
    """Runs map_fn as a (static) for loop when possible.

  This function rewrites the map_fn as an explicit unstack input -> for loop
  over function calls -> stack result combination.  This allows our graphs to
  be acyclic when the batch size is static.
  For comparison, see https://www.tensorflow.org/api_docs/python/tf/map_fn.

  Note that `static_or_dynamic_map_fn` currently is not *fully* interchangeable
  with the default tf.map_fn function as it does not accept nested inputs (only
  Tensors or lists of Tensors).  Likewise, the output of `fn` can only be a
  Tensor or list of Tensors.

  TODO(jonathanhuang): make this function fully interchangeable with tf.map_fn.

  Args:
    fn: The callable to be performed. It accepts one argument, which will have
      the same structure as elems. Its output must have the
      same structure as elems.
    elems: A tensor or list of tensors, each of which will
      be unpacked along their first dimension. The sequence of the
      resulting slices will be applied to fn.
    dtype:  (optional) The output type(s) of fn. If fn returns a structure of
      Tensors differing from the structure of elems, then dtype is not optional
      and must have the same structure as the output of fn.
    parallel_iterations: (optional) number of batch items to process in
      parallel.  This flag is only used if the native tf.map_fn is used
      and defaults to 32 instead of 10 (unlike the standard tf.map_fn default).
    back_prop: (optional) True enables support for back propagation.
      This flag is only used if the native tf.map_fn is used.

  Returns:
    A tensor or sequence of tensors. Each tensor packs the
    results of applying fn to tensors unpacked from elems along the first
    dimension, from first to last.
  Raises:
    ValueError: if `elems` is not a Tensor or a list of Tensors.
    ValueError: if `fn` does not return a Tensor or list of Tensors
  """
    if isinstance(elems, list):
        for elem in elems:
            if not isinstance(elem, tf.Tensor):
                raise ValueError(
                    '`elems` must be a Tensor or list of Tensors.')

        elem_shapes = [elem.shape.as_list() for elem in elems]
        # Fall back on tf.map_fn if shapes of each entry of `elems` are None or fail
        # to all be the same size along the batch dimension.
        for elem_shape in elem_shapes:
            if (not elem_shape or not elem_shape[0]
                    or elem_shape[0] != elem_shapes[0][0]):
                return tf.map_fn(fn, elems, dtype, parallel_iterations,
                                 back_prop)
        arg_tuples = zip(*[tf.unstack(elem) for elem in elems])
        outputs = [fn(arg_tuple) for arg_tuple in arg_tuples]
    else:
        if not isinstance(elems, tf.Tensor):
            raise ValueError('`elems` must be a Tensor or list of Tensors.')
        elems_shape = elems.shape.as_list()
        if not elems_shape or not elems_shape[0]:
            return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)
        outputs = [fn(arg) for arg in tf.unstack(elems)]
    # Stack `outputs`, which is a list of Tensors or list of lists of Tensors
    if all([isinstance(output, tf.Tensor) for output in outputs]):
        return tf.stack(outputs)
    else:
        if all([isinstance(output, list) for output in outputs]):
            if all([
                    all([
                        isinstance(entry, tf.Tensor) for entry in output_list
                    ]) for output_list in outputs
            ]):
                return [
                    tf.stack(output_tuple) for output_tuple in zip(*outputs)
                ]
    raise ValueError('`fn` should return a Tensor or a list of Tensors.')
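
A hedged usage sketch for the function above (assuming the TF1-style tf API used in the snippet): with a static batch dimension the call unrolls into plain Python-level function calls plus a tf.stack instead of a tf.while_loop.

import tensorflow as tf  # TF1-style API assumed, as in the function above

images = tf.placeholder(tf.float32, shape=[8, 32, 32, 3])  # static batch of 8
boxes = tf.placeholder(tf.float32, shape=[8, 4])

def _flip(args):
    image, box = args
    # Flip each image left-right; pass the box through unchanged.
    return [tf.image.flip_left_right(image), box]

flipped_images, kept_boxes = static_or_dynamic_map_fn(_flip, [images, boxes])
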
Example #43
def run(argv):
    comm.Barrier()
    start_time = MPI.Wtime()
    #broadcast parameters
    if rank == 0:
        iparams, txt_out_input = process_input(argv)
        iparams.flag_volume_correction = False
        iparams.flag_hush = True
        print(txt_out_input)
        frame_files = read_pickles(iparams.data)
    else:
        iparams = None
        frame_files = None
    comm.Barrier()
    #assign scaling task
    if rank == 0:
        master(frame_files, iparams, "scale")
        result = []
    else:
        result = client()
    result = comm.gather(result, root=0)
    comm.Barrier()
    #pre-merge task
    if rank == 0:
        results = sum(result, [])
        print("Scaling is done on %d cores for %d frames" %
              (size, len(results)))
        master(results, iparams, "pre_merge")
        result = []
    else:
        result = client()
    result = comm.gather(result, root=0)
    comm.Barrier()
    #merge task
    if rank == 0:
        print("Pre-merge is done on %d cores" % (len(result)))
        master(result, iparams, "merge")
        result = []
    else:
        result = client()
    #finalize merge
    result = comm.gather(result, root=0)
    comm.Barrier()
    if rank == 0:
        print("Merge completed on %d cores" % (len(result)))
        results = sum(result, [])
        mdh = merge_data_handler()
        txt_out_rejection = ""
        for _mdh, _txt_out_rejection in results:
            mdh.extend(_mdh)
            txt_out_rejection += _txt_out_rejection
        # select only indices with non-Inf, non-NaN stats
        selections = flex.bool([
            False if (math.isnan(r0) or math.isinf(r0) or math.isnan(r1)
                      or math.isinf(r1)) else True
            for r0, r1 in zip(mdh.r_meas_div, mdh.r_meas_divisor)
        ])
        mdh.reduce_by_selection(selections)
        its = intensities_scaler()
        mdh, txt_merge_mean_table = its.write_output(mdh, iparams, 'test',
                                                     'average')
        print(txt_merge_mean_table)
    #collect time profile
    comm.Barrier()
    end_time = MPI.Wtime()
    txt_time = 'Elapsed Time (s):%10.2f\n' % (end_time - start_time)
    #write log output
    if rank == 0:
        print(txt_time)
        with open(os.path.join(iparams.run_no, 'log.txt'), 'w') as f:
            f.write(txt_out_input + txt_merge_mean_table + txt_time)
        with open(os.path.join(iparams.run_no, 'rejections.txt'), 'w') as f:
            f.write(txt_out_rejection)
    MPI.Finalize()
Example #44
    def sample(self, bqm):
        """Sample from a binary quadratic model.

        Args:
            bqm (:obj:`~dimod.BinaryQuadraticModel`):
                Binary quadratic model to be sampled from.

        Returns:
            :obj:`~dimod.Response`: A `dimod` :obj:`.~dimod.Response` object.


        Examples:
            This example provides samples for a two-variable Ising model.

            >>> import dimod
            ...
            >>> sampler = dimod.ExactSolver()
            >>> bqm = dimod.BinaryQuadraticModel({0: 0.0, 1: 1.0}, {(0, 1): 0.5}, -0.5, dimod.SPIN)
            >>> response = sampler.sample(bqm)
            >>> response.data_vectors['energy']
            array([-1., -2.,  1.,  0.])

        """
        M = bqm.binary.to_numpy_matrix()
        off = bqm.binary.offset

        if M.shape == (0, 0):
            return Response.from_samples([], {'energy': []}, {}, bqm.vartype)

        sample = np.zeros((len(bqm),), dtype=bool)

        # now we iterate, flipping one bit at a time until we have
        # traversed all samples. This is a Gray code.
        # https://en.wikipedia.org/wiki/Gray_code
        def iter_samples():
            sample = np.zeros((len(bqm)), dtype=bool)
            energy = 0.0

            yield sample.copy(), energy + off

            for i in range(1, 1 << len(bqm)):
                v = _ffs(i)

                # flip the bit in the sample
                sample[v] = not sample[v]

                # for now just calculate the energy, but there is a more clever way by calculating
                # the energy delta for the single bit flip, don't have time, pull requests
                # appreciated!
                energy = sample.dot(M).dot(sample.transpose())

                yield sample.copy(), float(energy) + off

        samples, energies = zip(*iter_samples())

        response = Response.from_samples(np.array(samples, dtype='int8'), {'energy': energies}, {},
                                         vartype=Vartype.BINARY)

        # make sure the response matches the given vartype, in-place.
        response.change_vartype(bqm.vartype, inplace=True)

        return response
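
A standalone sketch of the Gray-code enumeration used in iter_samples above; _ffs (find-first-set) is re-implemented here purely for illustration, since dimod ships its own helper:

def _ffs(i):
    # Index of the lowest set bit of i (assumes i > 0).
    return (i & -i).bit_length() - 1

n = 3
sample = [0] * n
visited = [tuple(sample)]
for i in range(1, 1 << n):
    sample[_ffs(i)] ^= 1          # flip exactly one bit per step
    visited.append(tuple(sample))
print(len(visited), len(set(visited)))  # 8 8 -- every pattern visited exactly once
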
    def get_loss(self, cate_preds, kernel_preds, ins_pred, ins_labels,
                 cate_labels, grid_order_list, fg_num):
        """
        Get loss of network of SOLOv2.

        Args:
            cate_preds (list): Tensor list of category branch output.
            kernel_preds (list): Tensor list of kernel branch output.
            ins_pred (list): Tensor list of instance branch output.
            ins_labels (list): List of instance labels per batch.
            cate_labels (list): List of category labels per batch.
            grid_order_list (list): List of indices per grid.
            fg_num (int): Number of positive samples in a mini-batch.
        Returns:
            loss_ins (Tensor): The instance loss Tensor of SOLOv2 network.
            loss_cate (Tensor): The category loss Tensor of SOLOv2 network.
        """
        batch_size = paddle.shape(grid_order_list[0])[0]
        ins_pred_list = []
        for kernel_preds_level, grid_orders_level in zip(
                kernel_preds, grid_order_list):
            if grid_orders_level.shape[1] == 0:
                ins_pred_list.append(None)
                continue
            grid_orders_level = paddle.reshape(grid_orders_level, [-1])
            reshape_pred = paddle.reshape(
                kernel_preds_level,
                shape=(paddle.shape(kernel_preds_level)[0],
                       paddle.shape(kernel_preds_level)[1], -1))
            reshape_pred = paddle.transpose(reshape_pred, [0, 2, 1])
            reshape_pred = paddle.reshape(
                reshape_pred, shape=(-1, paddle.shape(reshape_pred)[2]))
            gathered_pred = paddle.gather(reshape_pred,
                                          index=grid_orders_level)
            gathered_pred = paddle.reshape(
                gathered_pred,
                shape=[batch_size, -1,
                       paddle.shape(gathered_pred)[1]])
            cur_ins_pred = ins_pred
            cur_ins_pred = paddle.reshape(cur_ins_pred,
                                          shape=(paddle.shape(cur_ins_pred)[0],
                                                 paddle.shape(cur_ins_pred)[1],
                                                 -1))
            ins_pred_conv = paddle.matmul(gathered_pred, cur_ins_pred)
            cur_ins_pred = paddle.reshape(ins_pred_conv,
                                          shape=(-1,
                                                 paddle.shape(ins_pred)[-2],
                                                 paddle.shape(ins_pred)[-1]))
            ins_pred_list.append(cur_ins_pred)

        num_ins = paddle.sum(fg_num)
        cate_preds = [
            paddle.reshape(paddle.transpose(cate_pred, [0, 2, 3, 1]),
                           shape=(-1, self.cate_out_channels))
            for cate_pred in cate_preds
        ]
        flatten_cate_preds = paddle.concat(cate_preds)
        new_cate_labels = []
        for cate_label in cate_labels:
            new_cate_labels.append(paddle.reshape(cate_label, shape=[-1]))
        cate_labels = paddle.concat(new_cate_labels)

        loss_ins, loss_cate = self.solov2_loss(ins_pred_list, ins_labels,
                                               flatten_cate_preds, cate_labels,
                                               num_ins)

        return {'loss_ins': loss_ins, 'loss_cate': loss_cate}
Example #46
def load_imdb_dataset(path='data/imdb/',
                      nb_words=None,
                      skip_top=0,
                      maxlen=None,
                      test_split=0.2,
                      seed=113,
                      start_char=1,
                      oov_char=2,
                      index_from=3):
    """Load IMDB dataset

    Parameters
    ----------
    path : string
        Path to download data to; defaults to data/imdb/.

    Examples
    --------
    >>> X_train, y_train, X_test, y_test = tl.files.load_imdb_dataset(
    ...                                 nb_words=20000, test_split=0.2)
    >>> print('X_train.shape', X_train.shape)
    ... (20000,)  [[1, 62, 74, ... 1033, 507, 27],[1, 60, 33, ... 13, 1053, 7]..]
    >>> print('y_train.shape', y_train.shape)
    ... (20000,)  [1 0 0 ..., 1 0 1]

    References
    -----------
    - `Modified from keras. <https://github.com/fchollet/keras/blob/master/keras/datasets/imdb.py>`_
    """

    filename = "imdb.pkl"
    url = 'https://s3.amazonaws.com/text-datasets/'
    maybe_download_and_extract(filename, path, url)

    if filename.endswith(".gz"):
        f = gzip.open(os.path.join(path, filename), 'rb')
    else:
        f = open(os.path.join(path, filename), 'rb')

    X, labels = cPickle.load(f)
    f.close()

    np.random.seed(seed)
    np.random.shuffle(X)
    np.random.seed(seed)
    np.random.shuffle(labels)

    if start_char is not None:
        X = [[start_char] + [w + index_from for w in x] for x in X]
    elif index_from:
        X = [[w + index_from for w in x] for x in X]

    if maxlen:
        new_X = []
        new_labels = []
        for x, y in zip(X, labels):
            if len(x) < maxlen:
                new_X.append(x)
                new_labels.append(y)
        X = new_X
        labels = new_labels
    if not X:
        raise Exception('After filtering for sequences shorter than maxlen=' +
                        str(maxlen) + ', no sequence was kept. '
                        'Increase maxlen.')
    if not nb_words:
        nb_words = max([max(x) for x in X])

    # by convention, use 2 as OOV word
    # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV)
    if oov_char is not None:
        X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x]
             for x in X]
    else:
        nX = []
        for x in X:
            nx = []
            for w in x:
                if (w >= nb_words or w < skip_top):
                    nx.append(w)
            nX.append(nx)
        X = nX

    X_train = np.array(X[:int(len(X) * (1 - test_split))])
    y_train = np.array(labels[:int(len(X) * (1 - test_split))])

    X_test = np.array(X[int(len(X) * (1 - test_split)):])
    y_test = np.array(labels[int(len(X) * (1 - test_split)):])

    return X_train, y_train, X_test, y_test
Example #47
    def get_points_in_sphere(self, frac_points, center, r, zip_results=True):
        """
        Find all points within a sphere from the point taking into account
        periodic boundary conditions. This includes sites in other periodic
        images.

        Algorithm:

        1. place sphere of radius r in crystal and determine minimum supercell
           (parallelepiped) which would contain a sphere of radius r. For this
           we need the projection of a_1 on a unit vector perpendicular
           to a_2 & a_3 (i.e. the unit vector in the direction b_1) to
           determine how many a_1's it will take to contain the sphere.

           Nxmax = r * length_of_b_1 / (2 Pi)

        2. keep points falling within r.

        Args:
            frac_points: All points in the lattice in fractional coordinates.
            center: Cartesian coordinates of center of sphere.
            r: radius of sphere.
            zip_results (bool): Whether to zip the results together to group by
                 point, or return the raw fcoord, dist, index arrays

        Returns:
            if zip_results:
                [(fcoord, dist, index) ...] since most of the time, subsequent
                processing requires the distance.
            else:
                fcoords, dists, inds
        """
        recp_len = np.array(self.reciprocal_lattice.abc)
        sr = r + 0.15
        nmax = sr * recp_len / (2 * math.pi)
        pcoords = self.get_fractional_coords(center)
        floor = math.floor

        n = len(frac_points)
        fcoords = np.array(frac_points)
        pts = np.tile(center, (n, 1))
        indices = np.array(list(range(n)))

        arange = np.arange(start=int(floor(pcoords[0] - nmax[0])),
                           stop=int(floor(pcoords[0] + nmax[0])) + 1)
        brange = np.arange(start=int(floor(pcoords[1] - nmax[1])),
                           stop=int(floor(pcoords[1] + nmax[1])) + 1)
        crange = np.arange(start=int(floor(pcoords[2] - nmax[2])),
                           stop=int(floor(pcoords[2] + nmax[2])) + 1)

        arange = arange[:, None] * np.array([1, 0, 0])[None, :]
        brange = brange[:, None] * np.array([0, 1, 0])[None, :]
        crange = crange[:, None] * np.array([0, 0, 1])[None, :]

        images = arange[:, None, None] + brange[None, :, None] +\
            crange[None, None, :]

        shifted_coords = fcoords[:, None, None, None, :] + \
            images[None, :, :, :, :]
        coords = self.get_cartesian_coords(shifted_coords)
        dists = np.sqrt(
            np.sum((coords - pts[:, None, None, None, :])**2, axis=4))
        within_r = np.where(dists <= r)
        if zip_results:
            return list(
                zip(shifted_coords[within_r], dists[within_r],
                    indices[within_r[0]]))
        else:
            return shifted_coords[within_r], dists[within_r], \
                indices[within_r[0]]
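
A hedged usage sketch, assuming the pymatgen Lattice class this method belongs to (newer pymatgen releases return a slightly different tuple, so the unpacking below matches the version shown here):

import numpy as np
from pymatgen.core.lattice import Lattice

lattice = Lattice.cubic(4.0)                       # 4 angstrom cubic cell
frac_points = np.array([[0.0, 0.0, 0.0],
                        [0.5, 0.5, 0.5]])
center = lattice.get_cartesian_coords([0.1, 0.1, 0.1])
for fcoord, dist, index in lattice.get_points_in_sphere(frac_points, center, 5.0):
    print(index, fcoord, round(dist, 3))
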
Example #48
def split_grid(grid, num_cores):
    """Split the grid into blocks of vertices.

    Take the overall `grid` for the system and split it into lists of cube
    vertices that can be distributed to each core.

    Parameters
    ----------
    grid : numpy.array
        3D array
    num_cores : int
        number of partitions to generate

    Returns
    -------
    list_dictionaries_for_cores : list of dict
    total_cubes : int
    num_sheets : int
    delta_array_shape : tuple

    """
    # unpack the x,y,z mgrid arrays
    x, y, z = grid
    num_z_values = z.shape[-1]
    num_sheets = z.shape[0]
    delta_array_shape = tuple(
        [n - 1 for n in x.shape]
    )  # the final target shape for return delta arrays is n-1 in each dimension

    ordered_list_per_sheet_x_values = []
    for x_sheet in x:  # each x_sheet should have shape (25,23) and the same x value in each element
        array_all_x_values_current_sheet = x_sheet.flatten()
        ordered_list_per_sheet_x_values.append(
            array_all_x_values_current_sheet)
    ordered_list_per_sheet_y_values = []
    for y_columns in y:
        array_all_y_values_current_sheet = y_columns.flatten()
        ordered_list_per_sheet_y_values.append(
            array_all_y_values_current_sheet)
    ordered_list_per_sheet_z_values = []
    for z_slices in z:
        array_all_z_values_current_sheet = z_slices.flatten()
        ordered_list_per_sheet_z_values.append(
            array_all_z_values_current_sheet)

    ordered_list_cartesian_coordinates_per_sheet = []
    for x_sheet_coords, y_sheet_coords, z_sheet_coords in zip(
            ordered_list_per_sheet_x_values, ordered_list_per_sheet_y_values,
            ordered_list_per_sheet_z_values):
        ordered_list_cartesian_coordinates_per_sheet.append(
            list(zip(x_sheet_coords, y_sheet_coords, z_sheet_coords)))
    array_ordered_cartesian_coords_per_sheet = np.array(
        ordered_list_cartesian_coordinates_per_sheet)
    #now I'm going to want to build cubes in an ordered fashion, and in such a way that I can track the index /
    # centroid of each cube for domain decomposition / reconstruction and mayavi mlab.flow() input
    #cubes will be formed from N - 1 base sheets combined with subsequent sheets
    current_base_sheet = 0
    dictionary_cubes_centroids_indices = {}
    cube_counter = 0
    while current_base_sheet < num_sheets - 1:
        current_base_sheet_array = array_ordered_cartesian_coords_per_sheet[
            current_base_sheet]
        current_top_sheet_array = array_ordered_cartesian_coords_per_sheet[
            current_base_sheet +
            1]  # the points of the sheet 'to the right' in the grid
        current_index = 0
        while current_index < current_base_sheet_array.shape[0] - num_z_values:
            # iterate through all the indices in each of the sheet arrays (careful to avoid extra
            # points not needed for cubes)
            column_z_level = 0  # start at the bottom of a given 4-point column and work up
            while column_z_level < num_z_values - 1:
                current_list_cube_vertices = []
                first_two_vertices_base_sheet = current_base_sheet_array[
                    current_index:current_index + 2, ...].tolist()
                first_two_vertices_top_sheet = current_top_sheet_array[
                    current_index:current_index + 2, ...].tolist()
                next_two_vertices_base_sheet = current_base_sheet_array[
                    current_index + num_z_values:2 + num_z_values +
                    current_index, ...].tolist()
                next_two_vertices_top_sheet = current_top_sheet_array[
                    current_index + num_z_values:2 + num_z_values +
                    current_index, ...].tolist()
                for vertex_set in [
                        first_two_vertices_base_sheet,
                        first_two_vertices_top_sheet,
                        next_two_vertices_base_sheet,
                        next_two_vertices_top_sheet
                ]:
                    current_list_cube_vertices.extend(vertex_set)
                vertex_array = np.array(current_list_cube_vertices)
                assert vertex_array.shape == (
                    8, 3), "vertex_array has incorrect shape"
                cube_centroid = np.average(
                    np.array(current_list_cube_vertices), axis=0)
                dictionary_cubes_centroids_indices[cube_counter] = {
                    'centroid': cube_centroid,
                    'vertex_list': current_list_cube_vertices
                }
                cube_counter += 1
                current_index += 1
                column_z_level += 1
                if column_z_level == num_z_values - 1:  # the loop will break but I should also increment the
                    # current_index
                    current_index += 1
        current_base_sheet += 1
    total_cubes = len(dictionary_cubes_centroids_indices)

    #produce an array of pseudo cube indices (actually the dictionary keys which are cube numbers in string format):
    pseudo_cube_indices = np.arange(0, total_cubes)
    sublist_of_cube_indices_per_core = np.array_split(pseudo_cube_indices,
                                                      num_cores)
    #now, the split of pseudoindices seems to work well, and the above sublist_of_cube_indices_per_core is a list of
    # arrays of cube numbers / keys in the original dictionary
    #now I think I'll try to produce a list of dictionaries that each contain their assigned cubes based on the above
    #  per core split
    list_dictionaries_for_cores = []
    subdictionary_counter = 0
    for array_cube_indices in sublist_of_cube_indices_per_core:
        current_core_dictionary = {}
        items_to_pop = len(array_cube_indices)
        items_popped = 0
        while items_popped < items_to_pop:
            key, value = dictionary_cubes_centroids_indices.popitem()
            current_core_dictionary.update({key: value})
            items_popped += 1
        list_dictionaries_for_cores.append(current_core_dictionary)
        subdictionary_counter += 1
    return list_dictionaries_for_cores, total_cubes, num_sheets, delta_array_shape
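
A small hedged usage sketch of split_grid: a 5x5x5 mgrid yields (5-1)**3 = 64 cubes, 5 sheets, and a (4, 4, 4) delta-array shape, split here across 4 cores.

import numpy as np

grid = np.mgrid[0:10:5j, 0:10:5j, 0:10:5j]     # shape (3, 5, 5, 5)
core_dicts, total_cubes, num_sheets, delta_shape = split_grid(grid, num_cores=4)
print(total_cubes, num_sheets, delta_shape)    # 64 5 (4, 4, 4)
print([len(d) for d in core_dicts])            # 16 cubes assigned to each core
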
Example #49
def set_state(self, value_list):
    assert len(self.updates) == len(value_list)
    for u, v in zip(self.updates, value_list):
        K.set_value(u[0], v)
Example #50
def _CompareWeights(self, lhs, rhs):
  self.assertEqual(len(lhs), len(rhs))
  for lw, rw in zip(lhs, rhs):
    self.assertAllEqual(lw, rw)
Example #51
def get_pbmodel(pbnames, model, pbfile=None, mag_type=None, mag_zero=0.):
    """
    Converts passband names ``pbnames`` into passband models based on the
    mapping of name to ``pysynphot`` ``obsmode`` strings in ``pbfile``.

    Parameters
    ----------
    pbnames : array-like
        List of passband names to get throughput models for. Each name is
        resolved by first looking in ``pbfile`` (if provided). If an entry is
        found, that entry is treated as an ``obsmode`` for pysynphot. If the
        entry cannot be treated as an ``obsmode``, we attempt to treat it as an
        ASCII file. If neither is possible, an error is raised.
    model : :py:class:`WDmodel.WDmodel.WDmodel` instance
        The DA White Dwarf SED model generator
        All the passbands are interpolated onto the wavelengths of the SED
        model.
    pbfile : str, optional
        Filename containing the mapping between ``pbnames`` and ``pysynphot``
        ``obsmode`` strings, as well as the standard that has 0 magnitude in the
        system (either ''Vega'' or ''AB''). The ``obsmode`` may also be the
        full path to a file that is readable by ``pysynphot``.
    mag_type : str, optional
        One of ''vegamag'' or ''abmag''
        Used to specify the standard that has mag_zero magnitude in the passband.
        If ``magsys`` is specified in ``pbfile``, that overrides this option.
        Must be the same for all passbands listed in ``pbnames`` that do not
        have ``magsys`` specified in ``pbfile``.
        If ``pbnames`` require multiple ``mag_type``, concatenate the output.
    mag_zero : float, optional
        Magnitude of the standard in the passband.
        If ``magzero`` is specified in ``pbfile``, that overrides this option.
        Must be the same for all passbands listed in ``pbnames`` that do not
        have ``magzero`` specified in ``pbfile``.
        If ``pbnames`` require multiple ``mag_zero``, concatenate the output.

    Returns
    -------
    out : dict
        Output passband model dictionary. Has passband name ``pb`` from ``pbnames`` as key.

    Raises
    ------
    RuntimeError
        If a bandpass cannot be loaded

    Notes
    -----
        Each item of ``out`` is a tuple with
            * ``pb`` : (:py:class:`numpy.recarray`)
              The passband transmission with zero throughput entries trimmed.
              Has ``dtype=[('wave', '<f8'), ('throughput', '<f8')]``
            * ``transmission`` : (array-like)
              The non-zero passband transmission interpolated onto overlapping model wavelengths
            * ``ind`` : (array-like)
              Indices of model wavelength that overlap with this passband
            * ``zp`` : (float)
              mag_type zeropoint of this passband
            * ``avgwave`` : (float)
              Passband average/reference wavelength

        ``pbfile`` must be readable by :py:func:`WDmodel.io.read_pbmap` and
        must return a :py:class:`numpy.recarray`
        with ``dtype=[('pb', 'str'), ('obsmode', 'str')]``.

        If there is no entry in ``pbfile`` for a passband, then we attempt to
        use the passband name ``pb`` as ``obsmode`` string as is.

        Trims the bandpass to entries with non-zero transmission and determines
        the ``VEGAMAG/ABMAG`` zeropoint for the passband - i.e. ``zp`` that
        gives ``mag_Vega/AB=mag_zero`` in all passbands.

    See Also
    --------
    :py:func:`WDmodel.io.read_pbmap`
    :py:func:`WDmodel.passband.chop_syn_spec_pb`
    """

    # figure out the mapping from passband to observation mode
    if pbfile is None:
        pbfile = 'WDmodel_pb_obsmode_map.txt'
        pbfile = io.get_pkgfile(pbfile)
    pbdata  = io.read_pbmap(pbfile)
    pbmap   = dict(list(zip(pbdata.pb, pbdata.obsmode)))
    sysmap  = dict(list(zip(pbdata.pb, pbdata.magsys)))
    zeromap = dict(list(zip(pbdata.pb, pbdata.magzero)))

    # setup the photometric system by defining the standard and corresponding magnitude system
    if mag_type not in ('vegamag', 'abmag', None):
        message = 'Magnitude system must be one of abmag or vegamag'
        raise RuntimeError(message)

    try:
        mag_zero = float(mag_zero)
    except ValueError as e:
        message = 'Zero magnitude must be a floating point number'
        raise RuntimeError(message)

    # define the standards
    vega = S.Vega
    vega.convert('flam')
    ab   = S.FlatSpectrum(0., waveunits='angstrom', fluxunits='abmag')
    ab.convert('flam')

    # define the magnitude system
    if mag_type == 'vegamag':
        mag_type = 'vegamag'
    else:
        mag_type = 'abmag'

    out = OrderedDict()

    for pb in pbnames:

        standard = None

        # load each passband
        obsmode = pbmap.get(pb, pb)
        magsys  = sysmap.get(pb, mag_type)
        synphot_mag = zeromap.get(pb, mag_zero)

        if magsys == 'vegamag':
            standard = vega
        elif magsys == 'abmag':
            standard = ab
        else:
            message = 'Unknown standard system {} for passband {}'.format(magsys, pb)
            raise RuntimeError(message)

        loadedpb = False
        # treat the passband as a obsmode string
        try:
            bp = S.ObsBandpass(obsmode)
            loadedpb = True
        except ValueError:
            message = 'Could not load pb {} as an obsmode string {}'.format(pb, obsmode)
            warnings.warn(message, RuntimeWarning)
            loadedpb = False

        # if that fails, try to load the passband interpreting obsmode as a file
        if not loadedpb:
            try:
                bandpassfile = io.get_filepath(obsmode)
                bp = S.FileBandpass(bandpassfile)
                loadedpb = True
            except Exception as e:
                message = 'Could not load passband {} from obsmode or file {}'.format(pb, obsmode)
                warnings.warn(message, RuntimeWarning)
                loadedpb = False

        if not loadedpb:
            message = 'Could not load passband {}. Giving up.'.format(pb)
            raise RuntimeError(message)

        avgwave = bp.avgwave()
        if standard.wave.min() > model._wave.min():
            message = 'Standard does not extend past the blue edge of the model'
            warnings.warn(message, RuntimeWarning)

        if standard.wave.max() < model._wave.max():
            message = 'Standard does not extend past the red edge of the model'
            warnings.warn(message, RuntimeWarning)

        # interpolate the standard onto the model wavelengths
        sinterp = interp1d(standard.wave, standard.flux, fill_value='extrapolate')
        standard_flux = sinterp(model._wave)
        standard = np.rec.fromarrays([model._wave, standard_flux], names='wave,flux')

        # cut the passband to non-zero values and interpolate onto overlapping standard wavelengths
        outpb, outzp = chop_syn_spec_pb(standard, synphot_mag, bp, model)

        # interpolate the passband onto the model wavelengths
        transmission, ind = interp_passband(model._wave, outpb, model)

        # save everything we need for this passband
        out[pb] = (outpb, transmission, ind, outzp, avgwave)
    return out
Example #52
    def list(cls, domain, date_, location_filters, sort_column):
        location_query = ''
        if location_filters:
            location_query = [
                "woman.{loc} = %({loc})s".format(loc=loc)
                for loc in location_filters.keys()
            ]
            location_query = " AND ".join(location_query)
            location_query = location_query + " AND"

        query, params = """
            SELECT
                woman.person_case_id AS "id",
                woman.name AS "name",
                woman.dob AS "dob",
                ((%(start_date)s - "woman"."dob") / 30.417)::INT AS "age_in_months",
                eligible_couple."currentFamilyPlanningMethod" AS "currentFamilyPlanningMethod",
                eligible_couple."adoptionDateOfFamilyPlaning" AS "adoptionDateOfFamilyPlaning"
            FROM "{woman_table}" woman
            LEFT JOIN (
                SELECT
                    "{eligible_couple_table}".person_case_id,
                    "{eligible_couple_table}".timeend::date AS "adoptionDateOfFamilyPlaning",
                    "{eligible_couple_table}".fp_current_method AS "currentFamilyPlanningMethod"
                FROM (
                    SELECT
                        person_case_id,
                        MAX(timeend) AS "timeend"
                    FROM "{eligible_couple_table}"
                    WHERE timeend <= %(end_date)s
                    GROUP BY person_case_id
                ) as last_eligible_couple
                INNER JOIN "{eligible_couple_table}" ON
                "{eligible_couple_table}".person_case_id=last_eligible_couple.person_case_id AND
                "{eligible_couple_table}".timeend=last_eligible_couple.timeend
            ) eligible_couple ON eligible_couple.person_case_id=woman.person_case_id
            WHERE (
                woman.domain = %(domain)s AND
                woman.marital_status = 'married' AND
                NOT (
                    woman.migration_status = 'yes' AND woman.migration_status IS NOT NULL
                ) AND
                {location_where}
                dob BETWEEN %(dob_start_date)s AND %(dob_end_date)s AND
                (
                    pregnant_ranges IS NULL OR
                    NOT daterange(%(start_date)s, %(end_date)s) && ANY(pregnant_ranges)
                )
            ) ORDER BY {sort_col}
            """.format(
            location_where=location_query,
            woman_table=Woman._meta.db_table,
            eligible_couple_table=cls._ucr_eligible_couple_table(domain),
            sort_col=sort_column), {
                'domain': domain,
                'dob_start_date': date_ - relativedelta(years=49),
                'dob_end_date': date_ - relativedelta(years=15),
                'start_date': date_,
                'end_date': date_ + relativedelta(months=1),
            }
        params.update(location_filters)
        db_alias = get_aaa_db_alias()
        with connections[db_alias].cursor() as cursor:
            cursor.execute(query, params)
            desc = cursor.description
            return [
                dict(zip([col[0] for col in desc], row))
                for row in cursor.fetchall()
            ]
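
The dict(zip(...)) pattern at the end of the method above, shown in isolation with fake cursor data: column names from cursor.description are zipped with each row to build plain dictionaries.

desc = [('id',), ('name',), ('dob',)]
rows = [(1, 'A', '1990-01-01'), (2, 'B', '1992-05-12')]
print([dict(zip([col[0] for col in desc], row)) for row in rows])
# [{'id': 1, 'name': 'A', 'dob': '1990-01-01'},
#  {'id': 2, 'name': 'B', 'dob': '1992-05-12'}]
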
Example #53
    def _process_soln_file(self, results, TimFile, INPUT):
        #
        # **NOTE: This solution parser assumes the baron input file
        #         was generated by the Pyomo baron_writer plugin, and
        #         that a dummy constraint named c_e_FIX_ONE_VAR_CONST__
        #         was added as the initial constraint in order to
        #         support trivial constraint equations arising from
        #         fixing pyomo variables. Thus, the dual price solution
        #         information for the first constraint in the solution
        #         file will be excluded from the results object.
        #

        # TODO: Is there a way to handle non-zero return values from baron?
        #       Example: the "NonLinearity Error if POW expression"
        #       (caused by x ^ y) when both x and y are variables
        #       causes an ugly python error, and the solver log has a single
        #       line to display the error, which is hard to pick out of the list

        # Check for suffixes to send back to pyomo
        extract_marginals = False
        extract_price = False
        for suffix in self._suffixes:
            flag = False
            if re.match(suffix, "rc"):  #baron_marginal
                extract_marginals = True
                flag = True
            if re.match(suffix, "dual"):  #baron_price
                extract_price = True
                flag = True
            if not flag:
                raise RuntimeError("***The BARON solver plugin cannot"
                                   "extract solution suffix=" + suffix)

        soln = Solution()

        #
        # Process model and solver status from the Baron tim file
        #
        line = TimFile.readline().split()
        results.problem.name = line[0]
        results.problem.number_of_constraints = int(line[1])
        results.problem.number_of_variables = int(line[2])
        results.problem.lower_bound = float(line[5])
        results.problem.upper_bound = float(line[6])
        soln.gap = results.problem.upper_bound - results.problem.lower_bound
        solver_status = line[7]
        model_status = line[8]

        objective = None
        ##try:
        ##    objective = symbol_map.getObject("__default_objective__")
        ##    objective_label = symbol_map_byObjects[id(objective)]
        ##except:
        ##    objective_label = "__default_objective__"
        # [JDS 17/Feb/15] I am not sure why this is needed, but all
        # other solvers (in particular the ASL solver and CPLEX) always
        # return the objective value in the __default_objective__ label,
        # and not by the Pyomo object name.  For consistency, we will
        # do the same here.
        objective_label = "__default_objective__"

        soln.objective[objective_label] = {'Value': None}
        results.problem.number_of_objectives = 1
        if objective is not None:
            results.problem.sense = \
                'minimizing' if objective.is_minimizing() else 'maximizing'

        if solver_status == '1':
            results.solver.status = SolverStatus.ok
        elif solver_status == '2':
            results.solver.status = SolverStatus.error
            results.solver.termination_condition = TerminationCondition.error
            #CLH: I wasn't sure if this was double reporting errors. I
            #     just filled in one termination_message for now
            results.solver.termination_message = \
                ("Insufficient memory to store the number of nodes required "
                 "for this seach tree. Increase physical memory or change "
                 "algorithmic options")
        elif solver_status == '3':
            results.solver.status = SolverStatus.ok
            results.solver.termination_condition = \
                TerminationCondition.maxIterations
        elif solver_status == '4':
            results.solver.status = SolverStatus.ok
            results.solver.termination_condition = \
                TerminationCondition.maxTimeLimit
        elif solver_status == '5':
            results.solver.status = SolverStatus.warning
            results.solver.termination_condition = \
                TerminationCondition.other
        elif solver_status == '6':
            results.solver.status = SolverStatus.aborted
            results.solver.termination_condition = \
                TerminationCondition.userInterrupt
        elif solver_status == '7':
            results.solver.status = SolverStatus.error
            results.solver.termination_condition = \
                TerminationCondition.error
        elif solver_status == '8':
            results.solver.status = SolverStatus.unknown
            results.solver.termination_condition = \
                TerminationCondition.unknown
        elif solver_status == '9':
            results.solver.status = SolverStatus.error
            results.solver.termination_condition = \
                TerminationCondition.solverFailure
        elif solver_status == '10':
            results.solver.status = SolverStatus.error
            results.solver.termination_condition = \
                TerminationCondition.error
        elif solver_status == '11':
            results.solver.status = SolverStatus.aborted
            results.solver.termination_condition = \
                TerminationCondition.licensingProblems
            results.solver.termination_message = \
                'Run terminated because of a licensing error.'

        if model_status == '1':
            soln.status = SolutionStatus.optimal
            results.solver.termination_condition = \
                TerminationCondition.optimal
        elif model_status == '2':
            soln.status = SolutionStatus.infeasible
            results.solver.termination_condition = \
                TerminationCondition.infeasible
        elif model_status == '3':
            soln.status = SolutionStatus.unbounded
            results.solver.termination_condition = \
                TerminationCondition.unbounded
        elif model_status == '4':
            soln.status = SolutionStatus.feasible
        elif model_status == '5':
            soln.status = SolutionStatus.unknown

        #
        # Process BARON results file
        #

        # Solutions that were found infeasible during preprocessing, were
        # aborted, or ended in an error will not have fully populated
        # res.lst files
        if results.solver.status not in [
                SolverStatus.error, SolverStatus.aborted
        ]:
            #
            # Extract the solution vector and objective value from BARON
            #
            var_value = []
            var_name = []
            var_marginal = []
            con_price = []
            SolvedDuringPreprocessing = False

            #############
            #
            # Scan through the first part of the solution file, until the
            # termination message '*** Normal completion ***'
            line = ''
            while '***' not in line:
                line = INPUT.readline()
                if 'Problem solved during preprocessing' in line:
                    SolvedDuringPreprocessing = True

            INPUT.readline()
            INPUT.readline()
            try:
                objective_value = float(INPUT.readline().split()[4])
            except IndexError:
                # No objective value, so no solution to return
                if solver_status == '1' and model_status in ('1', '4'):
                    logger.error(
                        """Failed to process BARON solution file: could not extract the final
objective value, but BARON completed normally.  This is indicative of a
bug in Pyomo's BARON solution parser.  Please report this (along with
the Pyomo model and BARON version) to the Pyomo Developers.""")
                return
            INPUT.readline()
            INPUT.readline()

            # Scan through the solution variable values
            line = INPUT.readline()
            while line.strip() != '':
                var_value.append(float(line.split()[2]))
                line = INPUT.readline()

            # Only scan through the marginal and price values if baron
            # found that information.
            has_dual_info = False
            if 'Corresponding dual solution vector is' in INPUT.readline():
                has_dual_info = True
                INPUT.readline()
                line = INPUT.readline()
                while 'Price' not in line and line.strip() != '':
                    var_marginal.append(float(line.split()[2]))
                    line = INPUT.readline()

                if 'Price' in line:
                    line = INPUT.readline()
                    #
                    # Assume the baron_writer added the dummy
                    # c_e_FIX_ONE_VAR_CONST__ constraint as the first
                    #
                    line = INPUT.readline()
                    while line.strip() != '':
                        con_price.append(float(line.split()[2]))
                        line = INPUT.readline()

            # Skip either a few blank lines or an empty block of useless
            # marginal and price values (if 'No dual information is available')
            while 'The best solution found is' not in INPUT.readline():
                pass

            # Collect the variable names, which are given in the same
            # order as the lists for values already read
            INPUT.readline()
            INPUT.readline()
            line = INPUT.readline()
            while line.strip() != '':
                var_name.append(line.split()[0])
                line = INPUT.readline()

            assert len(var_name) == len(var_value)
            #
            #
            ################

            #
            # Plug gathered information into pyomo soln
            #

            soln_variable = soln.variable
            # After collecting the solution information, fill soln with
            # each variable's name and value and, if requested, the
            # baron_marginal ('rc') suffix.
            for i, (label, val) in enumerate(zip(var_name, var_value)):

                soln_variable[label] = {"Value": val}

                # Only add the baron_marginal key if it is requested and dual info exists
                if extract_marginals and has_dual_info:
                    soln_variable[label]["rc"] = var_marginal[i]

            # Fill in the constraint 'price' information
            if extract_price and has_dual_info:
                soln_constraint = soln.constraint
                #
                # Assume the baron_writer added the dummy
                # c_e_FIX_ONE_VAR_CONST__ constraint as the first,
                # so constraint aliases start at 1
                #
                for i, price_val in enumerate(con_price, 1):
                    # use the alias made by the Baron writer
                    con_label = ".c" + str(i)
                    soln_constraint[con_label] = {"dual": price_val}

            # This check is necessary because solutions that are
            # preprocessed infeasible have ok solver status, but no
            # objective value located in the res.lst file
            if not (SolvedDuringPreprocessing and \
                    soln.status == SolutionStatus.infeasible):
                soln.objective[objective_label] = {'Value': objective_value}

            # Fill the solution for most cases, except errors
            results.solution.insert(soln)
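
For quick reference, a purely illustrative summary of the solver-status branches handled above, restated as a plain dictionary (this is not a Pyomo API, just a condensed view of the codes):

# Illustrative summary of the if/elif chain above; not part of Pyomo.
BARON_SOLVER_STATUS = {
    '1': 'ok: normal completion',
    '2': 'error: insufficient memory to store the search tree',
    '3': 'ok: iteration limit reached',
    '4': 'ok: time limit reached',
    '5': 'warning: other termination',
    '6': 'aborted: user interrupt',
    '7': 'error',
    '8': 'unknown',
    '9': 'error: solver failure',
    '10': 'error',
    '11': 'aborted: licensing problem',
}

def describe_solver_status(code):
    """Return a human-readable description of a BARON .tim solver status code."""
    return BARON_SOLVER_STATUS.get(code, 'unrecognized status code')

print(describe_solver_status('4'))  # ok: time limit reached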
Example #54
def createMethod(methodName, methodDesc, rootDesc, schema):
  """Creates a method for attaching to a Resource.

  Args:
    methodName: string, name of the method to use.
    methodDesc: object, fragment of deserialized discovery document that
      describes the method.
    rootDesc: object, the entire deserialized discovery document.
    schema: object, mapping of schema names to schema descriptions.
  """
  methodName = fix_method_name(methodName)
  (pathUrl, httpMethod, methodId, accept,
   maxSize, mediaPathUrl) = _fix_up_method_description(methodDesc, rootDesc, schema)

  parameters = ResourceMethodParameters(methodDesc)

  def method(self, **kwargs):
    # Don't bother with doc string, it will be over-written by createMethod.

    for name in six.iterkeys(kwargs):
      if name not in parameters.argmap:
        raise TypeError('Got an unexpected keyword argument "%s"' % name)

    # Remove args that have a value of None.
    keys = list(kwargs.keys())
    for name in keys:
      if kwargs[name] is None:
        del kwargs[name]

    for name in parameters.required_params:
      if name not in kwargs:
        # temporary workaround for non-paging methods incorrectly requiring
        # page token parameter (cf. drive.changes.watch vs. drive.changes.list)
        if name not in _PAGE_TOKEN_NAMES or _findPageTokenName(
            _methodProperties(methodDesc, schema, 'response')):
          raise TypeError('Missing required parameter "%s"' % name)

    for name, regex in six.iteritems(parameters.pattern_params):
      if name in kwargs:
        if isinstance(kwargs[name], six.string_types):
          pvalues = [kwargs[name]]
        else:
          pvalues = kwargs[name]
        for pvalue in pvalues:
          if re.match(regex, pvalue) is None:
            raise TypeError(
                'Parameter "%s" value "%s" does not match the pattern "%s"' %
                (name, pvalue, regex))

    for name, enums in six.iteritems(parameters.enum_params):
      if name in kwargs:
        # We need to handle the case of a repeated enum
        # name differently, since we want to handle both
        # arg='value' and arg=['value1', 'value2']
        if (name in parameters.repeated_params and
            not isinstance(kwargs[name], six.string_types)):
          values = kwargs[name]
        else:
          values = [kwargs[name]]
        for value in values:
          if value not in enums:
            raise TypeError(
                'Parameter "%s" value "%s" is not an allowed value in "%s"' %
                (name, value, str(enums)))

    actual_query_params = {}
    actual_path_params = {}
    for key, value in six.iteritems(kwargs):
      to_type = parameters.param_types.get(key, 'string')
      # For repeated parameters we cast each member of the list.
      if key in parameters.repeated_params and type(value) == type([]):
        cast_value = [_cast(x, to_type) for x in value]
      else:
        cast_value = _cast(value, to_type)
      if key in parameters.query_params:
        actual_query_params[parameters.argmap[key]] = cast_value
      if key in parameters.path_params:
        actual_path_params[parameters.argmap[key]] = cast_value
    body_value = kwargs.get('body', None)
    media_filename = kwargs.get('media_body', None)
    media_mime_type = kwargs.get('media_mime_type', None)

    if self._developerKey:
      actual_query_params['key'] = self._developerKey

    model = self._model
    if methodName.endswith('_media'):
      model = MediaModel()
    elif 'response' not in methodDesc:
      model = RawModel()

    headers = {}
    headers, params, query, body = model.request(headers,
        actual_path_params, actual_query_params, body_value)

    expanded_url = uritemplate.expand(pathUrl, params)
    url = _urljoin(self._baseUrl, expanded_url + query)

    resumable = None
    multipart_boundary = ''

    if media_filename:
      # Ensure we end up with a valid MediaUpload object.
      if isinstance(media_filename, six.string_types):
        if media_mime_type is None:
          logger.warning(
              'media_mime_type argument not specified: trying to auto-detect for %s',
              media_filename)
          media_mime_type, _ = mimetypes.guess_type(media_filename)
        if media_mime_type is None:
          raise UnknownFileType(media_filename)
        if not mimeparse.best_match([media_mime_type], ','.join(accept)):
          raise UnacceptableMimeTypeError(media_mime_type)
        media_upload = MediaFileUpload(media_filename,
                                       mimetype=media_mime_type)
      elif isinstance(media_filename, MediaUpload):
        media_upload = media_filename
      else:
        raise TypeError('media_filename must be str or MediaUpload.')

      # Check the maxSize
      if media_upload.size() is not None and media_upload.size() > maxSize > 0:
        raise MediaUploadSizeError("Media larger than: %s" % maxSize)

      # Use the media path uri for media uploads
      expanded_url = uritemplate.expand(mediaPathUrl, params)
      url = _urljoin(self._baseUrl, expanded_url + query)
      if media_upload.resumable():
        url = _add_query_parameter(url, 'uploadType', 'resumable')

      if media_upload.resumable():
        # This is all we need to do for resumable, if the body exists it gets
        # sent in the first request, otherwise an empty body is sent.
        resumable = media_upload
      else:
        # A non-resumable upload
        if body is None:
          # This is a simple media upload
          headers['content-type'] = media_upload.mimetype()
          body = media_upload.getbytes(0, media_upload.size())
          url = _add_query_parameter(url, 'uploadType', 'media')
        else:
          # This is a multipart/related upload.
          msgRoot = MIMEMultipart('related')
          # msgRoot should not write out its own headers
          setattr(msgRoot, '_write_headers', lambda self: None)

          # attach the body as one part
          msg = MIMENonMultipart(*headers['content-type'].split('/'))
          msg.set_payload(body)
          msgRoot.attach(msg)

          # attach the media as the second part
          msg = MIMENonMultipart(*media_upload.mimetype().split('/'))
          msg['Content-Transfer-Encoding'] = 'binary'

          payload = media_upload.getbytes(0, media_upload.size())
          msg.set_payload(payload)
          msgRoot.attach(msg)
          # encode the body: note that we can't use `as_string`, because
          # it plays games with `From ` lines.
          fp = BytesIO()
          g = _BytesGenerator(fp, mangle_from_=False)
          g.flatten(msgRoot, unixfrom=False)
          body = fp.getvalue()

          multipart_boundary = msgRoot.get_boundary()
          headers['content-type'] = ('multipart/related; '
                                     'boundary="%s"') % multipart_boundary
          url = _add_query_parameter(url, 'uploadType', 'multipart')

    logger.info('URL being requested: %s %s' % (httpMethod, url))
    return self._requestBuilder(self._http,
                                model.response,
                                url,
                                method=httpMethod,
                                body=body,
                                headers=headers,
                                methodId=methodId,
                                resumable=resumable)

  docs = [methodDesc.get('description', DEFAULT_METHOD_DOC), '\n\n']
  if len(parameters.argmap) > 0:
    docs.append('Args:\n')

  # Skip undocumented params and params common to all methods.
  skip_parameters = list(rootDesc.get('parameters', {}).keys())
  skip_parameters.extend(STACK_QUERY_PARAMETERS)

  all_args = list(parameters.argmap.keys())
  args_ordered = [key2param(s) for s in methodDesc.get('parameterOrder', [])]

  # Make sure 'body' comes right after any explicitly ordered parameters.
  if 'body' in all_args:
    args_ordered.append('body')

  for name in all_args:
    if name not in args_ordered:
      args_ordered.append(name)

  for arg in args_ordered:
    if arg in skip_parameters:
      continue

    repeated = ''
    if arg in parameters.repeated_params:
      repeated = ' (repeated)'
    required = ''
    if arg in parameters.required_params:
      required = ' (required)'
    paramdesc = methodDesc['parameters'][parameters.argmap[arg]]
    paramdoc = paramdesc.get('description', 'A parameter')
    if '$ref' in paramdesc:
      docs.append(
          ('  %s: object, %s%s%s\n    The object takes the'
          ' form of:\n\n%s\n\n') % (arg, paramdoc, required, repeated,
            schema.prettyPrintByName(paramdesc['$ref'])))
    else:
      paramtype = paramdesc.get('type', 'string')
      docs.append('  %s: %s, %s%s%s\n' % (arg, paramtype, paramdoc, required,
                                          repeated))
    enum = paramdesc.get('enum', [])
    enumDesc = paramdesc.get('enumDescriptions', [])
    if enum and enumDesc:
      docs.append('    Allowed values\n')
      for (name, desc) in zip(enum, enumDesc):
        docs.append('      %s - %s\n' % (name, desc))
  if 'response' in methodDesc:
    if methodName.endswith('_media'):
      docs.append('\nReturns:\n  The media object as a string.\n\n    ')
    else:
      docs.append('\nReturns:\n  An object of the form:\n\n    ')
      docs.append(schema.prettyPrintSchema(methodDesc['response']))

  setattr(method, '__doc__', ''.join(docs))
  return (methodName, method)
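
The URL construction step can be tried in isolation; a small sketch using uritemplate.expand, with the standard urljoin as a stand-in for this module's _urljoin helper (the path template, base URL, and values below are invented for illustration):

import uritemplate
from six.moves.urllib.parse import urljoin  # stand-in for this module's _urljoin helper

path_url = 'files/{fileId}'                          # hypothetical method path template
params = {'fileId': 'abc123'}
expanded_url = uritemplate.expand(path_url, params)  # 'files/abc123'
query = '?alt=json'
url = urljoin('https://www.googleapis.com/example/v1/', expanded_url + query)
print(url)  # https://www.googleapis.com/example/v1/files/abc123?alt=json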
Example #55
    def build(self):
        n = len(self.properties)
        self.n = n
        if self.n:
            float_fmt = self.model.float_fmt

            #: Property ID
            self.property_id = array(sorted(self.properties.keys()),
                                     dtype='int32')

            # number of plies
            self.make_nplies(self.property_id)
            self.model.log.debug('self.nplies = %s' % self.nplies)
            nplies = self.nplies.max()
            self.model.log.debug('nplies = %s' % nplies)

            #: Non-Structural Mass per unit Area
            self.nsm = zeros(n, dtype=float_fmt)

            self.sb = zeros(n, dtype=float_fmt)

            #: Failure Theory
            #:
            #:   ['HILL', 'HOFF', 'TSAI', 'STRN', '']
            self.ft = zeros((n, nplies),
                            dtype='|S4')  # 'HILL', 'HOFF', 'TSAI', 'STRN'

            #: Reference Temperature (default=0.0)
            self.tref = zeros(n, dtype=float_fmt)
            self.ge = zeros(n, dtype=float_fmt)

            #: symmetric flag - default = No Symmetry (NO)
            self.lam = zeros(n, dtype='|S8')

            self.material_id = zeros((n, nplies), dtype='int32')
            self.t = zeros((n, nplies), dtype=float_fmt)
            self.theta = zeros((n, nplies), dtype=float_fmt)
            self.sout = zeros((n, nplies), dtype='|S4')  # YES, NO
            self.z0 = zeros(n, dtype=float_fmt)

            for i, (pid, prop) in enumerate(sorted(iteritems(self.properties))):
                self.nsm[i] = prop.nsm
                self.sb[i] = prop.sb
                self.ft[i] = prop.ft
                self.tref[i] = prop.tref
                self.ge[i] = prop.ge
                self.lam[i] = prop.lam
                self.z0[i] = prop.z0
                for iply, (mid, t, theta, sout) in enumerate(prop.plies):
                    self.material_id[i, iply] = mid
                    self.t[i, iply] = t
                    self.theta[i, iply] = theta
                    self.sout[i, iply] = sout
            #self.model.log.debug('PCOMP.material_id = %s' % self.material_id)

            i = self.property_id.argsort()
            self.property_id = self.property_id[i]
            unique_pids = unique(self.property_id)

            if len(unique_pids) != len(self.property_id):
                raise RuntimeError('There are duplicate PCOMP IDs...')
        else:
            self.property_id = array([], dtype='int32')
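
The sort-and-duplicate-check at the end of build() is plain NumPy; a standalone sketch of the same pattern with made-up property IDs:

from numpy import array, unique

property_id = array([30, 10, 20, 10], dtype='int32')
i = property_id.argsort()          # indices that sort the IDs
property_id = property_id[i]       # -> [10, 10, 20, 30]
unique_pids = unique(property_id)  # -> [10, 20, 30]
if len(unique_pids) != len(property_id):
    # Raised here because ID 10 appears twice.
    raise RuntimeError('There are duplicate PCOMP IDs...')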
Example #56
def generate(starts, stops, nums, name="grid_generate"):
    r"""Generates a M-D uniform axis-aligned grid.
  Warning:
    This op is not differentiable. Indeed, the gradient of tf.linspace and
    tf.meshgrid are currently not defined.
  Note:
    In the following, `B` is an optional batch dimension.
  Args:
    starts: A tensor of shape `[M]` or `[B, M]`, where the last dimension
      represents a M-D start point.
    stops: A tensor of shape `[M]` or `[B, M]`, where the last dimension
      represents a M-D end point.
    nums: A tensor of shape `[M]` representing the number of subdivisions for
      each dimension.
    name: A name for this op. Defaults to "grid_generate".
  Returns:
    A tensor of shape `[nums[0], ..., nums[M-1], M]` containing an M-D uniform
      grid or a tensor of shape `[B, nums[0], ..., nums[M-1], M]` containing B
      M-D uniform grids. Please refer to the example below for more details.
  Raises:
    ValueError: If the shape of `starts`, `stops`, or `nums` is not supported.
  Examples:
    ```python
    print(generate((-1.0, -2.0), (1.0, 2.0), (3, 5)))
    >>> [[[-1. -2.]
          [-1. -1.]
          [-1.  0.]
          [-1.  1.]
          [-1.  2.]]
         [[ 0. -2.]
          [ 0. -1.]
          [ 0.  0.]
          [ 0.  1.]
          [ 0.  2.]]
         [[ 1. -2.]
          [ 1. -1.]
          [ 1.  0.]
          [ 1.  1.]
          [ 1.  2.]]]
    ```
    This generates a 3x5 2D grid from -1.0 to 1.0 with 3 subdivisions for the
    x axis and from -2.0 to 2.0 with 5 subdivisions for the y axis, which
    leads to a tensor of shape (3, 5, 2).
  """
    with tf.name_scope(name):
        starts = tf.convert_to_tensor(value=starts)
        stops = tf.convert_to_tensor(value=stops)
        nums = tf.convert_to_tensor(value=nums)

        # shape.check_static(
        #     tensor=starts,
        #     tensor_name="starts",
        #     has_rank_greater_than=0,
        #     has_rank_less_than=3)
        # shape.check_static(
        #     tensor=stops,
        #     tensor_name="stops",
        #     has_rank_greater_than=0,
        #     has_rank_less_than=3)
        # shape.check_static(tensor=nums, tensor_name="nums", has_rank=1)
        # shape.compare_batch_dimensions(
        #     tensors=(starts, stops), last_axes=(-1, -1), broadcast_compatible=False)
        # shape.compare_dimensions((starts, stops, nums), -1,
        #                          ("starts", "stops", "nums"))

        if starts.shape.ndims == 1:
            return _grid(starts, stops, nums)
        else:
            return tf.stack([
                _grid(starts, stops, nums)
                for starts, stops in zip(tf.unstack(starts), tf.unstack(stops))
            ])
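
_grid itself is not included in this snippet; as a hedged NumPy analogue of what the docstring describes (one linspace per dimension, stacked into a grid with a trailing coordinate axis):

import numpy as np

def grid_numpy(starts, stops, nums):
    # One 1-D linspace per dimension, stacked into an [nums[0], ..., nums[M-1], M] grid.
    axes = [np.linspace(start, stop, num) for start, stop, num in zip(starts, stops, nums)]
    return np.stack(np.meshgrid(*axes, indexing='ij'), axis=-1)

print(grid_numpy((-1.0, -2.0), (1.0, 2.0), (3, 5)).shape)  # (3, 5, 2)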
Example #57
def _create_learner_result_dicts(task_results, grid_scores,
                                 learner_result_dict_base):
    """
    Create the learner result dictionaries that are used to create JSON and
    plain-text results files.
    """
    res = []

    num_folds = len(task_results)
    accuracy_sum = 0.0
    pearson_sum = 0.0
    score_sum = None
    prec_sum_dict = defaultdict(float)
    recall_sum_dict = defaultdict(float)
    f_sum_dict = defaultdict(float)
    result_table = None

    for k, ((conf_matrix, fold_accuracy, result_dict, model_params, score),
            grid_score) in enumerate(zip(task_results, grid_scores), start=1):

        # create a new dict for this fold
        learner_result_dict = {}
        learner_result_dict.update(learner_result_dict_base)

        # initialize some variables to blanks so that the
        # set of columns is fixed.
        learner_result_dict['result_table'] = ''
        learner_result_dict['accuracy'] = ''
        learner_result_dict['pearson'] = ''
        learner_result_dict['score'] = ''
        learner_result_dict['fold'] = ''

        if learner_result_dict_base['task'] == 'cross_validate':
            learner_result_dict['fold'] = k

        learner_result_dict['model_params'] = json.dumps(model_params)
        if grid_score is not None:
            learner_result_dict['grid_score'] = grid_score

        if conf_matrix:
            labels = sorted(iterkeys(task_results[0][2]))
            result_table = PrettyTable([""] + labels +
                                       ["Precision", "Recall", "F-measure"],
                                       header=True,
                                       hrules=ALL)
            result_table.align = 'r'
            result_table.float_format = '.3'
            for i, actual_label in enumerate(labels):
                conf_matrix[i][i] = "[{}]".format(conf_matrix[i][i])
                label_prec = _get_stat_float(result_dict[actual_label],
                                             "Precision")
                label_recall = _get_stat_float(result_dict[actual_label],
                                               "Recall")
                label_f = _get_stat_float(result_dict[actual_label],
                                          "F-measure")
                if not math.isnan(label_prec):
                    prec_sum_dict[actual_label] += float(label_prec)
                if not math.isnan(label_recall):
                    recall_sum_dict[actual_label] += float(label_recall)
                if not math.isnan(label_f):
                    f_sum_dict[actual_label] += float(label_f)
                result_row = ([actual_label] + conf_matrix[i] +
                              [label_prec, label_recall, label_f])
                result_table.add_row(result_row)

            result_table_str = '{}'.format(result_table)
            result_table_str += '\n(row = reference; column = predicted)'
            learner_result_dict['result_table'] = result_table_str
            learner_result_dict['accuracy'] = fold_accuracy
            accuracy_sum += fold_accuracy

        # if there is no confusion matrix, then we must be dealing
        # with a regression model
        else:
            learner_result_dict.update(result_dict)
            pearson_sum += float(learner_result_dict['pearson'])

        if score is not None:
            if score_sum is None:
                score_sum = score
            else:
                score_sum += score
            learner_result_dict['score'] = score
        res.append(learner_result_dict)

    if num_folds > 1:
        learner_result_dict = {}
        learner_result_dict.update(learner_result_dict_base)

        learner_result_dict['fold'] = 'average'

        if result_table:
            result_table = PrettyTable(
                ["Label", "Precision", "Recall", "F-measure"], header=True)
            result_table.align = "r"
            result_table.align["Label"] = "l"
            result_table.float_format = '.3'
            for actual_label in labels:
                # Convert sums to means
                prec_mean = prec_sum_dict[actual_label] / num_folds
                recall_mean = recall_sum_dict[actual_label] / num_folds
                f_mean = f_sum_dict[actual_label] / num_folds
                result_table.add_row([actual_label] +
                                     [prec_mean, recall_mean, f_mean])

            learner_result_dict['result_table'] = '{}'.format(result_table)
            learner_result_dict['accuracy'] = accuracy_sum / num_folds
        else:
            learner_result_dict['pearson'] = pearson_sum / num_folds

        if score_sum is not None:
            learner_result_dict['score'] = score_sum / num_folds
        res.append(learner_result_dict)
    return res
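
The per-fold tables are ordinary prettytable objects; a minimal standalone sketch of the same styling calls, with made-up labels and numbers:

from prettytable import PrettyTable, ALL

labels = ['cat', 'dog']
table = PrettyTable([''] + labels + ['Precision', 'Recall', 'F-measure'],
                    header=True, hrules=ALL)
table.align = 'r'
table.float_format = '.3'
table.add_row(['cat', '[5]', 1, 0.833, 0.833, 0.833])
table.add_row(['dog', 2, '[4]', 0.8, 0.667, 0.727])
print('{}'.format(table))
print('(row = reference; column = predicted)')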
Example #58
def RunBenchmarks():
    """Runs all benchmarks in PerfKitBenchmarker.

  Returns:
    Exit status for the process.
  """
    benchmark_specs = _CreateBenchmarkSpecs()
    if FLAGS.randomize_run_order:
        random.shuffle(benchmark_specs)
    if FLAGS.dry_run:
        print('PKB will run with the following configurations:')
        for spec in benchmark_specs:
            print(spec)
            print('')
        return 0

    collector = SampleCollector()
    try:
        tasks = [(RunBenchmarkTask, (spec, ), {}) for spec in benchmark_specs]
        if FLAGS.run_processes is None:
            spec_sample_tuples = RunBenchmarkTasksInSeries(tasks)
        else:
            spec_sample_tuples = background_tasks.RunParallelProcesses(
                tasks, FLAGS.run_processes, FLAGS.run_processes_delay)
        benchmark_specs, sample_lists = list(zip(*spec_sample_tuples))
        for sample_list in sample_lists:
            collector.samples.extend(sample_list)

    finally:
        if collector.samples:
            collector.PublishSamples()

        if benchmark_specs:
            logging.info(benchmark_status.CreateSummary(benchmark_specs))

        logging.info('Complete logs can be found at: %s',
                     vm_util.PrependTempDir(LOG_FILE_NAME))
        logging.info('Completion statuses can be found at: %s',
                     vm_util.PrependTempDir(COMPLETION_STATUS_FILE_NAME))

    if stages.TEARDOWN not in FLAGS.run_stage:
        logging.info('To run again with this setup, please use --run_uri=%s',
                     FLAGS.run_uri)

    if FLAGS.archive_bucket:
        archive.ArchiveRun(vm_util.GetTempDir(),
                           FLAGS.archive_bucket,
                           gsutil_path=FLAGS.gsutil_path,
                           prefix=FLAGS.run_uri + '_')

    # Write completion status file(s)
    completion_status_file_name = (
        vm_util.PrependTempDir(COMPLETION_STATUS_FILE_NAME))
    with open(completion_status_file_name, 'w') as status_file:
        _WriteCompletionStatusFile(benchmark_specs, status_file)
    if FLAGS.completion_status_file:
        with open(FLAGS.completion_status_file, 'w') as status_file:
            _WriteCompletionStatusFile(benchmark_specs, status_file)

    all_benchmarks_succeeded = all(spec.status == benchmark_status.SUCCEEDED
                                   for spec in benchmark_specs)
    return 0 if all_benchmarks_succeeded else 1
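
The zip(*spec_sample_tuples) call above is the usual unzip/transpose idiom; a tiny illustration with made-up data:

spec_sample_tuples = [('spec_a', ['sample_1', 'sample_2']), ('spec_b', ['sample_3'])]
benchmark_specs, sample_lists = list(zip(*spec_sample_tuples))
print(benchmark_specs)  # ('spec_a', 'spec_b')
print(sample_lists)     # (['sample_1', 'sample_2'], ['sample_3'])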
Example #59
    def _sub_read(self, f):
        """
        Parameters
        ----------
        f : file buffer
            A file buffer for an MegaM file.

        Yields
        ------
        curr_id : str
            The current ID for the example.
        class_name : float or str
            The name of the class label for the example.
        example : dict
            The example valued in dictionary format, with 'x'
            as list of features.

        Raises
        ------
        ValueError
            If there are duplicate feature names.
        """
        example_num = 0
        curr_id = 'EXAMPLE_0'
        for line in f:
            # Process encoding
            if not isinstance(line, text_type):
                line = UnicodeDammit(line, ['utf-8',
                                            'windows-1252']).unicode_markup
            line = line.strip()
            # Handle instance lines
            if line.startswith('#'):
                curr_id = line[1:].strip()
            elif line and line not in ['TRAIN', 'TEST', 'DEV']:
                split_line = line.split()
                num_cols = len(split_line)
                del line
                # Line is just a class label
                if num_cols == 1:
                    class_name = safe_float(split_line[0],
                                            replace_dict=self.class_map)
                    field_pairs = []
                # Line has a class label and feature-value pairs
                elif num_cols % 2 == 1:
                    class_name = safe_float(split_line[0],
                                            replace_dict=self.class_map)
                    field_pairs = split_line[1:]
                # Line just has feature-value pairs
                elif num_cols % 2 == 0:
                    class_name = None
                    field_pairs = split_line

                curr_info_dict = {}
                if len(field_pairs) > 0:
                    # Get the current instance's feature-value pairs
                    field_names = islice(field_pairs, 0, None, 2)
                    # Convert values to floats, because otherwise the
                    # features will be treated as categorical
                    field_values = (safe_float(val) for val in
                                    islice(field_pairs, 1, None, 2))

                    # Add the feature-value pairs to dictionary
                    curr_info_dict.update(zip(field_names, field_values))

                    if len(curr_info_dict) != len(field_pairs) / 2:
                        raise ValueError(('There are duplicate feature ' +
                                          'names in {} for example ' +
                                          '{}.').format(self.path_or_list,
                                                        curr_id))

                yield curr_id, class_name, curr_info_dict

                # Set default example ID for next instance, in case we see a
                # line without an ID.
                example_num += 1
                curr_id = 'EXAMPLE_{}'.format(example_num)
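
The interleaved name/value parsing with itertools.islice (names at even offsets, values at odd) can be demonstrated on its own; plain float() stands in for the module's safe_float helper here:

from itertools import islice

field_pairs = ['f1', '0.5', 'f2', '1', 'f3', '2.25']
field_names = islice(field_pairs, 0, None, 2)                       # f1, f2, f3
field_values = (float(val) for val in islice(field_pairs, 1, None, 2))
curr_info_dict = dict(zip(field_names, field_values))
print(curr_info_dict)  # {'f1': 0.5, 'f2': 1.0, 'f3': 2.25}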
Example #60
def run_configuration(config_file,
                      local=False,
                      overwrite=True,
                      queue='all.q',
                      hosts=None,
                      write_summary=True,
                      quiet=False,
                      ablation=0,
                      resume=False):
    """
    Takes a configuration file and runs the specified jobs on the grid.

    :param config_file: Path to the configuration file we would like to use.
    :type config_file: str
    :param local: Should this be run locally instead of on the cluster?
    :type local: bool
    :param overwrite: If the model files already exist, should we overwrite
                      them instead of re-using them?
    :type overwrite: bool
    :param queue: The DRMAA queue to use if we're running on the cluster.
    :type queue: str
    :param hosts: If running on the cluster, these are the machines we should
                  use.
    :type hosts: list of str
    :param write_summary: Write a tsv file with a summary of the results.
    :type write_summary: bool
    :param quiet: Suppress printing of "Loading..." messages.
    :type quiet: bool
    :param ablation: Number of features to remove when doing an ablation
                     experiment. If positive, we will perform repeated ablation
                     runs for all combinations of features removing the
                     specified number at a time. If ``None``, we will use all
                     combinations of all lengths. If 0, the default, no
                     ablation is performed. If negative, a ``ValueError`` is
                     raised.
    :type ablation: int or None
    :param resume: If result files already exist for an experiment, do not
                   overwrite them. This is very useful when doing a large
                   ablation experiment and part of it crashes.
    :type resume: bool

    :return: A list of paths to .json results files for each variation in the
             experiment.
    :rtype: list of str

    """
    # Initialize logger
    logger = logging.getLogger(__name__)

    # Read configuration
    (experiment_name, task, sampler, fixed_sampler_parameters, feature_hasher,
     hasher_features, id_col, label_col, train_set_name, test_set_name, suffix,
     featuresets, do_shuffle, model_path, do_grid_search, grid_objectives,
     probability, results_path, pos_label_str, feature_scaling,
     min_feature_count, grid_search_jobs, grid_search_folds, cv_folds,
     save_cv_folds, do_stratified_folds, fixed_parameter_list, param_grid_list,
     featureset_names, learners, prediction_dir, log_path, train_path,
     test_path, ids_to_floats, class_map,
     custom_learner_path) = _parse_config_file(config_file)

    # Check if we have gridmap
    if not local and not _HAVE_GRIDMAP:
        local = True
        logger.warning('gridmap 0.10.1+ not available. Forcing local '
                       'mode.  To run things on a DRMAA-compatible '
                       'cluster, install gridmap>=0.10.1 via pip.')

    # if performing ablation, expand featuresets to include combinations of
    # features within those sets
    if ablation is None or ablation > 0:
        # Make new feature set lists so that we can iterate without issue
        expanded_fs = []
        expanded_fs_names = []
        for features, featureset_name in zip(featuresets, featureset_names):
            features = sorted(features)
            featureset = set(features)
            # Expand to all feature combinations if ablation is None
            if ablation is None:
                for i in range(1, len(features)):
                    for excluded_features in combinations(features, i):
                        expanded_fs.append(
                            sorted(featureset - set(excluded_features)))
                        expanded_fs_names.append(
                            featureset_name + '_minus_' +
                            _munge_featureset_name(excluded_features))
            # Otherwise, just expand removing the specified number at a time
            else:
                for excluded_features in combinations(features, ablation):
                    expanded_fs.append(
                        sorted(featureset - set(excluded_features)))
                    expanded_fs_names.append(
                        featureset_name + '_minus_' +
                        _munge_featureset_name(excluded_features))
            # Also add version with nothing removed as baseline
            expanded_fs.append(features)
            expanded_fs_names.append(featureset_name + '_all')

        # Replace original feature set lists
        featuresets = expanded_fs
        featureset_names = expanded_fs_names
    elif ablation < 0:
        raise ValueError('Value for "ablation" argument must be either '
                         'positive integer or None.')

    # the list of jobs submitted (if running on grid)
    if not local:
        jobs = []

    # the list to hold the paths to all the result json files
    result_json_paths = []

    # check if the length of the featureset_name exceeds the maximum length
    # allowed
    for featureset_name in featureset_names:
        if len(featureset_name) > 210:
            raise OSError('The auto-generated featureset name "{}" exceeds '
                          'the maximum length the file system can handle.  '
                          'Please specify names for your datasets with '
                          '"featureset_names".  If you are running an '
                          'ablation experiment, please shorten the feature '
                          'names in "featuresets" so that the auto-generated '
                          'name stays within the limit.'.format(featureset_name))

    # Run each featureset-learner combination
    for featureset, featureset_name in zip(featuresets, featureset_names):
        for learner_num, learner_name in enumerate(learners):
            for grid_objective in grid_objectives:

                # for the individual job name, we need to add the feature set name
                # and the learner name
                if len(grid_objectives) == 1:
                    job_name_components = [
                        experiment_name, featureset_name, learner_name
                    ]
                else:
                    job_name_components = [
                        experiment_name, featureset_name, learner_name,
                        grid_objective
                    ]

                job_name = '_'.join(job_name_components)

                # change the prediction prefix to include the feature set
                prediction_prefix = join(prediction_dir, job_name)

                # the log file that stores the actual output of this script (e.g.,
                # the tuned parameters, what kind of experiment was run, etc.)
                temp_logfile = join(log_path, '{}.log'.format(job_name))

                # Figure out result json file path
                result_json_path = join(results_path,
                                        '{}.results.json'.format(job_name))

                # save the path to the results json file that will be written
                result_json_paths.append(result_json_path)

                # If result file already exists and we're resuming, move on
                if resume and (exists(result_json_path)
                               and os.path.getsize(result_json_path)):
                    logger.info(
                        'Running in resume mode and %s exists, '
                        'so skipping job.', result_json_path)
                    continue

                # create job if we're doing things on the grid
                job_args = {}
                job_args["experiment_name"] = experiment_name
                job_args["task"] = task
                job_args["sampler"] = sampler
                job_args["feature_hasher"] = feature_hasher
                job_args["hasher_features"] = hasher_features
                job_args["job_name"] = job_name
                job_args["featureset"] = featureset
                job_args["featureset_name"] = featureset_name
                job_args["learner_name"] = learner_name
                job_args["train_path"] = train_path
                job_args["test_path"] = test_path
                job_args["train_set_name"] = train_set_name
                job_args["test_set_name"] = test_set_name
                job_args["shuffle"] = do_shuffle
                job_args["model_path"] = model_path
                job_args["prediction_prefix"] = prediction_prefix
                job_args["grid_search"] = do_grid_search
                job_args["grid_objective"] = grid_objective
                job_args["suffix"] = suffix
                job_args["log_path"] = temp_logfile
                job_args["probability"] = probability
                job_args["results_path"] = results_path
                job_args["sampler_parameters"] = (fixed_sampler_parameters
                                                  if fixed_sampler_parameters
                                                  else dict())
                job_args["fixed_parameters"] = (
                    fixed_parameter_list[learner_num]
                    if fixed_parameter_list else dict())
                job_args["param_grid"] = (param_grid_list[learner_num]
                                          if param_grid_list else None)
                job_args["pos_label_str"] = pos_label_str
                job_args["overwrite"] = overwrite
                job_args["feature_scaling"] = feature_scaling
                job_args["min_feature_count"] = min_feature_count
                job_args["grid_search_jobs"] = grid_search_jobs
                job_args["grid_search_folds"] = grid_search_folds
                job_args["cv_folds"] = cv_folds
                job_args["save_cv_folds"] = save_cv_folds
                job_args["do_stratified_folds"] = do_stratified_folds
                job_args["label_col"] = label_col
                job_args["id_col"] = id_col
                job_args["ids_to_floats"] = ids_to_floats
                job_args["quiet"] = quiet
                job_args["class_map"] = class_map
                job_args["custom_learner_path"] = custom_learner_path

                if not local:
                    jobs.append(
                        Job(_classify_featureset, [job_args],
                            num_slots=(MAX_CONCURRENT_PROCESSES
                                       if do_grid_search else 1),
                            name=job_name,
                            queue=queue))
                else:
                    _classify_featureset(job_args)
    test_set_name = basename(test_path)

    # submit the jobs (if running on grid)
    if not local and _HAVE_GRIDMAP:
        if log_path:
            job_results = process_jobs(jobs,
                                       white_list=hosts,
                                       temp_dir=log_path)
        else:
            job_results = process_jobs(jobs, white_list=hosts)
        _check_job_results(job_results)

    # write out the summary results file
    if (task == 'cross_validate' or task == 'evaluate') and write_summary:
        summary_file_name = experiment_name + '_summary.tsv'
        file_mode = 'w' if sys.version_info >= (3, 0) else 'wb'
        with open(join(results_path, summary_file_name),
                  file_mode) as output_file:
            _write_summary_file(result_json_paths,
                                output_file,
                                ablation=ablation)

    return result_json_paths
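
The ablation expansion is driven by itertools.combinations; a standalone sketch of how a single featureset expands when removing one feature at a time (the feature names are invented):

from itertools import combinations

features = sorted(['lexical', 'syntactic', 'semantic'])
featureset = set(features)
expanded_fs = [sorted(featureset - set(excluded))
               for excluded in combinations(features, 1)]
expanded_fs.append(features)  # baseline with nothing removed
for fs in expanded_fs:
    print(fs)
# ['semantic', 'syntactic']
# ['lexical', 'syntactic']
# ['lexical', 'semantic']
# ['lexical', 'semantic', 'syntactic']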