Example No. 1
    def test_is_instance(self, core_type_lib):
        assert PushInt.is_instance(5)
        assert PushInt.is_instance(np.int64(100))
        assert not PushInt.is_instance("Foo")
        assert not PushInt.is_instance(np.str_("Bar"))

        assert not PushStr.is_instance(5)
        assert not PushStr.is_instance(np.int64(100))
        assert PushStr.is_instance("Foo")
        assert PushStr.is_instance(np.str_("Bar"))
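A minimal sketch of how such an is_instance check can accept both native Python scalars and their NumPy counterparts (PushIntSketch and PushStrSketch are hypothetical stand-ins, not the library's actual classes):

import numpy as np

class PushIntSketch:
    @staticmethod
    def is_instance(value):
        # np.integer covers np.int32, np.int64, etc.; bool is excluded because it subclasses int
        return isinstance(value, (int, np.integer)) and not isinstance(value, (bool, np.bool_))

class PushStrSketch:
    @staticmethod
    def is_instance(value):
        # np.str_ subclasses the builtin str, so a plain str check already covers it
        return isinstance(value, str)

assert PushIntSketch.is_instance(np.int64(100))
assert not PushIntSketch.is_instance("Foo")
assert PushStrSketch.is_instance(np.str_("Bar"))
assert not PushStrSketch.is_instance(5)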
Example No. 2
    def makeHDF5(self, rewrite=False):
        """
        Create the HDF5 structure if needed ...
        """
        print("Initialization of HDF5 file")
        if os.path.exists(self.hdf5) and rewrite:
            os.unlink(self.hdf5)

        spath = self.hdf5path.split("/")
        assert len(spath) > 2
        nxs = Nexus(self.hdf5, mode="w")
        entry = nxs.new_entry(entry=spath[0], program_name="pyFAI", title="diffmap")
        grp = entry
        for subgrp in spath[1:-2]:
            grp = nxs.new_class(grp, name=subgrp, class_type="NXcollection")

        processgrp = nxs.new_class(grp, "pyFAI", class_type="NXprocess")
        processgrp["program"] = numpy.array([numpy.str_(i) for i in sys.argv])
        processgrp["version"] = numpy.str_(PyFAI_VERSION)
        processgrp["date"] = numpy.str_(get_isotime())
        if self.mask:
            processgrp["maskfile"] = numpy.str_(self.mask)
        if self.flat:
            processgrp["flatfiles"] = numpy.array([numpy.str_(i) for i in self.flat])
        if self.dark:
            processgrp["darkfiles"] = numpy.array([numpy.str_(i) for i in self.dark])
        processgrp["inputfiles"] = numpy.array([numpy.str_(i) for i in self.inputfiles])
        processgrp["PONIfile"] = numpy.str_(self.poni)

        processgrp["dim0"] = self.npt_slow
        processgrp["dim0"].attrs["axis"] = "Rotation"
        processgrp["dim1"] = self.npt_fast
        processgrp["dim1"].attrs["axis"] = "Translation"
        processgrp["dim2"] = self.npt_rad
        processgrp["dim2"].attrs["axis"] = "Diffraction"
        for k, v in self.ai.getPyFAI().items():
            if "__len__" in dir(v):
                processgrp[k] = numpy.str_(v)
            elif v:
                processgrp[k] = v

        self.group = nxs.new_class(grp, name=spath[-2], class_type="NXdata")

        if posixpath.basename(self.hdf5path) in self.group:
            self.dataset = self.group[posixpath.basename(self.hdf5path)]
        else:
            self.dataset = self.group.create_dataset(
                name=posixpath.basename(self.hdf5path),
                shape=(self.npt_slow, self.npt_fast, self.npt_rad),
                dtype="float32",
                chunks=(1, self.npt_fast, self.npt_rad),
                maxshape=(None, None, self.npt_rad))
            self.dataset.attrs["signal"] = "1"
            self.dataset.attrs["interpretation"] = "spectrum"
            self.dataset.attrs["axes"] = str(self.unit).split("_")[0]
            self.dataset.attrs["creator"] = "pyFAI"
            self.dataset.attrs["long_name"] = "Diffraction imaging experiment"
        self.nxs = nxs
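For readers without pyFAI, here is a minimal sketch of the same HDF5 layout written with h5py directly, since the Nexus wrapper used above is not shown in this snippet (file, group and dataset names are illustrative):

import h5py

with h5py.File("diffmap_sketch.h5", "w") as h5:
    process = h5.create_group("entry/pyFAI")      # intermediate groups are created automatically
    process["date"] = "2020-01-01T00:00:00"       # scalar strings become string datasets
    data = h5.create_group("entry/data")
    dset = data.create_dataset("map", shape=(10, 20, 30), dtype="float32",
                               chunks=(1, 20, 30), maxshape=(None, None, 30))
    dset.attrs["interpretation"] = "spectrum"
    dset.attrs["creator"] = "pyFAI"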
Example No. 3
    def find_cliche(self,datapath,filename):
        """ Calculates the most common words (cliches) in csv
        Removes the punctuations from csv
        TF-IDF to calculate most common words
        Extracting most common words

        """
        data = self.common.read_csv(datapath,filename)
        ##speechtext = data.speechtext.str.replace(r'[^\w\s\,?]','')  # Removing all punctuation from speech text
        speechtext = data.speechtext.str.lower()

        #Using tf idf to find words or tokens that are less important
        vectorizer = TfidfVectorizer(decode_error='replace',stop_words='english',encoding='utf-8')
        tfidf = vectorizer.fit_transform(speechtext.apply(lambda x: np.str_(x)))

        terms = vectorizer.get_feature_names()
        sums = tfidf.sum(axis=0)
        data = []
        for col, term in enumerate(terms):
            data.append( (term, sums[0,col] ))

        ranking = pd.DataFrame(data, columns=['term','rank'])
        cliches = ranking.sort_values('rank', ascending=False).nlargest(25, 'rank')
        found_cliches = cliches.term.values
        #print(found_cliches)
        return found_cliches
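A self-contained sketch of the same term-ranking idea on a tiny in-memory corpus (the CSV reading is skipped, and get_feature_names_out is assumed to be available, i.e. scikit-learn >= 1.0):

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

speeches = pd.Series(["at the end of the day we must act",
                      "at the end of the day results matter",
                      "we must act now and act fast"])
vectorizer = TfidfVectorizer(stop_words='english')
tfidf = vectorizer.fit_transform(speeches.apply(lambda x: np.str_(x)))
sums = np.asarray(tfidf.sum(axis=0)).ravel()        # total tf-idf weight per term
ranking = pd.DataFrame({'term': vectorizer.get_feature_names_out(), 'rank': sums})
print(ranking.nlargest(5, 'rank'))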
Example No. 4
 def test_string(self):
     lr = LogisticRegression()
     for col in ['features', u'features', np.str_('features')]:
         lr.setFeaturesCol(col)
         self.assertEqual(lr.getFeaturesCol(), 'features')
     self.assertRaises(TypeError,
                       lambda: LogisticRegression(featuresCol=2.3))
Example No. 5
 def test_parse_to_string_convertible(self):
     try_to_convert = partial(self._try_to_convert, cv.utils.dumpString)
     for convertible in (None, '', 's', 'str', str(123), np.str('test1'), np.str_('test2')):
         expected = 'string: ' + (convertible if convertible else '')
         actual = try_to_convert(convertible)
         self.assertEqual(expected, actual,
                          msg=get_conversion_error_msg(convertible, expected, actual))
Example No. 6
 def testStringReduce2D(self):
     # Create a 2D array of strings
     x = np.asarray([["", "", "a", "", "", "b"], ["", "c", "", "d", "", ""],
                     ["e", "", "f", "", "", ""]])
     self._compare(x, None, keepdims=False, zero=np.str_(""))
     self._compare(x, [], keepdims=False, zero=np.str_(""))
     self._compare(x, [0], keepdims=False, zero=np.str_(""))
     self._compare(x, [1], keepdims=False, zero=np.str_(""))
     self._compare(x, [0, 1], keepdims=False, zero=np.str_(""))
     self._compare(x, None, keepdims=True, zero=np.str_(""))
     self._compare(x, [], keepdims=True, zero=np.str_(""))
     self._compare(x, [0], keepdims=True, zero=np.str_(""))
     self._compare(x, [0, 1], keepdims=True, zero=np.str_(""))
Example No. 7
 def test_character_assignment(self):
      # This is an example of a function going through CopyObject, which
      # used to have an untested special path for scalars
      # (the character special dtype case, which should probably be deprecated)
     arr = np.zeros((1, 5), dtype="c")
     arr[0] = np.str_("asdfg")  # must assign as a sequence
     assert_array_equal(arr[0], np.array("asdfg", dtype="c"))
     assert arr[0, 1] == b"s"  # make sure not all were set to "a" for both
Example No. 8
    def entrainer(self, root):
        self.cv = TfidfVectorizer(min_df=1, stop_words='english')
        self.x_train = self.cv.fit_transform(
            self.df_x_train.apply(lambda x: np.str_(x)))

        self.x_test = self.cv.transform(
            self.df_x_test.apply(lambda x: np.str_(x)))

        self.mb = svm.SVC()
        try:
            self.mb.fit(self.x_train, self.df_y_train)
            print("entrinement terminé")

        except MemoryError:
            messagebox.showerror('la mémoire est insuffisante ',
                                 message=' baisser le pourcentage et ressayer')
            root.destroy()
Example No. 9
 def test___new___list_of_numpy(self):
     assert Vector([np.bool_(True)]).is_boolean()
     assert Vector([np.datetime64(DATE)]).is_datetime()
     assert Vector([np.datetime64(DATETIME)]).is_datetime()
     assert Vector([np.float_(0.5)]).is_float()
     assert Vector([np.int_(1)]).is_integer()
     assert Vector([np.object_(np)]).is_object()
     assert Vector([np.str_("")]).is_string()
Example No. 10
 def check_numpy_scalar_argument_return_string_2(self):
     f = PyCFunction('foo')
     f += Variable('a', 'npy_str', 'in, out')
     f += 'a.data[0] = \'H\';'
     foo = f.build()
     s = numpy.str_('hey')
     assert_equal(foo(s), 'Hey')
     assert_equal(s, 'hey')
Example No. 11
 def predictScore(self, reviews):
      # Clean and tokenize (word segmentation)
     reviews = [self.trim(review) for review in reviews]
     print(reviews)
     tfidf = self.tfidftransformer.transform(
         self.vectorizer.transform([np.str_(review) for review in reviews]))
     pred = self.clf.predict_proba(tfidf)
     return pred
Example No. 12
 def check_numpy_scalar_argument_return_string_2(self):
     f = PyCFunction('foo')
     f += Variable('a', 'npy_str', 'in, out')
     f += 'a.data[0] = \'H\';'
     foo = f.build()
     s = numpy.str_('hey')
     assert_equal(foo(s),'Hey')
     assert_equal(s,'hey')
Example No. 13
    def setUp(self):
        self.b_lit = b'bytes literal'
        self.s_lit = 'literal literal'
        self.u_lit = u'unicode literal'

        self.np_b_lit = np.bytes_('numpy bytes literal')
        self.np_s_lit = np.str_('numpy unicode literal')
        self.np_u_lit = np.unicode_('numpy unicode literal')
Example No. 14
 def __init__(self,value=0):
     if self.__class__.__name__ == 'Scalar':
         raise TypeError("cannot create 'Scalar' instances")
     if self.__class__.__name__ == 'String':
         self._value = _N.str_(_ver.tostr(value))
         return
     if isinstance(value,(_data.Data)):
         value = value.data()
     self._value = _N.__dict__[self.__class__.__name__.lower()](value)
Example No. 15
def save_data(fname, prob, npz_file=True, mat_file=True):
    # Remove file extension
    froot = os.path.splitext(fname)[0]

    # Get all OpenMDAO inputs and outputs into a dictionary
    var_dict = prob.model.list_inputs(values=True,
                                      prom_name=False,
                                      units=True,
                                      out_stream=None)
    out_dict = prob.model.list_outputs(values=True,
                                       prom_name=False,
                                       units=True,
                                       out_stream=None)
    var_dict.extend(out_dict)

    # Pickle the full archive so that we can load it back in if we need
    with open(froot + '.pkl', 'wb') as f:
        pickle.dump(var_dict, f)

    # Reduce to variables we can save for matlab or python
    if npz_file or mat_file:
        array_dict = {}
        for k in range(len(var_dict)):
            unit_str = var_dict[k][1]['units']
            if unit_str is None or unit_str == 'Unavailable':
                unit_str = ''
            elif len(unit_str) > 0:
                unit_str = '_' + unit_str

            iname = var_dict[k][0] + unit_str
            value = var_dict[k][1]['value']

            if type(value) in [
                    type(np.array([])),
                    type(0.0),
                    type(0), np.float64, np.int64
            ]:
                array_dict[iname] = value
            elif type(value) == type(True):
                array_dict[iname] = np.bool_(value)
            elif type(value) == type(''):
                array_dict[iname] = np.str_(value)
            elif type(value) == type([]):
                temp_val = np.empty(len(value), dtype=np.object)
                temp_val[:] = value[:]
                array_dict[iname] = temp_val
            #else:
            #    print(var_dict[k])

    # Save to numpy compatible
    if npz_file:
        kwargs = {key: array_dict[key] for key in array_dict.keys()}
        np.savez_compressed(froot + '.npz', **kwargs)

    # Save to matlab compatible
    if mat_file:
        sio.savemat(froot + '.mat', array_dict, long_field_names=True)
Example No. 16
    def setUp(self):
        self.b_lit = b'bytes literal'
        self.s_lit = 'literal literal'
        self.u_lit = u'unicode literal'

        self.np_b_lit = np.bytes_('numpy bytes literal')
        self.np_s_lit = np.str_('numpy unicode literal')
        self.np_u_lit = np.unicode_('numpy unicode literal')
Example No. 17
    def __getitem__(self, key):
        return_grid = self._grid.copy()

        if isinstance(key[0], slice):
            start = key[0].start
            stop = key[0].stop
            for i, i_index in enumerate(self._flowstruct.outer_index):
                if i_index == start:
                    start_i = i

                if i_index == stop:
                    stop_i = i

            if stop_i < start_i:
                tmp = stop_i
                stop_i = start_i
                start_i = tmp

            outer_list = self._flowstruct.outer_index[start_i:stop_i + 1]
        elif isinstance(key[0], list):
            outer_list = key[0]
        else:
            outer_list = [key[0]]

        if isinstance(key[1], slice):
            start = key[1].start
            stop = key[1].stop
            for i, i_index in enumerate(self._flowstruct.inner_index):
                if i_index == start:
                    start_i = i

                if i_index == stop:
                    stop_i = i

            if stop_i < start_i:
                tmp = stop_i
                stop_i = start_i
                start_i = tmp

            inner_list = self._flowstruct.inner_index[start_i:stop_i + 1]
        elif isinstance(key[1], list):
            inner_list = key[1]
        else:
            inner_list = [key[1]]

        keys = list(itertools.product(outer_list, inner_list))
        if len(keys) > 1:
            keys = [key for key in keys if key in self._flowstruct.index]

        for k in keys:

            data = self._flowstruct[k]
            if len(set(outer_list)) < 2:
                k = k[1]
            return_grid.cell_arrays[np.str_(k)] = data.flatten()
        return return_grid
Example No. 18
def main():

    path_bc03 = './'  # location of the BC03 .ised_ASCII files
    path_output = './'  # location of the output

    # define the bc03 SED file
    res = 'lr'  # resolution
    Z = '62'  # metallicity
    sfh = 'tau5'  # star formation history
    imf = 'chab'  # imf
    dust = '_dust00'  # dust
    sedfile = 'bc2003_' + res + '_m' + Z + '_' + imf + '_' + sfh + dust + '.ised_ASCII'
    outputfile = 'extracted_' + sedfile

    # extract the spectra from the .ised_ASCII. It will skip this if it finds a previously made
    # file extracted_<sedfile> in the folder specified by <path_output>
    if os.path.isfile(path_output + outputfile):
        print("Extracted file ", path_output + outputfile, " found. Good!")
    else:
        print("Extracted file ", path_output + outputfile,
              " does not yet exist. Building it...")
        process_ised_ascii_file(path_bc03, sedfile, path_output, outputfile)

    # read the extracted file. It will return:
    # nages : the number of ages for which there is a spectrum
    # specs_age : the list of ages of the spectra
    # nwavelengths : the number of wavelengths in each spectrum
    # specs_wavelengths : the list of wavelengths for each spectrum
    # specs_flux : an array where each column i corresponds to the fluxes of the ith age
    specs_age, specs_flux, specs_wavelength, nwavelengths, nages = import_spectra(
        path_output + outputfile)
    print("Extracted file ", path_output + outputfile, " read.")
    print("Numer of age bins: ", len(specs_age))
    print("Numer of wavelength bins: ", len(specs_wavelength))

    # select a spectrum of a certain age in years
    age_wanted = 1.e8  # for 100 Myr
    age_bin = find_closest_value(specs_age, age_wanted)
    age_myr = specs_age[age_bin][0] / 1.e6

    # this is your spectrum
    lam = specs_wavelength
    flux = (specs_flux[:, age_bin]).flatten()

    # plot the spectrum
    plt.figure()
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('Wavelength ($\\AA$)')
    plt.ylabel('$F_\\lambda$ (units)')
    title = sedfile
    age_info = "Age: " + np.str_(age_myr) + ' Myr'
    plt.plot(lam, flux)
    plt.text(1e4, 0.1 * np.max(flux), age_info, fontsize=20)
    plt.title(title)
    plt.show()
Example No. 19
    def transform(self, X, y=None, **params):
        X_ = X.copy()
        X_ = X_.explode("libelle_auteur")

        # Normalize libelle_auteur on specific author names
        def replace_batch_auteur(list_of_s: list, replace: str):
            for s in list_of_s:
                X_["libelle_auteur"] = X_["libelle_auteur"].apply(
                    lambda x: x.replace(s, replace))

        replace_batch_auteur(
            ["M. Édouard Philippe", "M. Edouard Philippe", "M. Jean Castex"],
            "Gouvernement",
        )
        replace_batch_auteur(["Mme x", "M. XXX"], "Anonyme")
        # Add a slug column by removing accents and setting it lowercase
        X_["slug"] = X_["libelle_auteur"].apply(self._normalize_txt)
        # Try to merge with self.actors on several versions of the slug
        va_merge_1 = X_.merge(self.actors,
                              how="inner",
                              left_on="slug",
                              right_on="slug_1")
        va_merge_2 = X_.merge(self.actors,
                              how="inner",
                              left_on="slug",
                              right_on="slug_2")
        va_merge_3 = X_.merge(self.actors,
                              how="inner",
                              left_on="slug",
                              right_on="slug_3")
        # Special case with "Gouvernement", that is not in self.actors
        va_merge_4 = X_.merge(
            pd.DataFrame({
                "slug": ["gouvernement"],
                "membre_parti": ["Gouvernement"]
            }),
            on="slug",
        )
        # Merge all the joins together
        va_merge = va_merge_1.append(va_merge_2).append(va_merge_3).append(
            va_merge_4)
        va_merge.rename({"membre_parti": "auteur_parti"}, axis=1, inplace=True)
        # Reverse the explosion made over X, using a groupby.
        X_ = (va_merge.groupby("vote_uid").agg({
            "auteur_parti":
            lambda x: x.tolist()
        }).reset_index())
        # Drop non-relevant column
        X_ = X_[["vote_uid", "auteur_parti"]]
        # print(X_.head(5))
        # print(X.head(5))
        # Join with the original dataframe
        X = X.merge(X_, how="left", on="vote_uid")
        X["auteur_parti"].fillna("[NAN]", inplace=True)
        X["auteur_parti"] = X["auteur_parti"].apply(lambda x: np.str_(x))
        return X
Example No. 20
 def __init__(self, dataset):
     self.dataset = dataset
     self.additional_stopwords = ['rt']
     self.preprocess()
     self.vector = TfidfVectorizer(ngram_range=(1, 2),
                                   max_df=0.75,
                                   min_df=5,
                                   max_features=10000)
     self.tfidf = self.vector.fit_transform(
         dataset['processed_text'].apply(lambda x: np.str_(x)))
Example No. 21
def get_TF_IDF(data):
    words = data['words'].apply(lambda x: np.str_(x))

    vectorizer = CountVectorizer()  # converts the words into a term-frequency matrix; element a[i][j] is the frequency of word j in document i
    transformer = TfidfTransformer()  # computes the tf-idf weight of every word
    tfidf = transformer.fit_transform(vectorizer.fit_transform(
        words))  # the inner fit_transform builds the term-frequency matrix; the outer one computes tf-idf
    weight = tfidf.toarray()  # extract the tf-idf matrix; element a[i][j] is the tf-idf weight of word j in document i
    return weight, data['label']
Example No. 22
def predict_input(input, df):
    vectorizer = CountVectorizer(ngram_range=(1, 2))
    vectorizer.fit_transform(df["lemma"].apply(lambda x: np.str_(x)))
    enc = vectorizer.transform([input])
    results = []
    for mod in MODELS:
        model = MODELS.get(mod)
        pred = model.predict(enc)[0]
        results.append([mod, pred])
    return results
Example No. 23
 def test_isscalar_numpy_array_scalars(self):
     self.assertTrue(lib.isscalar(np.int64(1)))
     self.assertTrue(lib.isscalar(np.float64(1.)))
     self.assertTrue(lib.isscalar(np.int32(1)))
     self.assertTrue(lib.isscalar(np.object_('foobar')))
     self.assertTrue(lib.isscalar(np.str_('foobar')))
     self.assertTrue(lib.isscalar(np.unicode_(u('foobar'))))
     self.assertTrue(lib.isscalar(np.bytes_(b'foobar')))
     self.assertTrue(lib.isscalar(np.datetime64('2014-01-01')))
     self.assertTrue(lib.isscalar(np.timedelta64(1, 'h')))
Example No. 24
 def test_isscalar_numpy_array_scalars(self):
     self.assertTrue(is_scalar(np.int64(1)))
     self.assertTrue(is_scalar(np.float64(1.)))
     self.assertTrue(is_scalar(np.int32(1)))
     self.assertTrue(is_scalar(np.object_('foobar')))
     self.assertTrue(is_scalar(np.str_('foobar')))
     self.assertTrue(is_scalar(np.unicode_(u('foobar'))))
     self.assertTrue(is_scalar(np.bytes_(b'foobar')))
     self.assertTrue(is_scalar(np.datetime64('2014-01-01')))
     self.assertTrue(is_scalar(np.timedelta64(1, 'h')))
Example No. 25
    def test_scalar_none_comparison(self):
        # Scalars should still just return False and not give warnings.
        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings('always', '', FutureWarning)
            assert_(not np.float32(1) == None)
            assert_(not np.str_('test') == None)
            # This is dubious (see below):
            assert_(not np.datetime64('NaT') == None)

            assert_(np.float32(1) != None)
            assert_(np.str_('test') != None)
            # This is dubious (see below):
            assert_(np.datetime64('NaT') != None)
        assert_(len(w) == 0)

        # For documentation purposes, this is why the datetime is dubious.
        # At the time of deprecation this was no behaviour change, but
        # it has to be considered when the deprecations are done.
        assert_(np.equal(np.datetime64('NaT'), None))
Example No. 26
    def test_scalar_none_comparison(self):
        # Scalars should still just return False and not give warnings.
        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings('always', '', FutureWarning)
            assert_(not np.float32(1) == None)
            assert_(not np.str_('test') == None)
            # This is dubious (see below):
            assert_(not np.datetime64('NaT') == None)

            assert_(np.float32(1) != None)
            assert_(np.str_('test') != None)
            # This is dubious (see below):
            assert_(np.datetime64('NaT') != None)
        assert_(len(w) == 0)

        # For documentation purposes, this is why the datetime is dubious.
        # At the time of deprecation this was no behaviour change, but
        # it has to be considered when the deprecations are done.
        assert_(np.equal(np.datetime64('NaT'), None))
Example No. 27
 def test_isscalar_numpy_array_scalars(self):
     self.assertTrue(lib.isscalar(np.int64(1)))
     self.assertTrue(lib.isscalar(np.float64(1.0)))
     self.assertTrue(lib.isscalar(np.int32(1)))
     self.assertTrue(lib.isscalar(np.object_("foobar")))
     self.assertTrue(lib.isscalar(np.str_("foobar")))
     self.assertTrue(lib.isscalar(np.unicode_(u("foobar"))))
     self.assertTrue(lib.isscalar(np.bytes_(b"foobar")))
     self.assertTrue(lib.isscalar(np.datetime64("2014-01-01")))
     self.assertTrue(lib.isscalar(np.timedelta64(1, "h")))
Example No. 28
 def test_is_scalar_numpy_array_scalars(self):
     assert is_scalar(np.int64(1))
     assert is_scalar(np.float64(1.0))
     assert is_scalar(np.int32(1))
     assert is_scalar(np.object_("foobar"))
     assert is_scalar(np.str_("foobar"))
     assert is_scalar(np.unicode_("foobar"))
     assert is_scalar(np.bytes_(b"foobar"))
     assert is_scalar(np.datetime64("2014-01-01"))
     assert is_scalar(np.timedelta64(1, "h"))
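A quick standalone check of why NumPy string and bytes scalars count as scalars here: they subclass the corresponding built-in Python types (illustration only):

import numpy as np

s = np.str_("foobar")
b = np.bytes_(b"foobar")
assert isinstance(s, str) and isinstance(b, bytes)   # np.str_ / np.bytes_ subclass str / bytes
assert s == "foobar" and len(b) == 6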
Example No. 29
def _ndarray_dtype(fields):
    """
    Return the NumPy structured array data type

    Helper function
    """
    return [
        (np.str_(key), values)
        for key, values in fields
    ]
Example No. 30
    def predict(self, papers):
        """
        Generates predictions from the trained classifiers. Each binary
        classifier is applied once.

        Parameters
        ==========

        papers : pd.DataFrame
            papers that we want to classify. Required column:
                tokens_baseline - previously tokenized title-abstract

        Returns
        =======
        scores : pd.DataFrame
            Dataframe containing the predictions generated by each model.
            Each column corresponds to a review group and the values in
            that column are the probabilities that each paper belongs to
            that review group.
        """

        scores = {}

        tokenized_papers = list(papers[self.tokens_col])

        # get vectorizer and determine tfidf for papers
        vec = self.vectorizer
        X = vec.transform(tokenized_papers)

        if self.tokens_col2 is not None:
            tokenized_papers2 = papers[self.tokens_col2].apply(
                lambda x: np.str_(x))

            # get vectorizer and determine tfidf for papers
            vec2 = self.vectorizer2
            X2 = vec2.transform(tokenized_papers2)

            X = hstack([X, X2])

        for model_group in tqdm(self.models, desc='Test Review Groups'):

            # get the classifier
            classifier = self.models[model_group]

            # predictions as probabilities
            y_preds = classifier.predict_proba(X)

            probabilities = y_preds[:, 1]

            # store scores of model
            scores[model_group] = probabilities

        scores = pd.DataFrame.from_dict(scores)

        return scores
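A sketch of the two-vectorizer feature stacking used in predict above, on toy data (the column names are made up, and the vectorizers are fitted inline here, whereas the class uses pre-fitted ones):

import numpy as np
import pandas as pd
from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer

papers = pd.DataFrame({'tokens_baseline': ['randomized trial outcome', 'cohort study bias'],
                       'tokens_col2': ['oncology imaging', 'cardiology risk']})
vec = TfidfVectorizer().fit(papers['tokens_baseline'])
vec2 = TfidfVectorizer().fit(papers['tokens_col2'].apply(lambda x: np.str_(x)))
X = hstack([vec.transform(papers['tokens_baseline']),
            vec2.transform(papers['tokens_col2'].apply(lambda x: np.str_(x)))])
print(X.shape)   # rows = papers, columns = combined vocabulary of both vectorizers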
Example No. 31
def sample_factory(freq='D',
                   start_date='2001-01-01',
                   value_generator=np.random.normal,
                   timeseries_prefix="T",
                   array_prefix="A",
                   scalar_prefix="S",
                   missing_prefix="M",
                   number_of_arrays=20,
                   number_of_timeseries=26,
                   number_of_missings=5,
                   number_of_scalars=15,
                   length_of_data=100,
                   dataset=None):



    if not dataset:
        dataset = InformationSet()

    M = max(number_of_timeseries,number_of_arrays,number_of_missings)
    fmt = "%s%0"+str(len(str(M-1)))+"d"

    # Timeseries
    for i in range(number_of_timeseries):
        Sname = fmt % (timeseries_prefix,i)
        np.random.seed(i)
        values=value_generator(0,1,length_of_data)
        per = period_range(start_date,periods=len(values),freq=freq)
        series = Series(values,index=per)
        dataset[Sname]=series

    # Arrays
    for i in range(number_of_arrays):
        Sname = fmt % (array_prefix,i)
        np.random.seed(i)
        values=value_generator(0,1,length_of_data)
        series = Series(values)
        dataset[Sname]=series

    # Scalars
    for i in range(number_of_scalars):
        Sname = fmt % (scalar_prefix,i)
        np.random.seed(i)
        if i % 2:
            values=np.float_(value_generator(0,1,1))
        else:
            values=np.str_(value_generator(0,1,1))
        series = Series(values)
        dataset[Sname]=series

    # Also add some missings
    if number_of_missings>0:
        dataset.add_missing(*[ fmt % (missing_prefix,t) for t in range(number_of_missings)])

    return dataset
Example No. 32
def get_vectorizer(column, X, ngram_range, tokenizer=False):
    if tokenizer:
        vectorizer = TfidfVectorizer(max_features=4000,
                                     stop_words='english',
                                     ngram_range=ngram_range,
                                     tokenizer=tokenize)
    else:
        vectorizer = TfidfVectorizer(max_features=4000,
                                     ngram_range=ngram_range)
    vectorizer.fit(X[column].apply(lambda x: np.str_(x)))
    return vectorizer
Example No. 33
 def __str__(self):
     ret_str = str(self.x_size) + " x " + str(self.y_size) + \
                 " simulation grid:"
     for y in range(self.y_size):
         ret_str += "\n["
         for x in range(self.x_size):
             if x > 0:
                 ret_str += ", "
             ret_str += numpy.str_(str(round(self.grid[x, y].state)))
         ret_str += ']'
     return ret_str
Example No. 34
    def test_scalar_none_comparison(self):
        # Scalars should still just return False and not give warnings.
        # The comparisons are flagged by pep8, ignore that.
        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings("always", "", FutureWarning)
            assert_(not np.float32(1) == None)
            assert_(not np.str_("test") == None)
            # This is dubious (see below):
            assert_(not np.datetime64("NaT") == None)

            assert_(np.float32(1) != None)
            assert_(np.str_("test") != None)
            # This is dubious (see below):
            assert_(np.datetime64("NaT") != None)
        assert_(len(w) == 0)

        # For documentation purposes, this is why the datetime is dubious.
        # At the time of deprecation this was no behaviour change, but
        # it has to be considered when the deprecations are done.
        assert_(np.equal(np.datetime64("NaT"), None))
Example No. 35
def convert2np(x):
    if x == "nan":
        return np.nan
    elif type(x) is float:
        return np.float64(x)
    elif type(x) is int:
        return np.int64(x)
    elif type(x) is str:
        return np.str_(x)
    else:
        return x
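Illustrative calls to convert2np (assumes numpy is imported as np, as the function above requires):

import numpy as np

print(type(convert2np(3)))       # <class 'numpy.int64'>
print(type(convert2np(3.5)))     # <class 'numpy.float64'>
print(type(convert2np("abc")))   # <class 'numpy.str_'>
print(convert2np("nan"))         # nan (the float, not the string)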
Example No. 36
def _serialize_tensor_value(
    value: Any, type_spec: computation_types.TensorType
) -> Tuple[executor_pb2.Value, computation_types.TensorType]:
    """Serializes a tensor value into `executor_pb2.Value`.

  Args:
    value: A Numpy array or other object understood by `tf.make_tensor_proto`.
    type_spec: A `tff.TensorType`.

  Returns:
    A tuple `(value_proto, ret_type_spec)` in which `value_proto` is an instance
    of `executor_pb2.Value` with the serialized content of `value`,
    and `ret_type_spec` is the type of the serialized value. The `ret_type_spec`
    is the same as the argument `type_spec` if that argument was not `None`. If
    the argument was `None`, `ret_type_spec` is a type determined from `value`.

  Raises:
    TypeError: If the arguments are of the wrong types.
    ValueError: If the value is malformed.
  """
    original_value = value
    if tf.is_tensor(value):
        if isinstance(value, tf.Variable):
            value = value.read_value()
        if tf.executing_eagerly():
            value = value.numpy()
        else:
            # Attempt to extract the value using the current graph context.
            with tf.compat.v1.Session() as sess:
                value = sess.run(value)
    # If we got a string or bytes scalar, wrap it in numpy so it has a dtype and
    # shape.
    if isinstance(value, bytes):
        value = np.bytes_(value)
    elif isinstance(value, str):
        value = np.str_(value)
    else:
        value = np.asarray(value)
    if not tf.TensorShape(value.shape).is_compatible_with(type_spec.shape):
        raise TypeError(f'Cannot serialize tensor with shape {value.shape} to '
                        f'shape {type_spec.shape}.')
    if value.dtype != type_spec.dtype.as_numpy_dtype:
        try:
            value = value.astype(type_spec.dtype.as_numpy_dtype,
                                 casting='same_kind')
        except TypeError as te:
            value_type_string = py_typecheck.type_string(type(original_value))
            raise TypeError(
                f'Failed to serialize value of Python type {value_type_string} to '
                f'a tensor of type {type_spec}.\nValue: {original_value}'
            ) from te

    value_proto = _value_proto_for_np_array(value, type_spec)
    return value_proto, type_spec
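The scalar-wrapping step above, shown standalone: wrapping bytes/str in NumPy scalars gives them a dtype and an empty shape, so the shape and dtype checks later in the function can treat them like any other array (illustration only):

import numpy as np

v_bytes = np.bytes_(b'abc')   # dtype('S3'), shape ()
v_str = np.str_('abc')        # dtype('<U3'), shape ()
v_arr = np.asarray([1, 2])    # everything else goes through np.asarray
print(v_bytes.dtype, v_str.dtype, v_arr.shape)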
Example No. 37
    def test_scalar_comparison_to_none(self):
        # Scalars should just return False and not give warnings.
        # The comparisons are flagged by pep8, ignore that.
        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings("always", "", FutureWarning)
            assert_(not np.float32(1) == None)
            assert_(not np.str_("test") == None)
            # This is dubious (see below):
            assert_(not np.datetime64("NaT") == None)

            assert_(np.float32(1) != None)
            assert_(np.str_("test") != None)
            # This is dubious (see below):
            assert_(np.datetime64("NaT") != None)
        assert_(len(w) == 0)

        # For documentation purposes, this is why the datetime is dubious.
        # At the time of deprecation this was no behaviour change, but
        # it has to be considered when the deprecations are done.
        assert_(np.equal(np.datetime64("NaT"), None))
Example No. 38
 def test_astype(self):
     import numpy as np
     a = np.bool_(True).astype(np.float32)
     assert type(a) is np.float32
     assert a == 1.0
     a = np.bool_(True).astype('int32')
     assert type(a) is np.int32
     assert a == 1
     a = np.str_('123').astype('int32')
     assert type(a) is np.int32
     assert a == 123
Example No. 39
    def test_ends_with(self):
        run_test_ends_with(self.strings, self.test_strings, self.delim)
        run_test_ends_with(self.strings, self.test_strings, np.str_(self.delim))
        run_test_ends_with(self.strings, self.test_strings, str.encode(str(self.delim)))

        # Test gremlins delimiters
        g = self._get_ak_gremlins()
        run_test_ends_with(g.gremlins_strings, g.gremlins_test_strings, ' ')
        run_test_ends_with(g.gremlins_strings, g.gremlins_test_strings, '"')
        with self.assertRaises(AssertionError):
            self.assertFalse(run_test_ends_with(g.gremlins_strings,
                                            g.gremlins_test_strings, ''))
Example No. 40
def predictSpam():

    if request.method == 'POST':
        text = ''
        text = str(request.form['txtmessage'])
        if text:
            df = prep.create_dataframe(text)
            df.drop(['text'], inplace=True, axis=1)

            #input_vectors = vec.create_vectors(df)

            x_vectors = x_vector.transform(
                df['cleaned_text'].apply(lambda x: np.str_(x)))

            #combine features
            selected_features = df.columns[1:]
            feature_set1 = df[selected_features]

            # converting pandas frame feature_set1 to compressed sparse representation
            input_vectors = hstack(
                [x_vectors, csr_matrix(feature_set1)], "csr")

            #vectors = X_vector.fit_transform(df).toarrary()
            prediction = model.predict(input_vectors)
            output = round(prediction[0])

        if not text:
            return render_template(
                'index.html',
                prediction_text="Sorry, input is required for analysis!")

        else:
            '''
            if output == 0:
                return jsonify({output:"This is legit message"})
            else:
                return jsonify({output:"This is Spam message"})
            '''
            if output == 0:
                return render_template(
                    'index.html',
                    prediction_text=
                    "This is legit message! Model Predicted Value =  " +
                    str(output))
            else:
                return render_template(
                    'index.html',
                    prediction_text=
                    "This is spam. Be careful! Model Predicted Value = " +
                    str(output))

    else:
        return render_template('index.html')
Example No. 41
    def test_builtin(self):
        import numpy as np

        assert int(np.str_("12")) == 12
        exc = raises(ValueError, "int(np.str_('abc'))")
        assert exc.value.message.startswith("invalid literal for int()")
        assert int(np.uint64((2 << 63) - 1)) == (2 << 63) - 1
        exc = raises(ValueError, "int(np.float64(np.nan))")
        assert str(exc.value) == "cannot convert float NaN to integer"
        exc = raises(OverflowError, "int(np.float64(np.inf))")
        assert str(exc.value) == "cannot convert float infinity to integer"
        assert int(np.float64(1e100)) == int(1e100)
        assert long(np.float64(1e100)) == int(1e100)
        assert int(np.complex128(1e100 + 2j)) == int(1e100)
        exc = raises(OverflowError, "int(np.complex64(1e100+2j))")
        assert str(exc.value) == "cannot convert float infinity to integer"
        assert int(np.str_("100000000000000000000")) == 100000000000000000000
        assert long(np.str_("100000000000000000000")) == 100000000000000000000

        assert float(np.float64(1e100)) == 1e100
        assert float(np.complex128(1e100 + 2j)) == 1e100
        assert float(np.str_("1e100")) == 1e100
        assert float(np.str_("inf")) == np.inf
        assert str(float(np.float64(np.nan))) == "nan"

        assert oct(np.int32(11)) == "013"
        assert oct(np.float32(11.6)) == "013"
        assert oct(np.complex64(11 - 12j)) == "013"
        assert hex(np.int32(11)) == "0xb"
        assert hex(np.float32(11.6)) == "0xb"
        assert hex(np.complex64(11 - 12j)) == "0xb"
        assert bin(np.int32(11)) == "0b1011"
        exc = raises(TypeError, "bin(np.float32(11.6))")
        assert "index" in exc.value.message
        exc = raises(TypeError, "len(np.int32(11))")
        assert "has no len" in exc.value.message
        assert len(np.string_("123")) == 3
Example No. 42
def now_name(name_base, name_extension, time_step=1e6):
    """ insert a time stamp into the filename_ before .extension

    Args:
        name_base: file name first part - may include directory path
        name_extension: file extension without a period
        time_step: timestamp resolution multiplier; time.time() is multiplied by this before truncation (1e6 gives microsecond resolution)

    Returns:
        time_stamped_file_name: concatenation of the inputs with time-stamp
    """
    nstr = np.str_(int(time.time() * time_step))
    time_stamped_file_name = name_base + '_' + nstr + '.' + name_extension

    return time_stamped_file_name
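Illustrative call (assumes the module-level time and numpy imports that now_name relies on; the exact timestamp differs per run):

fname = now_name('results/run', 'csv')
print(fname)   # e.g. 'results/run_1712345678901234.csv'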
Example No. 43
 def test_dataframe_roundtrip(self):
     if self.should_skip:
         return self.skip('pandas is not importable')
     df = pd.DataFrame({
         'an_int': np.int_([1, 2, 3]),
         'a_float': np.float_([2.5, 3.5, 4.5]),
         'a_nan': np.array([np.nan] * 3),
         'a_minus_inf': np.array([-np.inf] * 3),
         'an_inf': np.array([np.inf] * 3),
         'a_str': np.str_('foo'),
         'a_unicode': np.unicode_('bar'),
         'date': np.array([np.datetime64('2014-01-01')] * 3),
         'complex': np.complex_([1 - 2j, 2 - 1.2j, 3 - 1.3j]),
         # TODO: the following dtypes are not currently supported.
         # 'object': np.object_([{'a': 'b'}]*3),
     })
     decoded_df = self.roundtrip(df)
     assert_frame_equal(decoded_df, df)
Example No. 44
 def test_series_roundtrip(self):
     if self.should_skip:
         return self.skip('pandas is not importable')
     ser = pd.Series({
         'an_int': np.int_(1),
         'a_float': np.float_(2.5),
         'a_nan': np.nan,
         'a_minus_inf': -np.inf,
         'an_inf': np.inf,
         'a_str': np.str_('foo'),
         'a_unicode': np.unicode_('bar'),
         'date': np.datetime64('2014-01-01'),
         'complex': np.complex_(1 - 2j),
         # TODO: the following dtypes are not currently supported.
         # 'object': np.object_({'a': 'b'}),
     })
     decoded_ser = self.roundtrip(ser)
     assert_series_equal(decoded_ser, ser)
Example No. 45
 def test_generic_roundtrip(self):
     values = [
         np.int_(1),
         np.int32(-2),
         np.float_(2.5),
         np.nan,
         -np.inf,
         np.inf,
         np.datetime64('2014-01-01'),
         np.str_('foo'),
         np.unicode_('bar'),
         np.object_({'a': 'b'}),
         np.complex_(1 - 2j)
     ]
     for value in values:
         decoded = self.roundtrip(value)
         assert_equal(decoded, value)
         self.assertTrue(isinstance(decoded, type(value)))
Example No. 46
    def test_multindex_dataframe_roundtrip(self):
        if self.should_skip:
            return self.skip('pandas is not importable')

        df = pd.DataFrame({
            'idx_lvl0': ['a', 'b', 'c'],
            'idx_lvl1': np.int_([1, 1, 2]),
            'an_int': np.int_([1, 2, 3]),
            'a_float': np.float_([2.5, 3.5, 4.5]),
            'a_nan': np.array([np.nan] * 3),
            'a_minus_inf': np.array([-np.inf] * 3),
            'an_inf': np.array([np.inf] * 3),
            'a_str': np.str_('foo'),
            'a_unicode': np.unicode_('bar'),
        })
        df = df.set_index(['idx_lvl0', 'idx_lvl1', ])

        decoded_df = self.roundtrip(df)
        assert_frame_equal(decoded_df, df)
Example No. 47
 def test_generic_roundtrip(self):
     if self.should_skip:
         return self.skip("numpy is not importable")
     values = [
         np.int_(1),
         np.int32(-2),
         np.float_(2.5),
         np.nan,
         -np.inf,
         np.inf,
         np.datetime64("2014-01-01"),
         np.str_("foo"),
         np.unicode_("bar"),
         np.object_({"a": "b"}),
         np.complex_(1 - 2j),
     ]
     for value in values:
         decoded = self.roundtrip(value)
         assert_equal(decoded, value)
         self.assertTrue(isinstance(decoded, type(value)))
Example No. 48
def decode_table(data, encoding='ascii', native=True):
    '''
    Decode byte strings in a table into unicode strings

    Args:
        data : numpy structured array or astropy Table

    Options:
        encoding : encoding to use for converting bytes into unicode;
            default 'ascii'; if None, try ENCODING keyword in data instead
        native : if True (default), only decode if native str type is unicode
            (i.e. python3 but not python2)

    Note: `encoding` option overrides data.meta['ENCODING'];
        use encoding=None to use data.meta['ENCODING'] instead
    '''
    from astropy.table import Table
    import numpy as np
    try:
        table = Table(data, copy=False)
    except ValueError:  #- https://github.com/astropy/astropy/issues/5298
        table = Table(data, copy=True)

    #- Check if native str type is bytes
    if native and np.str_('a').dtype.kind == 'S':
        return table

    encoding = _pick_encoding(table, encoding)
    for col in table.colnames:
        dtype = table[col].dtype
        if dtype.kind == 'S':
            Un = 'U{}'.format(_dtype_size(dtype))
            table.replace_column(col, np.char.decode(table[col], encoding=encoding).astype(Un))

    table.meta['ENCODING'] = encoding
    return table
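The core per-column conversion used above, shown standalone on a plain NumPy byte-string array:

import numpy as np

names_bytes = np.array([b'alpha', b'beta'])                     # dtype 'S5'
names_unicode = np.char.decode(names_bytes, encoding='ascii')   # dtype '<U5'
print(names_bytes.dtype, names_unicode.dtype, names_unicode[0])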
Example No. 49
Creator: Phil Bentley
"""
__version_info__ = (0, 0, 8, 'beta', 0)
__version__ = "%d.%d.%d-%s" % __version_info__[0:4]

import sys, os, logging, types
import ply.lex as lex
from ply.lex import TOKEN
import ply.yacc as yacc
import netCDF4 as nc4
import numpy as np

# default fill values for netCDF-3 data types (as defined in netcdf.h include file)
NC_FILL_BYTE   = np.int8(-127)
NC_FILL_CHAR   = np.str_('\0')
NC_FILL_SHORT  = np.int16(-32767)
NC_FILL_INT    = np.int32(-2147483647)
NC_FILL_FLOAT  = np.float32(9.9692099683868690e+36)   # should get rounded to 9.96921e+36
NC_FILL_DOUBLE = np.float64(9.9692099683868690e+36)

# miscellaneous constants as defined in the ncgen3.l file
FILL_STRING = "_"
XDR_INT_MIN = -2147483648
XDR_INT_MAX =  2147483647

# netcdf to numpy data type map
NC_NP_DATA_TYPE_MAP = {
   'byte':    'b',
   'char':    'c',
   'short':   'h',
Example No. 50
 def test_string(self):
     lr = LogisticRegression()
     for col in ['features', u'features', np.str_('features')]:
         lr.setFeaturesCol(col)
         self.assertEqual(lr.getFeaturesCol(), 'features')
     self.assertRaises(TypeError, lambda: LogisticRegression(featuresCol=2.3))
Example No. 51
            # crop to 100x100
            result = result.crop((50, 50, 150, 150))

            # random brightness
            enhancer = ImageEnhance.Brightness(result)
            result = enhancer.enhance(random.random() + 0.5)

            # save result to HDF5 DB
            dset = f.create_dataset('%07d' % x, (100, 100), dtype='uint8')
            dset[...] = np.array(result)
            
            # update mean
            datasetMean += dset[...].astype('double') / N

            # set attributes for grayscale images
            dset.attrs['CLASS'] = np.str_('IMAGE')
            dset.attrs['VERSION'] = np.str_('1.2')
            dset.attrs['IMAGE_SUBCLASS'] = np.str_('IMAGE_GRAYSCALE')
            dset.attrs['IMAGE_WHITE_IS_ZERO'] = np.uint8(0)
            
            # save attributes for training
            dset.attrs['HAS_SPHERE'] = np.uint8(hasSphere)
            if (hasSphere):
                dset.attrs['RADIUS'] = np.float(sphereDiameter / 2)
                dset.attrs['CENTER_X'] = np.float(sphereCenter[0] - 50)
                dset.attrs['CENTER_Y'] = np.float(sphereCenter[1] - 50)
        except IOError as e:
            print('I/O Error(%d): %s' % (e.errno, e.strerror))
            
        x += 1
Example No. 52
    for item in feat_lst:
        attr  = item['name'].split('/')[-2].split('_')[0].split('-')[-1]
        video = item['name'].split('/')[-2].split('_')[1]
        frame = item['frame']
        # feature -> scores
        feature_key = '_'.join([attr, video, frame])
        try:
            score_key   = '_'.join([attr, video, index.data[feature_key]])
            # The score
            score = prof.data[tracker_type][score_key]
            # The feature
            feat_name   = feature_dir+ feat_type+'/'+attr+'_'+video+'-'+frame+'.'+feat_type
            feature     = _upack_feature_(feat_name)
            svm_feature = _convert_svm_format_(feature)
            # Gen_svm
            line = np.str_(score[0]) + ' ' + svm_feature + '\n'
            output_list.append(line)
            output.write(line)
            count = count +1
        except: 
            pass
        #break

print 'Done,', count, 'features saved in', output_name
output.close()


output_train = open(output_name + '_train', 'w+')
output_test  = open(output_name + '_test', 'w+')

shuffle(output_list)
Example No. 53
    np.bool_().dtype.num      : 'bit',
    np.uint8().dtype.num      : 'unsignedByte',
    np.int16().dtype.num      : 'short',
    np.int32().dtype.num      : 'int',
    np.int64().dtype.num      : 'long',
    np.complex64().dtype.num  : 'floatComplex',
    np.complex128().dtype.num : 'doubleComplex',
    np.unicode_().dtype.num   : 'unicodeChar'
}


# numpy 1.4.1 doesn't have a "bytes_" type
if hasattr(np, 'bytes_'):
    numpy_dtype_to_field_mapping[np.bytes_().dtype.num] = 'char'
else:
    numpy_dtype_to_field_mapping[np.str_().dtype.num] = 'char'


def _all_bytes(column):
    for x in column:
        if not isinstance(x, bytes):
            return False
    return True


def _all_unicode(column):
    for x in column:
        if not isinstance(x, unicode):
            return False
    return True
Example No. 54
            abaixo = get_pixel(img, x, y+1)
            values = limiar(centro, [acima_esquerda, acima, acima_direita, direta, abaixo_direita, abaixo,
                                     abaixo_esquerda, esquerda])
            weights = [1, 2, 4, 8, 16, 32, 64, 128]
            res = 0
            for a in range(0, values.__len__()):
                res += weights[a] * values[a]
            img_lbp[x, y] = res
    return img_lbp

# Selecting the images

n_query = 2  # 1 - 2
n_image = 8  # 1 - 8

img_aux_1 = cv2.imread('../imagens/query_' + str_(n_query) + '.jpg', 0)
img_aux_2 = cv2.imread('../imagens/query_' + str_(n_query) + '.jpg', 0)
img_aux_2 = cv2.imread('../imagens/texture_sample_' + str_(n_image) + '.jpg', 0)
img_lbp = cv2.imread('../imagens/query_' + str_(n_query) + '.jpg', 0)
amostra_lbp = cv2.imread('../imagens/texture_sample_' + str_(n_image) + '.jpg', 0)
img_lbp = get_lbp(img_aux_1, img_lbp)
amostra_lbp = get_lbp(img_aux_2, amostra_lbp)

plt.subplot(121), plt.imshow(img_lbp, cmap='gray')
plt.title('LBP Query ' + str_(n_query)), plt.xticks([]), plt.yticks([])

plt.subplot(122), plt.imshow(amostra_lbp, cmap='gray')
plt.title('LBP Sample ' + str_(n_image)), plt.xticks([]), plt.yticks([])

plt.show()
Example No. 55
def test_numpy_dtype_string():
    """
    Test Python2/3 string compatibility with Python data type
    """
    np.empty(1, dtype=[(np.str_('a'), '?')])
Example No. 56
 def test_string_boxes(self):
     from numpy import str_
     assert isinstance(str_(3), str_)
     assert str_(3) == '3'
     assert str(str_(3)) == '3'
     assert repr(str_(3)) == "'3'"
Example No. 57
 def test_numpy_str(self):
     array = self.h5file.get_node(numpy.str_('/'), numpy.str_('a'))
     self.assertEqual(array.shape, (3, 1))
Example No. 58
 def test_accepts_numpy_string(self):
     numpy_string = numpy.str_("this is a numpy string!")
     a = A()
     a.string = numpy_string
     self.assertEqual(a.string, numpy_string)
     self.assertIs(type(a.string), str)
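A minimal sketch of a property that coerces any str-like value, including numpy.str_, to a plain built-in str, matching the behaviour the test above expects (class A here is a hypothetical stand-in, not the library's actual trait implementation):

import numpy

class A:
    @property
    def string(self):
        return self._string

    @string.setter
    def string(self, value):
        self._string = str(value)   # numpy.str_ (a str subclass) becomes a plain str

a = A()
a.string = numpy.str_("this is a numpy string!")
assert a.string == "this is a numpy string!"
assert type(a.string) is str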
Example No. 59
        return pd.CategoricalIndex(data, categories=cats,
                                   ordered=idx.ordered, name=idx.name)
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(i) for i in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)
    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))


_simple_fake_mapping = {
    'b': np.bool_(True),
    'V': np.void(b' '),
    'M': np.datetime64('1970-01-01'),
    'm': np.timedelta64(1),
    'S': np.str_('foo'),
    'a': np.str_('foo'),
    'U': np.unicode_('foo'),
    'O': 'foo'
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ('i', 'f', 'u'):
        return dtype.type(1)
    elif dtype.kind == 'c':
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
        o = _simple_fake_mapping[dtype.kind]
        return o.astype(dtype) if dtype.kind in ('m', 'M') else o
    else: