예제 #1
0
 def test_save(self):
     # Saves a small XArray under hdfs_prefix and checks that a '_SUCCESS'
     # marker file exists inside the output path.
     t = XArray([1, 2, 3])
     path = '{}/tmp/array-csv'.format(hdfs_prefix)
     t.save(path)
     try:
         success_path = os.path.join(path, '_SUCCESS')
         self.assertTrue(fileio.is_file(success_path))
     finally:
         # Clean up even when the assertion fails; this output path is
         # reused by other tests, so stale data must not be left behind.
         fileio.delete(path)
예제 #2
0
 def test_save_format(self):
     # Saves a small XArray with format='csv' and checks that the saved file
     # contains the values '1', '2', '3' on consecutive lines.
     t = XArray([1, 2, 3])
     path = '{}/tmp/array-csv'.format(hdfs_prefix)
     t.save(path, format='csv')
     try:
         with fileio.open_file(path) as f:
             for expected in ('1', '2', '3'):
                 self.assertEqual(expected, f.readline().strip())
     finally:
         # Clean up even when an assertion fails; this output path is
         # reused by other tests, so stale data must not be left behind.
         fileio.delete(path)
예제 #3
0
    def user_features(self):
        """
        Return the user features of the underlying model.

        The value of ``self.model.userFeatures()`` is wrapped with
        ``XArray.from_rdd``, passing ``list`` as the second argument.
        """
        features_rdd = self.model.userFeatures()
        return XArray.from_rdd(features_rdd, list)
예제 #4
0
    def item_features(self):
        """
        Return the item features of the underlying model.

        The value of ``self.model.productFeatures()`` is wrapped with
        ``XArray.from_rdd``, passing ``list`` as the second argument.
        """
        features_rdd = self.model.productFeatures()
        return XArray.from_rdd(features_rdd, list)
예제 #5
0
    def _base_predict(self, data):
        """
        Run the underlying model's predict on features built from data.

        ``data`` is first converted with ``self.make_features``.  The
        result decides how the model is called:

        * a ``DenseVector`` is passed straight to ``self.model.predict``
          and that result is returned as is;
        * an ``XArray`` whose ``dtype()`` is a subclass of ``DenseVector``
          is converted with ``to_spark_rdd()``, predicted on, and the
          result is wrapped with ``XArray.from_rdd(..., float)``.

        Raises
        ------
        TypeError
            If the features are neither of the two supported kinds.
        """
        feats = self.make_features(data)

        # Single item: hand it to the model directly.
        if isinstance(feats, DenseVector):
            return self.model.predict(feats)

        # Anything that is not an XArray of DenseVector is rejected.
        is_vector_array = (isinstance(feats, XArray)
                           and issubclass(feats.dtype(), DenseVector))
        if not is_vector_array:
            raise TypeError('must pass a DenseVector or XArray of DenseVector')

        predictions = self.model.predict(feats.to_spark_rdd())
        return XArray.from_rdd(predictions, float)
예제 #6
0
def crossproduct(d):
    """
    Build a frame holding the cross product of all provided options.

    Every combination of one value per option becomes a dict mapping
    option name to value; the list of these dicts is wrapped in an
    XArray and unpacked with an empty column name prefix.

    Parameters
    ----------
    d : dict
        Each key is the name of an option, and each value is a list
        of the possible values for that option.

    Returns
    -------
    out : result of ``XArray.unpack``
        Presumably a frame with one column per key of ``d`` and one row
        per combination of values (the upstream docs call this an
        SFrame) -- TODO confirm against ``XArray.unpack``.

    Example
    -------
    settings = {'argument_1':[0, 1],
                'argument_2':['a', 'b', 'c']}
    print crossproduct(settings)
    +------------+------------+
    | argument_2 | argument_1 |
    +------------+------------+
    |     a      |     0      |
    |     a      |     1      |
    |     b      |     0      |
    |     b      |     1      |
    |     c      |     0      |
    |     c      |     1      |
    +------------+------------+
    [6 rows x 2 columns]
    """

    from xframes import XArray

    names = d.keys()
    # One dict per combination, pairing each option name with its value.
    rows = [dict(zip(names, combo))
            for combo in itertools.product(*d.values())]
    return XArray(rows).unpack(column_name_prefix='')
예제 #7
0
 def test_construct_local_file_dict(self):
     # Builds an XArray from the 'test-array-dict' file under hdfs_prefix and
     # checks its length (4), its dtype (dict) and its first element.
     source = '{}/user/xpatterns/files/test-array-dict'.format(hdfs_prefix)
     arr = XArray(source)
     self.assertEqualLen(4, arr)
     self.assertIs(dict, arr.dtype())
     first = arr[0]
     self.assertDictEqual({1: 'a', 2: 'b'}, first)
예제 #8
0
 def test_construct_local_file_list(self):
     # Builds an XArray from the 'test-array-list' file under hdfs_prefix and
     # checks its length (4), its dtype (list) and its first element.
     source = '{}/user/xpatterns/files/test-array-list'.format(hdfs_prefix)
     arr = XArray(source)
     self.assertEqualLen(4, arr)
     self.assertIs(list, arr.dtype())
     first = arr[0]
     self.assertListEqual([1, 2], first)
예제 #9
0
 def test_construct_local_file_str(self):
     # Builds an XArray from the 'test-array-str' file under hdfs_prefix and
     # checks its length (4), its dtype (str) and its first element.
     source = '{}/user/xpatterns/files/test-array-str'.format(hdfs_prefix)
     arr = XArray(source)
     self.assertEqualLen(4, arr)
     self.assertIs(str, arr.dtype())
     first = arr[0]
     self.assertEqual('a', first)
예제 #10
0
 def test_construct_local_file_float(self):
     # Builds an XArray from the 'test-array-float' file under hdfs_prefix and
     # checks its length (4), its dtype (float) and its first element.
     source = '{}/user/xpatterns/files/test-array-float'.format(hdfs_prefix)
     arr = XArray(source)
     self.assertEqualLen(4, arr)
     self.assertIs(float, arr.dtype())
     first = arr[0]
     self.assertEqual(1.0, first)
예제 #11
0
 def test_construct_file_int(self):
     # Builds an XArray from the 'test-array-int' file under hdfs_prefix and
     # checks its length (4), its dtype (int) and its first element.
     arr = XArray('{}/user/xpatterns/files/test-array-int'.format(hdfs_prefix))
     self.assertEqualLen(4, arr)
     self.assertIs(int, arr.dtype())
     first = arr[0]
     self.assertEqual(1, first)