def check_terms(self, terms, expected, initial_workspace, mask):
    """
    Build a TermGraph from ``terms``, run it, and check every output.

    The graph is executed through ``self.run_graph`` with the supplied
    ``initial_workspace`` and ``mask``.  Each computed result is then paired
    with the matching entry of ``expected`` and handed to ``check_arrays``.
    """
    computed = self.run_graph(TermGraph(terms), initial_workspace, mask)
    # NOTE(review): dzip_exact presumably requires both mappings to have the
    # same keys -- confirm against its definition.
    paired = dzip_exact(computed, expected)
    for _name, pair in paired.items():
        actual, wanted = pair
        check_arrays(actual, wanted)
def assert_dict_equal(result, expected, path=(), msg="", **kwargs):
    """
    Assert that two dicts have the same keys and pairwise-equal values.

    The key sets are compared first via ``_check_sets``.  Every value pair is
    then compared with ``assert_equal``; the ``AssertionError`` messages of
    all failing keys are collected and raised together, one per line.

    Parameters
    ----------
    result, expected : dict
        The mappings to compare.
    path : tuple of str, optional
        Path components describing where ``result`` sits inside a larger
        structure; extended with ``[key]`` for each value comparison.
    msg : str, optional
        Message forwarded to ``_check_sets`` and ``assert_equal``.
    **kwargs
        Forwarded unchanged to ``assert_equal``.

    Raises
    ------
    AssertionError
        If any value comparison fails.
        NOTE(review): ``_check_sets`` presumably raises as well when the key
        sets differ -- confirm against its definition.
    """
    _check_sets(
        viewkeys(result),
        viewkeys(expected),
        msg,
        path + (".%s()" % ("viewkeys" if PY2 else "keys"),),
        "key",
    )

    failures = []
    for k, (resultv, expectedv) in iteritems(dzip_exact(result, expected)):
        try:
            assert_equal(
                resultv,
                expectedv,
                # Wrap ``k`` in a 1-tuple: a bare tuple key would otherwise
                # be unpacked as the %-format arguments and either raise
                # TypeError or lose its parentheses in the rendered path.
                path=path + ("[%r]" % (k,),),
                msg=msg,
                **kwargs
            )
        except AssertionError as e:
            failures.append(str(e))

    if failures:
        raise AssertionError("\n".join(failures))
def assert_dict_equal(result, expected, path=(), msg='', **kwargs):
    """
    Assert that two dicts have the same keys and pairwise-equal values.

    The key views are compared first; on a mismatch an ``AssertionError``
    describing the extra and/or missing keys is raised immediately.  When the
    keys agree, every value pair is compared with ``assert_equal`` and the
    messages of all failing keys are raised together, one per line.

    Parameters
    ----------
    result, expected : dict
        The mappings to compare.
    path : tuple of str or None, optional
        Path components describing where ``result`` sits inside a larger
        structure.  ``None`` is treated as the empty path.
    msg : str, optional
        Message forwarded to ``assert_equal`` for the value comparisons.
    **kwargs
        Forwarded unchanged to ``assert_equal``.

    Raises
    ------
    AssertionError
        If the key sets differ or any value comparison fails.
    """
    if path is None:
        path = ()

    result_keys = viewkeys(result)
    expected_keys = viewkeys(expected)
    if result_keys != expected_keys:
        # NOTE(review): every branch below rebinds ``msg``, so a
        # caller-supplied message never appears in the key-mismatch error.
        if result_keys > expected_keys:
            # ``result`` is a strict superset of ``expected``.
            diff = result_keys - expected_keys
            msg = 'extra %s in result: %r' % (_s('key', diff), diff)
        elif result_keys < expected_keys:
            # ``result`` is a strict subset of ``expected``.
            diff = expected_keys - result_keys
            msg = 'result is missing %s: %r' % (_s('key', diff), diff)
        else:
            # Each side has keys the other lacks.
            sym = result_keys ^ expected_keys
            in_result = sym - expected_keys
            in_expected = sym - result_keys
            msg = '%s only in result: %s\n%s only in expected: %s' % (
                _s('key', in_result),
                in_result,
                _s('key', in_expected),
                in_expected,
            )
        raise AssertionError(
            '%sdict keys do not match\n%s' % (
                _fmt_msg(msg),
                _fmt_path(path + ('.%s()' % ('viewkeys' if PY2 else 'keys'),)),
            ),
        )

    failures = []
    for k, (resultv, expectedv) in iteritems(dzip_exact(result, expected)):
        try:
            assert_equal(
                resultv,
                expectedv,
                # Wrap ``k`` in a 1-tuple: a bare tuple key would otherwise
                # be unpacked as the %-format arguments and either raise
                # TypeError or lose its parentheses in the rendered path.
                path=path + ('[%r]' % (k,),),
                msg=msg,
                **kwargs
            )
        except AssertionError as e:
            failures.append(str(e))

    if failures:
        raise AssertionError('\n'.join(failures))
def assert_dict_equal(result, expected, path=(), msg='', **kwargs):
    """
    Compare two dicts key-by-key, reporting every mismatching value at once.

    The key views of both dicts are first passed to ``_check_sets``.  Each
    value pair is then compared with ``assert_equal`` under a path extended
    by ``[key]``.  Failure messages are gathered and raised as a single
    ``AssertionError`` whose parts are separated by a ``===`` line.
    """
    result_keys = viewkeys(result)
    expected_keys = viewkeys(expected)
    keys_accessor = 'viewkeys' if PY2 else 'keys'
    keys_path = path + ('.%s()' % keys_accessor,)
    _check_sets(result_keys, expected_keys, msg, keys_path, 'key')

    problems = []
    for key, pair in iteritems(dzip_exact(result, expected)):
        actual, wanted = pair
        item_path = path + ('[%r]' % (key,),)
        try:
            assert_equal(actual, wanted, path=item_path, msg=msg, **kwargs)
        except AssertionError as err:
            problems.append(str(err))

    if not problems:
        return
    raise AssertionError('\n===\n'.join(problems))
def assert_dict_equal(result, expected, path=(), msg="", **kwargs):
    """
    Compare two dicts key-by-key, reporting every mismatching value at once.

    ``result.keys()`` and ``expected.keys()`` are first passed to
    ``_check_sets``.  Each value pair is then compared with ``assert_equal``
    under a path extended by ``[key]``.  Failure messages are gathered and
    raised as a single ``AssertionError`` whose parts are separated by a
    ``===`` line.
    """
    result_keys = result.keys()
    expected_keys = expected.keys()
    _check_sets(result_keys, expected_keys, msg, path + (".keys()",), "key")

    collected = []
    zipped = dzip_exact(result, expected)
    for key, pair in zipped.items():
        got, want = pair
        here = path + ("[%r]" % (key,),)
        try:
            assert_equal(got, want, path=here, msg=msg, **kwargs)
        except AssertionError as exc:
            collected.append(str(exc))

    if collected:
        combined = "\n===\n".join(collected)
        raise AssertionError(combined)
def check_terms(self, terms, expected, initial_workspace, mask, check=check_arrays):
    """
    Build an ExecutionPlan for ``terms``, run it, and check every output.

    The plan spans the first through last entries of ``mask.index`` on the
    ``US_EQUITIES`` domain and is executed through ``self.run_graph`` with
    ``initial_workspace`` and ``mask``.  Each computed result is paired with
    the matching entry of ``expected`` and passed to ``check``.

    Returns the mapping produced by ``self.run_graph``.
    """
    bounds = mask.index[[0, -1]]
    start_date, end_date = bounds
    plan = ExecutionPlan(
        domain=US_EQUITIES,
        terms=terms,
        start_date=start_date,
        end_date=end_date,
    )

    results = self.run_graph(plan, initial_workspace, mask)
    # NOTE(review): dzip_exact presumably requires both mappings to have the
    # same keys -- confirm against its definition.
    paired = dzip_exact(results, expected)
    for _name, pair in paired.items():
        actual, wanted = pair
        check(actual, wanted)
    return results
def assert_dict_equal(result, expected, path=(), msg='', **kwargs):
    """
    Compare two dicts key-by-key, reporting every mismatching value at once.

    The key views of both dicts are first passed to ``_check_sets``.  Each
    value pair is then compared with ``assert_equal`` under a path extended
    by ``[key]``.  Failure messages are gathered and raised as a single
    ``AssertionError`` with one message per line.
    """
    result_keys = viewkeys(result)
    expected_keys = viewkeys(expected)
    accessor_name = 'viewkeys' if PY2 else 'keys'
    _check_sets(
        result_keys,
        expected_keys,
        msg,
        path + ('.%s()' % accessor_name,),
        'key',
    )

    messages = []
    for key, pair in iteritems(dzip_exact(result, expected)):
        lhs, rhs = pair
        value_path = path + ('[%r]' % (key,),)
        try:
            assert_equal(lhs, rhs, path=value_path, msg=msg, **kwargs)
        except AssertionError as failure:
            messages.append(str(failure))

    if not messages:
        return
    raise AssertionError('\n'.join(messages))
def test_normalizations_hand_computed(self):
    """
    Check ``demean`` against the hand-computed example in factor.demean.

    Six variants are exercised: plain, masked, grouped by an integer
    classifier, grouped by a string classifier, and the two grouped variants
    combined with the mask.
    """
    factor = self.f
    mask_term = Mask()
    int_classifier = C()
    str_classifier = C(dtype=categorical_dtype, missing_value=None)

    factor_data = array([
        [1.0, 2.0, 3.0, 4.0],
        [1.5, 2.5, 3.5, 1.0],
        [2.0, 3.0, 4.0, 1.5],
        [2.5, 3.5, 1.0, 2.0],
    ])
    # False on the diagonal: one cell is excluded from every row.
    filter_data = array(
        [
            [False, True, True, True],
            [True, False, True, True],
            [True, True, False, True],
            [True, True, True, False],
        ],
        dtype=bool,
    )
    # Columns 0-1 carry label 1, columns 2-3 carry label 2.
    classifier_data = array(
        [
            [1, 1, 2, 2],
            [1, 1, 2, 2],
            [1, 1, 2, 2],
            [1, 1, 2, 2],
        ],
        dtype=int64_dtype,
    )
    labels_as_objects = classifier_data.astype(str).astype(object)
    string_classifier_data = LabelArray(labels_as_objects, missing_value=None)

    terms = {
        'vanilla': factor.demean(),
        'masked': factor.demean(mask=mask_term),
        'grouped': factor.demean(groupby=int_classifier),
        'grouped_str': factor.demean(groupby=str_classifier),
        'grouped_masked': factor.demean(
            mask=mask_term, groupby=int_classifier,
        ),
        'grouped_masked_str': factor.demean(
            mask=mask_term, groupby=str_classifier,
        ),
    }

    expected = {
        'vanilla': array([
            [-1.500, -0.500, 0.500, 1.500],
            [-0.625, 0.375, 1.375, -1.125],
            [-0.625, 0.375, 1.375, -1.125],
            [0.250, 1.250, -1.250, -0.250],
        ]),
        'masked': array([
            [nan, -1.000, 0.000, 1.000],
            [-0.500, nan, 1.500, -1.000],
            [-0.166, 0.833, nan, -0.666],
            [0.166, 1.166, -1.333, nan],
        ]),
        'grouped': array([
            [-0.500, 0.500, -0.500, 0.500],
            [-0.500, 0.500, 1.250, -1.250],
            [-0.500, 0.500, 1.250, -1.250],
            [-0.500, 0.500, -0.500, 0.500],
        ]),
        'grouped_masked': array([
            [nan, 0.000, -0.500, 0.500],
            [0.000, nan, 1.250, -1.250],
            [-0.500, 0.500, nan, 0.000],
            [-0.500, 0.500, 0.000, nan],
        ]),
    }
    # The classifier's dtype must not influence the result, so the string
    # variants reuse the integer variants' expectations.
    expected['grouped_str'] = expected['grouped']
    expected['grouped_masked_str'] = expected['grouped_masked']

    graph = TermGraph(terms)
    workspace = {
        factor: factor_data,
        int_classifier: classifier_data,
        str_classifier: string_classifier_data,
        mask_term: filter_data,
    }
    all_ones = self.build_mask(self.ones_mask(shape=factor_data.shape))
    results = self.run_graph(graph, initial_workspace=workspace, mask=all_ones)

    for name, (actual, wanted) in dzip_exact(results, expected).items():
        # The hand-computed numbers are coarse (repeating decimals are cut
        # off after three places), so the tolerance is loose: this only
        # guards against the documented example being plainly wrong.
        check_allclose(
            actual,
            wanted,
            atol=0.001,
            err_msg="Mismatch for %r" % name,
        )
def test_normalizations_hand_computed(self):
    """
    Check ``demean`` against the hand-computed example in factor.demean.

    Runs the plain, masked, grouped and grouped+masked variants (the grouped
    ones with both an integer and a string classifier) on a 4x4 input and
    compares each output with values worked out by hand.
    """
    f = self.f
    row_mask = Mask()
    groups = C()
    str_groups = C(dtype=categorical_dtype, missing_value=None)

    factor_rows = [
        [1.0, 2.0, 3.0, 4.0],
        [1.5, 2.5, 3.5, 1.0],
        [2.0, 3.0, 4.0, 1.5],
        [2.5, 3.5, 1.0, 2.0],
    ]
    factor_data = array(factor_rows)

    # False on the diagonal: one cell is excluded from every row.
    filter_rows = [
        [False, True, True, True],
        [True, False, True, True],
        [True, True, False, True],
        [True, True, True, False],
    ]
    filter_data = array(filter_rows, dtype=bool)

    # Every row uses the same labels: 1 for columns 0-1, 2 for columns 2-3.
    classifier_data = array([[1, 1, 2, 2]] * 4, dtype=int64_dtype)
    string_classifier_data = LabelArray(
        classifier_data.astype(str).astype(object),
        missing_value=None,
    )

    terms = {
        'vanilla': f.demean(),
        'masked': f.demean(mask=row_mask),
        'grouped': f.demean(groupby=groups),
        'grouped_str': f.demean(groupby=str_groups),
        'grouped_masked': f.demean(mask=row_mask, groupby=groups),
        'grouped_masked_str': f.demean(mask=row_mask, groupby=str_groups),
    }

    vanilla_expected = array([
        [-1.500, -0.500, 0.500, 1.500],
        [-0.625, 0.375, 1.375, -1.125],
        [-0.625, 0.375, 1.375, -1.125],
        [0.250, 1.250, -1.250, -0.250],
    ])
    masked_expected = array([
        [nan, -1.000, 0.000, 1.000],
        [-0.500, nan, 1.500, -1.000],
        [-0.166, 0.833, nan, -0.666],
        [0.166, 1.166, -1.333, nan],
    ])
    grouped_expected = array([
        [-0.500, 0.500, -0.500, 0.500],
        [-0.500, 0.500, 1.250, -1.250],
        [-0.500, 0.500, 1.250, -1.250],
        [-0.500, 0.500, -0.500, 0.500],
    ])
    grouped_masked_expected = array([
        [nan, 0.000, -0.500, 0.500],
        [0.000, nan, 1.250, -1.250],
        [-0.500, 0.500, nan, 0.000],
        [-0.500, 0.500, 0.000, nan],
    ])
    expected = {
        'vanilla': vanilla_expected,
        'masked': masked_expected,
        'grouped': grouped_expected,
        'grouped_masked': grouped_masked_expected,
        # Changing the classifier dtype shouldn't affect anything, so the
        # string-classifier variants share the integer variants' arrays.
        'grouped_str': grouped_expected,
        'grouped_masked_str': grouped_masked_expected,
    }

    graph = TermGraph(terms)
    results = self.run_graph(
        graph,
        initial_workspace={
            f: factor_data,
            groups: classifier_data,
            str_groups: string_classifier_data,
            row_mask: filter_data,
        },
        mask=self.build_mask(self.ones_mask(shape=factor_data.shape)),
    )

    # The expected values are only accurate to three decimal places
    # (repeating decimals were truncated), hence the loose tolerance.  The
    # goal is merely to show the documented example is not grossly wrong.
    tolerance = 0.001
    for key, pair in dzip_exact(results, expected).items():
        res, exp = pair
        check_allclose(
            res,
            exp,
            atol=tolerance,
            err_msg="Mismatch for %r" % key,
        )