def test_dumps_datelike_string_does_not_roundtrip(self):
    """A string that looks like a date *will* be interpreted as a date.

    If, for whatever reason, you don't want that to happen, you'll need to
    do some pre- or post-processing to fix up the results.
    """
    orig_dict = dict(created_at="2011-01-01")
    expected = dict(created_at=datetime.date(2011, 1, 1))
    self.assertEqual(expected, jsondate3.loads(jsondate3.dumps(orig_dict)))
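# A minimal sketch (not part of the original tests) of the post-processing the
# docstring above hints at: if a value must stay a string, convert any date or
# datetime objects back to ISO strings after loading. The helper name
# `dates_back_to_strings` and the key list are hypothetical.
import datetime

import jsondate3


def dates_back_to_strings(obj, keys):
    """Convert date/datetime values for the given keys back to ISO strings."""
    for key in keys:
        value = obj.get(key)
        if isinstance(value, (datetime.date, datetime.datetime)):
            obj[key] = value.isoformat()
    return obj


# Usage: dates_back_to_strings(jsondate3.loads(payload), keys=["created_at"])
# leaves created_at as the plain string "2011-01-01".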
def test_dump_unicode_roundtrips(self):
    orig_dict = {u"foo": u"bar", "empty": u""}

    # json module broken: unicode objects, empty-string objects are str
    result = json.loads(json.dumps(orig_dict))
    self.assertTypeAndValue(six.text_type, u"bar", result[u"foo"])
    self.assertTypeAndValue(six.text_type, "", result[u"empty"])

    # jsondate fix: always return unicode objects
    result = jsondate3.loads(jsondate3.dumps(orig_dict))
    self.assertTypeAndValue(six.text_type, u"bar", result[u"foo"])
    self.assertTypeAndValue(six.text_type, u"", result[u"empty"])
def parse_files(self, path_root, file_ext, test_class):
    """Can we do a simple query and parse?"""
    paths = []
    for root, dirnames, filenames in os.walk(path_root):
        for filename in fnmatch.filter(filenames, file_ext):
            paths.append(os.path.join(root, filename))
    paths.sort()
    path_max_len = max(len(path) for path in paths) + 2
    for i, path in enumerate(paths):
        t1 = time.time()
        sys.stdout.write("%s. Doing %s" % (i, path.ljust(path_max_len)))
        dirname, filename = os.path.split(path)
        filename_sans_ext = filename.split(".")[0]
        json_path = os.path.join(dirname, "%s.json" % filename_sans_ext)
        court = filename_sans_ext.split("_")[0]

        report = test_class(court)
        with open(path, "r") as f:
            report._parse_text(f.read())

        # Does the metadata function work too? It usually, but not always,
        # gets called by report.data.
        try:
            _ = report.metadata
        except AttributeError:
            # Some reports don't have this method.
            pass

        data = report.data
        if not os.path.exists(json_path):
            with open(json_path, "w") as f:
                print("Creating new file at %s" % json_path)
                json.dump(data, f, indent=2, sort_keys=True)
            continue

        data = json.loads(json.dumps(data, sort_keys=True))
        with open(json_path) as f:
            j = json.load(f)
        with self.subTest("Parsing PACER", file=filename, klass=test_class):
            self.assertEqual(j, data)
        t2 = time.time()
        duration = t2 - t1
        warn_or_crash_slow_parser(duration, max_duration=2)
        sys.stdout.write("✓\n")
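# Illustrative only: a sketch of how a concrete test might drive parse_files().
# The fixture directory below is an assumption, not a real path from the
# repository; DocketReport is the report class the fixture files would be
# parsed with.
def test_parsing_docket_reports(self):
    self.parse_files(
        "tests/examples/pacer/dockets",  # hypothetical fixture root
        "*.html",
        DocketReport,
    )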
#!/usr/bin/env python
#
# Takes an .html file on the command line, parses it using the PACER
# Docket Report parser, and outputs JSON to stdout.
import sys

import jsondate3 as json

from juriscraper.pacer import DocketReport
from juriscraper.pacer.http import PacerSession

pacer_session = PacerSession(username="******", password="******")
report = DocketReport("psc", pacer_session)

for path in sys.argv[1:]:
    with open(path, "r", encoding="utf-8") as f:
        report._parse_text(f.read())
    data = report.data
    print(json.dumps(data, indent=2, sort_keys=True, separators=(",", ": ")))
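# Example invocation (illustrative; the script and input filenames are
# hypothetical, not taken from the repository):
#
#   python parse_docket.py some_docket.html > some_docket.json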
def test_unexpected_type_raises(self):
    dict_ = {"foo": set(["a"])}
    with self.assertRaises(TypeError):
        jsondate3.dumps(dict_)
def test_dumps_date_roundtrips(self):
    orig_dict = dict(created_at=datetime.date(2011, 1, 1))
    self.assertEqual(orig_dict, jsondate3.loads(jsondate3.dumps(orig_dict)))
def test_dumps_none_roundtrips(self):
    # Generates a TypeError from _datetime_object_hook
    orig_dict = dict(foo=None)
    self.assertEqual(orig_dict, jsondate3.loads(jsondate3.dumps(orig_dict)))
def test_dumps_str_roundtrips(self):
    # Generates a ValueError from _datetime_object_hook
    orig_dict = dict(foo="bar")
    self.assertEqual(orig_dict, jsondate3.loads(jsondate3.dumps(orig_dict)))
def test_dumps_empty_roundtrips(self):
    self.assertEqual({}, jsondate3.loads(jsondate3.dumps({})))