def test_invalid_rfc3339_timestamps(self):
    """
    Validate that datetime strings that are valid ISO 8601 but invalid
    RFC 3339 trigger a ValueError when passed to `parse_rfc3339`, and that
    this ValueError explicitly mentions RFC 3339.

    Every timestamp is checked in its own subTest so that one value that is
    unexpectedly accepted does not prevent the remaining values from being
    exercised.
    """
    invalid_timestamps = [
        "2018-01-02",  # Missing mandatory time
        "2018-01-02T03",  # Missing mandatory minute and second
        "2018-01-02T03Z",  # Missing mandatory minute and second
        "2018-01-02T03:04",  # Missing mandatory second
        "2018-01-02T03:04Z",  # Missing mandatory second
        "2018-01-02T03:04:01+04",  # Missing mandatory offset minute
        "2018-01-02T03:04:05",  # Missing mandatory offset
        "2018-01-02T03:04:05.12345",  # Missing mandatory offset
        "2018-01-02T24:00:00Z",  # 24:00:00 is not valid in RFC 3339
        "20180102T03:04:05-12:34",  # Missing mandatory date separators
        "2018-01-02T030405-12:34",  # Missing mandatory time separators
        "2018-01-02T03:04:05-1234",  # Missing mandatory offset separator
        "2018-01-02T03:04:05,12345Z",  # Invalid comma fractional second separator
    ]
    for timestamp in invalid_timestamps:
        with self.subTest(timestamp=timestamp):
            with self.assertRaisesRegex(
                ValueError,
                r"RFC 3339",
                msg="Timestamp '{0}' was supposed to be invalid, but parsing it didn't raise ValueError.".format(
                    timestamp
                ),
            ):
                ciso8601.parse_rfc3339(timestamp)
def munge(dct: Dict[str, str]) -> AttrDict:
    """Build an ``AttrDict`` from ``dct`` with its text and dates processed.

    * ``text`` (when present) is HTML-escaped, run through every
      pattern/replacement pair in ``HTML_FILTERS`` and then ``ABBREVS``, and
      finally has its STX/ETX control characters turned into ``<`` and ``>``.
    * ``timestamp`` and ``self`` (when present) are replaced by the result of
      ``parse_rfc3339``.

    The incoming mapping itself is not modified; a new ``AttrDict`` is
    returned.
    """
    note = AttrDict(**dct)

    if 'text' in note:
        text = html.escape(note.text)
        # Order matters: HTML filters run before abbreviation expansion.
        for table in (HTML_FILTERS, ABBREVS):
            for pattern, replacement in table.items():
                text = pattern.sub(replacement, text)
        # NOTE(review): STX/ETX look like placeholders for markup that must
        # survive the escaping above — confirm against the filter tables.
        note.text = text.replace('\N{STX}', '<').replace('\N{ETX}', '>')

    if 'timestamp' in note:
        note.timestamp = parse_rfc3339(note.timestamp)
    if 'self' in note:
        note.self = parse_rfc3339(note.self)

    return note
def safe_parse_rfc3339(time_string):
    """
    Parse an RFC 3339 timestamp, returning None instead of raising.

    None is returned both for values that are not ``str`` and for strings
    that ``parse_rfc3339`` rejects with ``ValueError``.

    >>> from datetime import datetime
    >>> isinstance(safe_parse_rfc3339('2018-01-01T10:00:00.52Z'), datetime)
    True
    >>> safe_parse_rfc3339('2018-01-01T10:00:00.52Z')
    datetime.datetime(2018, 1, 1, 10, 0, 0, 520000, tzinfo=datetime.timezone.utc)
    >>> safe_parse_rfc3339('2018-01-01T10:00:58-06:00')
    datetime.datetime(2018, 1, 1, 10, 0, 58, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=64800)))
    >>> safe_parse_rfc3339('2018-05-10') is None
    True
    >>> safe_parse_rfc3339('malformed $h14') is None
    True
    >>> safe_parse_rfc3339('2018-01-01T00:00:00') is None
    True
    >>> safe_parse_rfc3339(None) is None
    True
    """
    if isinstance(time_string, str):
        try:
            return parse_rfc3339(time_string)
        except ValueError:  # noqa
            # Not a valid RFC 3339 timestamp: fall through to None.
            pass
    return None
def test_valid_rfc3339_timestamps(self):
    """
    Validate that valid RFC 3339 datetimes are parseable by parse_rfc3339
    and produce the same result as parse_datetime.

    Each string is checked in its own subTest so a failure names the
    offending input and the remaining inputs are still exercised.
    """
    valid_timestamps = [
        '2018-01-02T03:04:05Z',
        '2018-01-02t03:04:05z',  # Lowercase "t" and "z"
        '2018-01-02 03:04:05z',  # Space as the date/time separator
        '2018-01-02T03:04:05+00:00',
        '2018-01-02T03:04:05-00:00',
        '2018-01-02T03:04:05.12345Z',
        '2018-01-02T03:04:05+01:23',
        '2018-01-02T03:04:05-12:34',
    ]
    for string in valid_timestamps:
        with self.subTest(timestamp=string):
            self.assertEqual(
                ciso8601.parse_datetime(string),
                ciso8601.parse_rfc3339(string),
            )
def test_valid_rfc3339_timestamps(self):
    """
    Validate that valid RFC 3339 datetimes are parseable by parse_rfc3339
    and produce the same result as parse_datetime.

    Inputs run as individual subTests: a failing input is reported by name
    and does not stop the remaining inputs from being checked.
    """
    for string in (
        "2018-01-02T03:04:05Z",
        "2018-01-02t03:04:05z",  # Lowercase "t" and "z"
        "2018-01-02 03:04:05z",  # Space as the date/time separator
        "2018-01-02T03:04:05+00:00",
        "2018-01-02T03:04:05-00:00",
        "2018-01-02T03:04:05.12345Z",
        "2018-01-02T03:04:05+01:23",
        "2018-01-02T03:04:05-12:34",
    ):
        with self.subTest(timestamp=string):
            self.assertEqual(parse_datetime(string), parse_rfc3339(string))
def valid_timestamp(s: str) -> str:
    """Check that ``s`` is an RFC 3339 timestamp and return it unchanged.

    The string is parsed with ``ciso8601.parse_rfc3339`` purely as a
    validity check; the parsed value is discarded.

    Raises:
        argparse.ArgumentTypeError: if parsing raises ``ValueError``. The
            message is taken from the ``ValueError`` and the original
            exception is attached as the cause.
    """
    try:
        ciso8601.parse_rfc3339(s)
    except ValueError as e:
        # An argument-less ValueError has empty ``args``; fall back to
        # str(e) rather than letting the lookup raise IndexError.
        message = e.args[0] if e.args else str(e)
        raise argparse.ArgumentTypeError(message) from e
    return s
def _time_check(v):
    """Parse ``v`` with ``ciso8601.parse_rfc3339`` and return the result.

    The parsed value (rather than the input) is returned so that callers
    needing the datetime do not have to parse the string a second time.
    Any exception raised by the parser propagates to the caller.
    """
    parsed = ciso8601.parse_rfc3339(v)
    return parsed
from ciso8601 import parse_rfc3339
from jnrbase.attrdict import AttrDict
from lxml import html

from feedwerk.atom import AtomFeed

# Parse the HTML page whose path is given as the first command-line argument.
with open(sys.argv[1]) as f:
    page = html.parse(f)

# JSON text is UTF-8; decode it explicitly instead of relying on the
# platform's locale default encoding.
with open('data/µnotes.json', encoding='utf-8') as f:
    notes = json.load(f, object_hook=AttrDict)
with open('data/config.json', encoding='utf-8') as f:
    config = json.load(f, object_hook=AttrDict)

# The config mapping supplies the feed's constructor arguments.
feed = AtomFeed(**config)

# Pair the notes (iterated in reverse) with the page's `.note` elements and
# keep only the first 15 pairs.
pairs = list(zip(reversed(notes), page.getroot().cssselect('.note')))[:15]
for note, post in pairs:
    title = note.text
    # Serialise the element and flatten it onto a single line.
    content = html.tostring(post, pretty_print=True).decode()
    content = content.strip().replace('\n', '')
    # The entry's date comes from the `datetime` attribute of its <time> tag.
    time = parse_rfc3339(post.cssselect('p.meta time')[0].get('datetime'))
    feed.add(title=title, content=content, content_type='html',
             url='%s#%s' % (config.url, post.get("id")),
             updated=time, published=time, xml_base=config.url)

print(feed.to_string())