Пример #1
0
 def test_invalid_rfc3339_timestamps(self):
     """
     Validate that datetime strings that are valid ISO 8601 but invalid RFC
     3339 trigger a ValueError when passed to parse_rfc3339, and that this
     ValueError explicitly mentions RFC 3339.

     Every timestamp runs in its own subTest so that one unexpectedly
     accepted value does not hide the results for the remaining ones.
     """
     invalid_timestamps = [
         "2018-01-02",  # Missing mandatory time
         "2018-01-02T03",  # Missing mandatory minute, second and offset
         "2018-01-02T03Z",  # Missing mandatory minute and second
         "2018-01-02T03:04",  # Missing mandatory second and offset
         "2018-01-02T03:04Z",  # Missing mandatory second
         "2018-01-02T03:04:01+04",  # Missing mandatory offset minute
         "2018-01-02T03:04:05",  # Missing mandatory offset
         "2018-01-02T03:04:05.12345",  # Missing mandatory offset
         "2018-01-02T24:00:00Z",  # 24:00:00 is not valid in RFC 3339
         "20180102T03:04:05-12:34",  # Missing mandatory date separators
         "2018-01-02T030405-12:34",  # Missing mandatory time separators
         "2018-01-02T03:04:05-1234",  # Missing mandatory offset separator
         "2018-01-02T03:04:05,12345Z",  # Invalid comma fractional second separator
     ]
     for timestamp in invalid_timestamps:
         # Shown only when no exception at all is raised for this timestamp.
         failure_msg = (
             "Timestamp '{0}' was supposed to be invalid, "
             "but parsing it didn't raise ValueError."
         ).format(timestamp)
         with self.subTest(timestamp=timestamp):
             # The regex additionally requires the error text to name RFC 3339.
             with self.assertRaisesRegex(ValueError, r"RFC 3339",
                                         msg=failure_msg):
                 ciso8601.parse_rfc3339(timestamp)
Пример #2
0
def munge(dct: Dict[str, str]) -> AttrDict:
    """Return an AttrDict copy of *dct* with cleaned text and parsed times.

    * ``text`` (if present) is HTML-escaped, then rewritten by every
      pattern/replacement pair in ``HTML_FILTERS`` followed by every pair in
      ``ABBREVS``.  Finally STX/ETX control characters are replaced with
      ``<`` and ``>`` -- presumably placeholders emitted by the replacement
      strings so generated markup survives the escaping step; verify against
      the filter tables.
    * ``timestamp`` and ``self`` (if present) are replaced by the result of
      ``parse_rfc3339``.

    The mapping passed in is not modified; a new AttrDict is built from it.
    """
    record = AttrDict(**dct)

    if 'text' in record:
        text = html.escape(record.text)
        # Order matters: HTML filters run before abbreviation expansion.
        for table in (HTML_FILTERS, ABBREVS):
            for pattern, replacement in table.items():
                text = pattern.sub(replacement, text)
        record.text = text.replace('\N{STX}', '<').replace('\N{ETX}', '>')

    # Both fields hold RFC 3339 strings that callers want as parsed values.
    for field in ('timestamp', 'self'):
        if field in record:
            setattr(record, field, parse_rfc3339(getattr(record, field)))

    return record
Пример #3
0
def safe_parse_rfc3339(time_string):
    """
    Parse an RFC 3339 timestamp, returning None instead of raising.

    Anything that is not a ``str`` yields None without being parsed, and a
    ValueError raised by ``parse_rfc3339`` is converted into None as well.

    >>> from datetime import datetime
    >>> isinstance(safe_parse_rfc3339('2018-01-01T10:00:00.52Z'), datetime)
    True
    >>> safe_parse_rfc3339('2018-01-01T10:00:00.52Z')
    datetime.datetime(2018, 1, 1, 10, 0, 0, 520000, tzinfo=datetime.timezone.utc)
    >>> safe_parse_rfc3339('2018-01-01T10:00:58-06:00')
    datetime.datetime(2018, 1, 1, 10, 0, 58, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=64800)))
    >>> safe_parse_rfc3339('2018-05-10') is None
    True
    >>> safe_parse_rfc3339('malformed $h14') is None
    True
    >>> safe_parse_rfc3339('2018-01-01T00:00:00') is None
    True
    >>> safe_parse_rfc3339(None) is None
    True
    """
    parsed = None
    if isinstance(time_string, str):
        try:
            parsed = parse_rfc3339(time_string)
        except ValueError:
            # Rejected by the parser: keep the None default.
            pass
    return parsed
Пример #4
0
 def test_valid_rfc3339_timestamps(self):
     """
     Validate that valid RFC 3339 datetimes are parseable by parse_rfc3339
     and produce the same result as parse_datetime.

     Every timestamp runs in its own subTest so that a single mismatch does
     not prevent the remaining values from being checked.
     """
     valid_timestamps = [
         '2018-01-02T03:04:05Z',
         '2018-01-02t03:04:05z',  # Lowercase 't' separator and 'z' designator
         '2018-01-02 03:04:05z',  # Space instead of 'T'
         '2018-01-02T03:04:05+00:00',
         '2018-01-02T03:04:05-00:00',
         '2018-01-02T03:04:05.12345Z',  # Fractional seconds
         '2018-01-02T03:04:05+01:23',
         '2018-01-02T03:04:05-12:34',
     ]
     for string in valid_timestamps:
         with self.subTest(timestamp=string):
             self.assertEqual(ciso8601.parse_datetime(string),
                              ciso8601.parse_rfc3339(string))
Пример #5
0
 def test_valid_rfc3339_timestamps(self):
     """
     Validate that valid RFC 3339 datetimes are parseable by parse_rfc3339
     and produce the same result as parse_datetime.

     Every timestamp runs in its own subTest so that a single mismatch does
     not prevent the remaining values from being checked.
     """
     valid_timestamps = [
         "2018-01-02T03:04:05Z",
         "2018-01-02t03:04:05z",  # Lowercase 't' separator and 'z' designator
         "2018-01-02 03:04:05z",  # Space instead of 'T'
         "2018-01-02T03:04:05+00:00",
         "2018-01-02T03:04:05-00:00",
         "2018-01-02T03:04:05.12345Z",  # Fractional seconds
         "2018-01-02T03:04:05+01:23",
         "2018-01-02T03:04:05-12:34",
     ]
     for string in valid_timestamps:
         with self.subTest(timestamp=string):
             self.assertEqual(
                 parse_datetime(string), parse_rfc3339(string)
             )
Пример #6
0
def valid_timestamp(s: str) -> str:
    """Check that *s* is accepted by ``ciso8601.parse_rfc3339``.

    The parsed value is discarded; the original string is handed back
    unchanged so callers keep the textual form.

    Returns:
        *s* itself when parsing succeeds.

    Raises:
        argparse.ArgumentTypeError: if the parser raises ValueError.  The
            error carries the parser's message and is chained to the
            original exception.
    """
    try:
        ciso8601.parse_rfc3339(s)
    except ValueError as e:
        # str(e) equals e.args[0] for a single string argument, but cannot
        # fail with IndexError when the ValueError carries no arguments.
        raise argparse.ArgumentTypeError(str(e)) from e
    return s
Пример #7
0
def _time_check(v):
    """Parse *v* with ``ciso8601.parse_rfc3339`` and return the result.

    The parsed value (rather than the input string) is returned so that
    code needing the datetime does not have to parse *v* a second time.
    Any exception raised by the parser propagates to the caller.
    """
    parsed = ciso8601.parse_rfc3339(v)
    return parsed
Пример #8
0
from ciso8601 import parse_rfc3339
from jnrbase.attrdict import AttrDict
from lxml import html
from feedwerk.atom import AtomFeed

# Build an Atom feed from a rendered HTML page (path given as the first
# command-line argument) plus the notes and feed configuration stored as
# JSON under data/, and print the feed to stdout.

with open(sys.argv[1]) as f:
    page = html.parse(f)

# JSON interchange text is UTF-8, so decode it explicitly instead of relying
# on the platform's locale encoding.
with open('data/µnotes.json', encoding='utf-8') as f:
    notes = json.load(f, object_hook=AttrDict)

with open('data/config.json', encoding='utf-8') as f:
    config = json.load(f, object_hook=AttrDict)

feed = AtomFeed(**config)
# Pair the notes, walked in reverse, with the '.note' elements in page order
# and keep only the first 15 pairs.
# NOTE(review): presumably the page lists the newest note first -- confirm.
for note, post in list(zip(reversed(notes),
                           page.getroot().cssselect('.note')))[:15]:
    title = note.text
    # Serialise the element, decode the bytes to str and flatten it onto a
    # single line.
    content = html.tostring(post, True).decode()
    content = content.strip().replace('\n', '')
    # The entry time is taken from the datetime attribute of the first
    # <time> element inside the note's 'p.meta' paragraph.
    time = parse_rfc3339(post.cssselect('p.meta time')[0].get('datetime'))
    feed.add(title=title,
             content=content,
             content_type='html',
             url='%s#%s' % (config.url, post.get("id")),
             updated=time,
             published=time,
             xml_base=config.url)

print(feed.to_string())