Exemplo n.º 1
0
    def get_change(self, *, page_id, to_version_id, from_version_id=''):
        """
        Fetch the Change connecting two Versions of a Page.

        Parameters
        ----------
        page_id : string
        to_version_id : string
        from_version_id : string, optional
            When omitted, the version immediately prior to ``to_version``
            is assumed.

        Returns
        -------
        response : dict
        """
        change_url = (f'/pages/{page_id}/changes/'
                      f'{from_version_id}..{to_version_id}')
        response = self.request_json(GET, change_url)
        record = response['data']
        # Changes generated just-in-time for this request may carry null
        # created_at/updated_at; only convert when both are present.
        if record['created_at'] and record['updated_at']:
            record['created_at'] = parse_timestamp(record['created_at'])
            record['updated_at'] = parse_timestamp(record['updated_at'])
        return response
Exemplo n.º 2
0
    def get_version(self,
                    version_id,
                    include_change_from_previous=None,
                    include_change_from_earliest=None):
        """
        Fetch a single Version by its ID.

        Parameters
        ----------
        version_id : string
        include_change_from_previous : boolean, optional
            When True, the response carries a `change_from_previous` field
            describing the change from the previous version of the page.
        include_change_from_earliest : boolean, optional
            When True, the response carries a `change_from_earliest` field
            describing the change from the earliest version of the page.

        Returns
        -------
        response : dict
        """
        query = {
            'include_change_from_previous': include_change_from_previous,
            'include_change_from_earliest': include_change_from_earliest
        }
        response = self.request_json(GET, f'/versions/{version_id}',
                                     params=query)
        record = response['data']
        # Convert timestamp strings to datetime objects in place.
        for field in ('capture_time', 'updated_at', 'created_at'):
            record[field] = parse_timestamp(record[field])
        return response
Exemplo n.º 3
0
    def list_changes(self, page_id, include_total=False):
        """
        List the Changes between successive Versions of a Page.

        Parameters
        ----------
        page_id : string
        include_total : boolean, optional
            When True, adds a `meta.total_results` field to the response.
            Without it, `links.last` is usually empty unless you are on the
            last chunk. This triggers a fairly expensive query, so use it
            sparingly. (Default: False)

        Returns
        -------
        response : dict
        """
        response = self.request_json(
            GET,
            f'/pages/{page_id}/changes/',
            params={'include_total': include_total or None})
        # Convert timestamp strings to datetime objects in place.
        for record in response['data']:
            record['created_at'] = parse_timestamp(record['created_at'])
            record['updated_at'] = parse_timestamp(record['updated_at'])
        return response
Exemplo n.º 4
0
    def list_versions(self,
                      *,
                      page_id=None,
                      chunk=None,
                      chunk_size=None,
                      start_date=None,
                      end_date=None,
                      source_type=None,
                      hash=None,
                      source_metadata=None):
        """
        List Versions, optionally filtered by search criteria, including Page.

        Parameters
        ----------
        page_id : string, optional
            restricts search to Versions of a specific Page
        chunk : integer, optional
            pagination parameter
        chunk_size : integer, optional
            number of items per chunk
        start_date : datetime, optional
        end_date : datetime, optional
        source_type : string, optional
            such as 'versionista' or 'internetarchive'
        hash : string, optional
            SHA256 hash of Version content
        source_metadata : dict, optional
            Examples:

            * ``{'version_id': 12345678}``
            * ``{'account': 'versionista1', 'has_content': True}``

        Returns
        -------
        response : dict
        """
        params = {
            'chunk': chunk,
            'chunk_size': chunk_size,
            'capture_time': _time_range_string(start_date, end_date),
            'source_type': source_type,
            'hash': hash
        }
        if source_metadata is not None:
            # Flatten nested metadata filters into `source_metadata[key]`
            # query parameters, as the API expects.
            for k, v in source_metadata.items():
                params[f'source_metadata[{k}]'] = v
        if page_id is None:
            url = f'{self._api_url}/versions'
        else:
            url = f'{self._api_url}/pages/{page_id}/versions'
        res = requests.get(url, auth=self._auth, params=params)
        _process_errors(res)
        result = res.json()
        # In place, replace datetime strings with datetime objects.
        for v in result['data']:
            v['created_at'] = parse_timestamp(v['created_at'])
            v['updated_at'] = parse_timestamp(v['updated_at'])
            v['capture_time'] = parse_timestamp(v['capture_time'])
        return result
Exemplo n.º 5
0
    def get_annotation(self,
                       *,
                       annotation_id,
                       page_id,
                       to_version_id,
                       from_version_id=''):
        """
        Get a specific Annotation.

        Parameters
        ----------
        annotation_id : string
        page_id : string
        to_version_id : string
        from_version_id : string, optional
            If from_version_id is not given, it will be treated as version
            immediately prior to ``to_version``.

        Returns
        -------
        response : dict
        """
        url = (f'{self._api_url}/pages/{page_id}/changes/'
               f'{from_version_id}..{to_version_id}/annotations/'
               f'{annotation_id}')
        res = requests.get(url, auth=self._auth)
        _process_errors(res)
        result = res.json()
        # In place, replace datetime strings with datetime objects.
        data = result['data']
        data['created_at'] = parse_timestamp(data['created_at'])
        data['updated_at'] = parse_timestamp(data['updated_at'])
        return result
Exemplo n.º 6
0
    def handle(self, *args, **options):
        """Queue every upload event created within [from, to] for reprocessing."""
        start = parse_timestamp(options["from"])
        end = parse_timestamp(options["to"])
        events = UploadEvent.objects.filter(created__gte=start,
                                            created__lte=end)

        queue_upload_events_for_reprocessing(events, use_kinesis=True)
Exemplo n.º 7
0
    def endElement(self, tag):
        """
        SAX handler: route the text accumulated for the element that just
        closed (``self._data``) into the note or attachment being built.

        Recognized tags populate ``self._note`` / ``self._attachment``;
        ``note`` hands the finished note to the exporter, and ``resource``
        appends the current attachment to the note. Unknown tags return
        early without clearing the capture flag.
        """
        if tag == 'title':
            self._note['title'] = self._data
        elif tag == 'author':
            self._note['author'] = self._data
        elif tag == 'created':
            # Timestamps arrive as strings; store parsed datetimes.
            dt = parse_timestamp(self._data)
            self._note['created'] = dt
        elif tag == 'updated':
            dt = parse_timestamp(self._data)
            self._note['updated'] = dt
        elif tag == 'content':
            self._note['content'] = self._data.strip()
        elif tag == 'note':
            # The note is complete: hand it off and drop our reference.
            self._exporter.export(self._note)
            self._note = None
        elif tag == 'data':
            # Attachment payloads are base64-encoded in the source XML.
            self._attachment['data'] = b64decode(self._data)
        elif tag == 'mime':
            self._attachment['mimetype'] = self._data
        elif tag == 'file-name':
            self._attachment['filename'] = self._data
        elif tag == 'resource':
            if 'attachments' not in self._note:
                self._note['attachments'] = []
            self._note['attachments'].append(self._attachment)
            # NOTE(review): this resets `_attachments` (plural), an attribute
            # never read in this handler — it looks like a typo for
            # `self._attachment = None`; confirm against startElement before
            # changing.
            self._attachments = None
        else:
            return

        # Only reached for recognized tags: stop capturing character data.
        self._capture = False
Exemplo n.º 8
0
    def get_page(self, page_id):
        """
        Fetch a single Page by its ID.

        Parameters
        ----------
        page_id : string

        Returns
        -------
        response : dict
        """
        res = requests.get(f'{self._api_url}/pages/{page_id}',
                           auth=self._auth)
        _process_errors(res)
        result = res.json()
        page = result['data']
        # Convert timestamp strings to datetime objects in place, on the
        # page itself and on each of its versions.
        page['created_at'] = parse_timestamp(page['created_at'])
        page['updated_at'] = parse_timestamp(page['updated_at'])
        for version in page['versions']:
            version['created_at'] = parse_timestamp(version['created_at'])
            version['updated_at'] = parse_timestamp(version['updated_at'])
            version['capture_time'] = parse_timestamp(version['capture_time'])
        return result
Exemplo n.º 9
0
    def list_annotations(self,
                         *,
                         page_id,
                         to_version_id,
                         from_version_id='',
                         include_total=False):
        """
        List the Annotations attached to a Change between two Versions.

        Parameters
        ----------
        page_id : string
        to_version_id : string
        from_version_id : string, optional
            When omitted, the version immediately prior to ``to_version``
            is assumed.
        include_total : boolean, optional
            When True, adds a `meta.total_results` field to the response.
            Without it, `links.last` is usually empty unless you are on the
            last chunk. This triggers a fairly expensive query, so use it
            sparingly. (Default: False)

        Returns
        -------
        response : dict
        """
        annotations_url = (f'/pages/{page_id}/changes/'
                           f'{from_version_id}..{to_version_id}/annotations')
        response = self.request_json(
            GET, annotations_url,
            params={'include_total': include_total or None})
        # Convert timestamp strings to datetime objects in place.
        for annotation in response['data']:
            annotation['created_at'] = parse_timestamp(annotation['created_at'])
            annotation['updated_at'] = parse_timestamp(annotation['updated_at'])
        return response
Exemplo n.º 10
0
    def list_annotations(self, *, page_id, to_version_id, from_version_id=''):
        """
        List the Annotations attached to a Change between two Versions.

        Parameters
        ----------
        page_id : string
        to_version_id : string
        from_version_id : string, optional
            When omitted, the version immediately prior to ``to_version``
            is assumed.

        Returns
        -------
        response : dict
        """
        annotations_url = (f'{self._api_url}/pages/{page_id}/changes/'
                           f'{from_version_id}..{to_version_id}/annotations')
        res = requests.get(annotations_url, auth=self._auth)
        _process_errors(res)
        result = res.json()
        # Convert timestamp strings to datetime objects in place.
        for annotation in result['data']:
            annotation['created_at'] = parse_timestamp(annotation['created_at'])
            annotation['updated_at'] = parse_timestamp(annotation['updated_at'])
        return result
    def test_04_last_run(self):
        """Last-run bookkeeping: per-node timestamps are stored, overwritten
        in place, and normalized to UTC when read back."""
        task1_id = set_periodic_task("task one", "*/5 * * * *", ["pinode1", "pinode2"], "some.task.module", 3, {
            "key1": 1,
            "key2": False
        })
        self.assertEqual(len(PeriodicTask.query.all()), 1)
        task1_entry = PeriodicTask.query.filter_by(id=task1_id).one()

        # We have no initial last runs
        self.assertEqual(len(list(task1_entry.last_runs)), 0)

        # Two runs recorded for the same node should overwrite, not accumulate.
        set_periodic_task_last_run(task1_id, "pinode1", parse_timestamp("2018-06-26 08:00+02:00"))
        set_periodic_task_last_run(task1_id, "pinode1", parse_timestamp("2018-06-26 08:05+02:00"))

        task1 = get_periodic_tasks("task one")[0]
        self.assertEqual(len(list(task1_entry.last_runs)), 1)
        # 08:05+02:00 is 06:05 UTC — timestamps come back UTC-normalized.
        self.assertEqual(task1_entry.last_runs[0].timestamp,
                         parse_timestamp("2018-06-26 06:05"))
        self.assertEqual(task1["last_runs"]["pinode1"],
                         parse_timestamp("2018-06-26 06:05 UTC"))

        # Different nodes keep independent last-run entries; offsets +01:00
        # and -08:00 also normalize to UTC.
        set_periodic_task_last_run(task1_id, "pinode2", parse_timestamp("2018-06-26 08:10+01:00"))
        set_periodic_task_last_run(task1_id, "pinode3", parse_timestamp("2018-06-26 08:10-08:00"))
        task1 = get_periodic_tasks("task one")[0]
        self.assertEqual(task1["last_runs"]["pinode1"],
                         parse_timestamp("2018-06-26 06:05 UTC"))
        self.assertEqual(task1["last_runs"]["pinode2"],
                         parse_timestamp("2018-06-26 07:10 UTC"))
        self.assertEqual(task1["last_runs"]["pinode3"],
                         parse_timestamp("2018-06-26 16:10 UTC"))

        delete_periodic_task(task1_id)
Exemplo n.º 12
0
 def __init__(self, data):
     """Populate optional fields from an API payload, skipping absent keys."""
     if data.get('created'):
         self.created = parse_timestamp(data['created'])
     if data.get('createdBy'):
         self.created_by = Profile(data['createdBy'])
     if data.get('edited'):
         self.edited = parse_timestamp(data['edited'])
     if data.get('editedBy'):
         self.edited_by = Profile(data['editedBy'])
     if data.get('flags'):
         self.flags = data['flags']
     if data.get('permissions'):
         self.permissions = PermissionSet(data['permissions'])
     if data.get('links'):
         self.links = {}
         # Strip the '/api/v1' prefix from each link's href; keep the title
         # only when the payload supplies one.
         for item in data['links']:
             entry = {'href': str(item['href']).replace('/api/v1', '')}
             if 'title' in item:
                 entry['title'] = item['title']
             self.links[item['rel']] = entry
Exemplo n.º 13
0
    def process_timestamp(resource):
        """
        Recurse over unmarshalled json and convert
        any strings that are ISO8601-like into python
        datetime objects. This is far from ideal and will be replaced
        in future with xpath-like notation for visiting specific
        attributes.

        Args:
            resource: an JSON API response that has been deserialized. This will
            usually be a dictionary but could also be a list.

        Returns:
            the same resource, but with timestamp strings as datetime objects.
        """

        if isinstance(resource, list):
            for item in resource:
                APIResource.process_timestamp(item)
        else:
            for key in resource.keys():
                # `unicode` marks this as Python 2 code: only string values
                # that look datetime-ish (per VALID_DATETIME) are converted.
                if isinstance(resource[key], unicode):
                    if bool(VALID_DATETIME.search(resource[key])):
                        resource[key] = parse_timestamp(resource[key])
                elif isinstance(resource[key], (list, dict)):
                    APIResource.process_timestamp(resource[key])
        # BUG FIX: previously only the dict branch returned the resource, so
        # a top-level list argument yielded None despite the documented
        # "returns the same resource" contract. Return it for both branches.
        return resource
Exemplo n.º 14
0
def ensure_tztime(ts):
    """
    Return *ts* as a timezone-aware UTC datetime where possible.

    Strings are parsed into datetimes first. If localizing fails — e.g. the
    value is already timezone-aware (pytz raises ValueError) or is not a
    datetime at all — the value is returned unchanged.
    """
    if isinstance(ts, str):
        ts = parse_timestamp(ts)
    try:
        return pytz.utc.localize(ts)
    except Exception:
        # Narrowed from a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt. Best-effort behavior is preserved.
        return ts
Exemplo n.º 15
0
    def process_timestamp(resource):
        """
        Recurse over unmarshalled json and convert
        any strings that are ISO8601-like into python
        datetime objects. This is far from ideal and will be replaced
        in future with xpath-like notation for visiting specific
        attributes.

        Args:
            resource: an JSON API response that has been deserialized. This will
            usually be a dictionary but could also be a list.

        Returns:
            the same resource, but with timestamp strings as datetime objects.
        """

        if isinstance(resource, list):
            # NOTE(review): this branch returns None, despite the docstring's
            # "returns the same resource" — confirm callers rely only on the
            # in-place conversion.
            for item in resource:
                APIResource.process_timestamp(item)
        else:
            for key in resource.keys():
                # `unicode` marks this as Python 2 code; only string values
                # matching VALID_DATETIME are converted.
                if isinstance(resource[key], unicode):
                    if bool(VALID_DATETIME.search(resource[key])):
                        resource[key] = parse_timestamp(resource[key])
                elif isinstance(resource[key], list) or isinstance(resource[key], dict):
                    APIResource.process_timestamp(resource[key])
            return resource
Exemplo n.º 16
0
 def from_api_response(cls, data):
     """Build a file-metadata instance from the first record of an API response."""
     record = data[0]
     metadata = cls()
     metadata.created = parse_timestamp(record['created'])
     metadata.file_size = record['fileSize']
     metadata.file_hash = record['fileHash']
     metadata.mime_type = record['mimeType']
     # 'fileExt' is optional in the payload; set it only when present.
     if record.get('fileExt'):
         metadata.file_ext = record['fileExt']
     return metadata
Exemplo n.º 17
0
    def parse_cmd_timestamp(args):
        """
        Interpret command arguments as a timestamp.

        No arguments -> the current time; "+N" -> N seconds from now;
        anything else is parsed as an absolute timestamp string.
        """
        if not args:
            return datetime.now()
        token = args[0]
        if token[0] == '+':
            return datetime.now() + timedelta(seconds=int(token[1:]))
        return parse_timestamp(token)
Exemplo n.º 18
0
 def get_console_output(self, xml_bytes):
     """
     Parse a GetConsoleOutput XML response into a ConsoleOutput model.

     Python 2 code: element text is a byte string, and the console body is
     decoded via the "base64" str codec before UTF-8 decoding.
     """
     root = XML(xml_bytes)
     output_node = root.find("output")
     instance_id = root.find("instanceId").text.decode("ascii").strip()
     # The timestamp element's text is parsed into a datetime.
     timestamp = parse_timestamp(root.find("timestamp").text)
     console_text = output_node.text.decode("base64").decode("utf-8")
     return model.ConsoleOutput(
         instance_id,
         timestamp,
         console_text,
     )
Exemplo n.º 19
0
 def get_console_output(self, xml_bytes):
     """
     Parse a GetConsoleOutput XML response into a ConsoleOutput model.

     Python 2 code: element text is a byte string, and the console body is
     base64-encoded before UTF-8 encoding.
     """
     root = XML(xml_bytes)
     instance_id = root.find("instanceId").text.decode("ascii").strip()
     timestamp = parse_timestamp(root.find("timestamp").text)
     output_node = root.find("output")
     console_text = output_node.text.decode("base64").decode("utf-8")
     return model.ConsoleOutput(instance_id, timestamp, console_text)
Exemplo n.º 20
0
    def load_from_web_dict(d):
        """
        Build a Buy/Sell operation from a web dict carrying 'OrderType' and
        'TimeStamp'; any other order type raises an Exception.
        """
        order_type = d["OrderType"]
        if order_type == "BUY":
            op_cls = Buy_op
        elif order_type == "SELL":
            op_cls = Sell_op
        else:
            m = "'{}' is wrong string for parsing of Operation's type"
            raise Exception(m.format(order_type))
        return op_cls(parse_timestamp(d["TimeStamp"]))
Exemplo n.º 21
0
    def __init__(self, data, summary=True):
        """
        Tolerantly populate fields from *data*. PUT/PATCH payloads may omit
        keys, so each optional field is set only when present and truthy;
        full (non-summary) payloads must carry the counting/timestamp keys.
        """
        optional = (('id', 'id'), ('siteId', 'site_id'), ('userId', 'user_id'),
                    ('email', 'email'), ('profileName', 'profile_name'),
                    ('visible', 'visible'), ('avatar', 'avatar'))
        for key, attr in optional:
            if data.get(key):
                setattr(self, attr, data[key])
        if data.get('meta'):
            self.meta = Meta(data['meta'])

        if not summary:
            self.style_id = data['styleId']
            self.item_count = data['itemCount']
            self.comment_count = data['commentCount']
            self.created = parse_timestamp(data['created'])
            self.last_active = parse_timestamp(data['lastActive'])
Exemplo n.º 22
0
    def get_version(self, version_id):
        """
        Fetch a single Version by its ID.

        Parameters
        ----------
        version_id : string

        Returns
        -------
        response : dict
        """
        res = requests.get(f'{self._api_url}/versions/{version_id}',
                           auth=self._auth)
        _process_errors(res)
        result = res.json()
        record = result['data']
        # Convert timestamp strings to datetime objects in place.
        for field in ('capture_time', 'updated_at', 'created_at'):
            record[field] = parse_timestamp(record[field])
        return result
Exemplo n.º 23
0
    def list_changes(self, page_id):
        """
        List the Changes between successive Versions of a Page.

        Parameters
        ----------
        page_id : string

        Returns
        -------
        response : dict
        """
        res = requests.get(f'{self._api_url}/pages/{page_id}/changes/',
                           auth=self._auth)
        _process_errors(res)
        result = res.json()
        # Convert timestamp strings to datetime objects in place.
        for record in result['data']:
            record['created_at'] = parse_timestamp(record['created_at'])
            record['updated_at'] = parse_timestamp(record['updated_at'])
        return result
Exemplo n.º 24
0
    def validate_and_clean_timestamp(self, data, field_name):
        """
        Require ``data[field_name]`` to be a parseable ISO 8601 timestamp
        and replace the string with the parsed value in place.

        Raises OrderStatusValidationError when the field is missing, falsy,
        or fails to parse.
        """
        raw = data.get(field_name)
        error_msg = "Field '{}' is required and expected to be an ISO 8601 UTC timestamp".format(field_name)

        if not raw:
            raise OrderStatusValidationError(error_msg)

        try:
            parsed = parse_timestamp(raw)
        except ValueError:
            raise OrderStatusValidationError(error_msg)

        data[field_name] = parsed
 def _normalize_last_modified(self, last_modified):
     """
     Normalize timestamp of S3 key which is from bucket.get_key method.

     Parses *last_modified* and re-renders it in the fixed
     'YYYY-MM-DDTHH:MM:SS.000Z' form; failures are logged and re-raised.

     :param last_modified: timestamp string from the S3 key metadata
     :return: normalized timestamp string
     """
     try:
         mtime = parse_timestamp(last_modified)
         return mtime.strftime('%Y-%m-%dT%H:%M:%S.000Z')
     except Exception:
         # Keyword-argument logging — presumably a structlog-style logger;
         # confirm before swapping in stdlib logging.
         logger.exception("Failed to normalize last modified time.",
                          minimum_last_modified=last_modified)
         raise
Exemplo n.º 26
0
    def validate_and_clean_timestamp(self, data, field_name):
        """
        Require ``data[field_name]`` to be a parseable ISO 8601 timestamp
        and replace the string with the parsed value in place.

        Raises OrderStatusValidationError when the field is missing, falsy,
        or fails to parse.
        """
        value = data.get(field_name)
        error_msg = "Field '{}' is required and expected to be an ISO 8601 UTC timestamp".format(field_name)

        if not value:
            raise OrderStatusValidationError(error_msg)

        try:
            value = parse_timestamp(value)
        except ValueError:
            raise OrderStatusValidationError(error_msg)

        # Store the parsed datetime back onto the payload.
        data[field_name] = value
Exemplo n.º 27
0
    def get_page(self, page_id):
        """
        Fetch a single Page by its ID.

        Parameters
        ----------
        page_id : string

        Returns
        -------
        response : dict
        """
        response = self.request_json(GET, f'/pages/{page_id}')
        page = response['data']
        # Convert timestamp strings to datetime objects in place, on the
        # page itself and on each of its versions.
        page['created_at'] = parse_timestamp(page['created_at'])
        page['updated_at'] = parse_timestamp(page['updated_at'])
        for version in page['versions']:
            version['created_at'] = parse_timestamp(version['created_at'])
            version['updated_at'] = parse_timestamp(version['updated_at'])
            version['capture_time'] = parse_timestamp(version['capture_time'])
        return response
Exemplo n.º 28
0
 def from_xml(cls, xml):
     """
     Rebuild an instance from its XML element.

     Verifies the element's tag matches ``cls.tagname``, parses an optional
     ``ts`` attribute into a datetime, then reconstructs each child node via
     ``node_for_tagname`` and copies that node class's declared attributes
     from the child element.
     """
     if xml.tag != cls.tagname:
         raise ValueError("%s.from_xml() called with %r, not %r" %
                          (cls.__name__, xml.tag, cls.tagname))
     ts = xml.attrib.get("ts")
     if ts:
         ts = parse_timestamp(ts)
     ret = cls(ts)
     for element in xml:
         ecls = node_for_tagname(element.tag)
         node = ecls.from_xml(element)
         for attrname in ecls.attributes:
             setattr(node, attrname, element.attrib.get(attrname))
         ret.nodes.append(node)
     return ret
Exemplo n.º 29
0
def convert_datetime_params(params: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize date/datetime/timestamp parameter values to ISO 8601 strings.

    API behavior note: params that take date but not time info will accept a
    full timestamp and just ignore the time, so it's safe to parse both date
    and datetime strings into timestamps.

    :raises: :py:exc:`dateutil.parser._parser.ParserError` if a date/datetime
        format is invalid
    """
    for key, value in params.items():
        # Concrete date/datetime objects are rendered directly.
        if isinstance(value, (datetime, date)):
            params[key] = _isoformat(value)
        # Known datetime parameters are parsed from the original value,
        # then re-rendered.
        if key in DATETIME_PARAMS:
            params[key] = _isoformat(parse_timestamp(value))

    return params
Exemplo n.º 30
0
    def published_at(self):
        """
        Best-effort publication date for the page.

        Prefers a sitemap 'lastmod' value no older than roughly ten years;
        otherwise scans the page for date-like strings and keeps the most
        specific parseable candidate. Returns NULL_DATE when nothing is
        found; the result is made naive (tzinfo stripped) when possible.
        """
        date = NULL_DATE

        def dt_specificity(dt):
            # Count how many components of *dt* are populated.
            # BUG FIX: the original read the enclosing `date` variable via
            # `date.__getattribute__(k)` instead of the `dt` argument; use
            # the parameter (and plain getattr) so the helper scores what
            # it is given.
            return sum(1 if getattr(dt, k) else 0
                       for k in ('month', 'day', 'hour', 'minute', 'second',
                                 'microsecond', 'tzinfo'))

        if "lastmod" in self.sitemap_data and self.sitemap_data[
                "lastmod"] and (datetime.datetime.now() -
                                self.sitemap_data['lastmod']
                                ) < datetime.timedelta(days=365 * 10):
            date = self.sitemap_data["lastmod"]
        else:
            most_specific = None
            curr_specificity = 0
            seen = set()
            for match in self.lxml.xpath(UniversalSelector.published_at):
                if match in seen:
                    continue
                seen.add(match)
                try:
                    date = parse_timestamp(
                        str(re.sub(r"\n", " ", match.strip())))
                    print(f"Found a date! {date}")
                    specificity = dt_specificity(date)
                    if specificity > curr_specificity:
                        most_specific = date
                        curr_specificity = specificity
                except Exception:
                    # Narrowed from bare `except:`; unparseable candidates
                    # are simply skipped.
                    pass
            if most_specific:
                date = most_specific
        try:
            date = date.replace(tzinfo=None)
        except Exception:
            # NULL_DATE may not support replace(); leave the value as-is.
            pass

        return date
Exemplo n.º 31
0
    def validate_and_clean_time(self, data, field_name):
        """
        Validate that ``data[field_name]`` is a 24-hour "HH:MM" string and
        replace it in place with the parsed ``time`` object.

        Raises OrderStatusValidationError on a missing, malformed, or
        unparseable value.
        """
        value = data.get(field_name)
        error_msg = "Field '{}' is required and expected to be a 24-hour time string HH:MM".format(field_name)

        # `basestring` marks this as Python 2 code.
        if not isinstance(value, basestring):
            raise OrderStatusValidationError(error_msg)

        # Raw string literal fixes the invalid escape sequences in the
        # original '^\d\d?:\d\d$' pattern.
        if not re.match(r'^\d\d?:\d\d$', value):
            raise OrderStatusValidationError(error_msg)

        try:
            value = parse_timestamp(value).time()
        except ValueError:
            raise OrderStatusValidationError(error_msg)

        data[field_name] = value
Exemplo n.º 32
0
    def validate_and_clean_time(self, data, field_name):
        """
        Validate that ``data[field_name]`` is a 24-hour "HH:MM" string and
        replace it in place with the parsed ``time`` object.

        Raises OrderStatusValidationError on a missing, malformed, or
        unparseable value.
        """
        raw = data.get(field_name)
        error_msg = "Field '{}' is required and expected to be a 24-hour time string HH:MM".format(field_name)

        if not isinstance(raw, str) or not re.match(r'^\d\d?:\d\d$', raw):
            raise OrderStatusValidationError(error_msg)

        try:
            parsed = parse_timestamp(raw).time()
        except ValueError:
            raise OrderStatusValidationError(error_msg)

        data[field_name] = parsed
Exemplo n.º 33
0
	def from_xml(cls, xml):
		"""
		Rebuild an instance from its XML element.

		Verifies the element's tag matches ``cls.tagname``, parses an
		optional ``ts`` attribute into a datetime, then reconstructs each
		child node via ``node_for_tagname`` and copies that node class's
		declared attributes from the child element.
		"""
		if xml.tag != cls.tagname:
			raise ValueError("%s.from_xml() called with %r, not %r" % (
				cls.__name__, xml.tag, cls.tagname
			))
		ts = xml.attrib.get("ts")
		if ts:
			ts = parse_timestamp(ts)
		ret = cls(ts)
		for element in xml:
			ecls = node_for_tagname(element.tag)
			node = ecls.from_xml(element)
			for attrname in ecls.attributes:
				setattr(node, attrname, element.attrib.get(attrname))
			ret.nodes.append(node)
		return ret
Exemplo n.º 34
0
def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
    """Convert the ``modified`` and ``created`` dates of *doc* from
    ISO 8601 strings to datetime objects, in place.

    Parameters
    ----------
    doc : dict-like
        Document whose date attributes are converted. Attributes that are
        missing or unparseable are set to ``None``.
    date_attrs : tuple of str, optional
        Names of the attributes to convert.

    Returns
    -------
    The same *doc* object, mutated.
    """
    for attr in date_attrs:
        date_str = doc.get(attr)
        try:
            # parse_timestamp raises TypeError when the attribute is missing
            # (date_str is None) and ValueError for malformed strings; both
            # must be caught, otherwise a missing attribute crashes here.
            date_obj = parse_timestamp(date_str)
        except (TypeError, ValueError):
            logging.error('Error trying to parse "%s"', date_str)
            date_obj = None
        doc[attr] = date_obj
    return doc
    def test_04_last_run(self):
        """Last-run timestamps are stored per node and normalized to UTC."""
        task1_id = set_periodic_task("task one", "*/5 * * * *",
                                     ["pinode1", "pinode2"],
                                     "some.task.module", 3, {
                                         "key1": 1,
                                         "key2": False
                                     })
        self.assertEqual(len(PeriodicTask.query.all()), 1)
        task1_entry = PeriodicTask.query.filter_by(id=task1_id).one()

        # We have no initial last runs
        self.assertEqual(len(list(task1_entry.last_runs)), 0)

        # Recording a second run for the same node replaces the first:
        # only one entry remains below.
        set_periodic_task_last_run(task1_id, "pinode1",
                                   parse_timestamp("2018-06-26 08:00+02:00"))
        set_periodic_task_last_run(task1_id, "pinode1",
                                   parse_timestamp("2018-06-26 08:05+02:00"))

        task1 = get_periodic_tasks("task one")[0]
        self.assertEqual(len(list(task1_entry.last_runs)), 1)
        # 08:05+02:00 is stored as 06:05 (UTC); the DB entry compares equal
        # to a naive timestamp, the task dict to an aware UTC one.
        self.assertEqual(task1_entry.last_runs[0].timestamp,
                         parse_timestamp("2018-06-26 06:05"))
        self.assertEqual(task1["last_runs"]["pinode1"],
                         parse_timestamp("2018-06-26 06:05 UTC"))

        # Other UTC offsets (+01:00, -08:00) are converted accordingly.
        set_periodic_task_last_run(task1_id, "pinode2",
                                   parse_timestamp("2018-06-26 08:10+01:00"))
        set_periodic_task_last_run(task1_id, "pinode3",
                                   parse_timestamp("2018-06-26 08:10-08:00"))
        task1 = get_periodic_tasks("task one")[0]
        self.assertEqual(task1["last_runs"]["pinode1"],
                         parse_timestamp("2018-06-26 06:05 UTC"))
        self.assertEqual(task1["last_runs"]["pinode2"],
                         parse_timestamp("2018-06-26 07:10 UTC"))
        self.assertEqual(task1["last_runs"]["pinode3"],
                         parse_timestamp("2018-06-26 16:10 UTC"))

        delete_periodic_task(task1_id)
0
 def __init__(self, xml):
     """Build a page entry from one sitemap <url> element.

     Extracts the location, last-modified date and news/video metadata,
     normalizes the URL, and initializes crawl-state attributes.
     """
     # NOTE(review): the "news:"/"video:" prefixed tags assume either lxml
     # or pre-resolved tag names — plain ElementTree needs a namespace map.
     url = xml.find("loc")
     lastmod = xml.find("lastmod")
     title = xml.find("news:title")
     description = xml.find("news:description")
     keywords = xml.find("news:keywords")
     publication_date = xml.find("news:publication_date")
     # Element.find() returns None when the tag is absent. Compare against
     # None explicitly: truth-testing an Element is False whenever it has
     # no children, so `if not title:` wrongly discards tags that are
     # present but contain only text.
     if title is None:
         title = xml.find("video:title")
     if description is None:
         description = xml.find("video:description")
     self.url = format_text(url_normalize(url.text.strip().lower()))
     self.html = ""
     self.tree = None
     parsed = urlparse(self.url)
     self.site = parsed.netloc
     self.path = parsed.path
     try:
         # Parent directory: strip a trailing slash, drop the last two
         # path segments.
         pardir = "/".join(re.sub(r"(/)$", "", self.path).split("/")[:-2])
     except Exception:
         pardir = "/"
     self.base_url = f"{parsed.scheme}://{parsed.netloc}{pardir}"
     self.lastmod = (parse_timestamp(format_text(lastmod.text))
                     if lastmod is not None else None)
     self.headline = format_text(title.text.strip()) if title is not None else ""
     self.keywords = ([format_text(kw) for kw in keywords.text.split(",")]
                      if keywords is not None else [])
     self.publication_date = (format_text(publication_date.text)
                              if publication_date is not None else "")
     self.description = (format_text(description.text)
                         if description is not None else "")
     self.xml = format_text(xml.__repr__())
     self.metadata = {"schemata": [], "errors": []}
     self.has_metadata = False
     self.seen = self.url in seen
     # seen.add(self.url)
     self.articlebody = ""
     self.visited = False
Exemplo n.º 37
0
    def test_01_crud(self):
        """Full create/read/update/delete cycle for periodic tasks via the
        /periodictask/ REST endpoints, including validation failures,
        ordering, enable/disable, and last_update bookkeeping."""
        # no tasks yet
        status_code, data = self.simulate_request('/periodictask/',
                                                  method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(data['result']['value'], [])

        # need authorization
        status_code, data = self.simulate_request(
            '/periodictask/', method='GET', headers={'Authorization': 'ABC'})
        self.assertEqual(status_code, 401)
        self.assertFalse(data['result']['status'])

        # create task
        with self.mock_task_module():
            task_dict1 = {
                'name': 'some task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'ordering': 5,
                'options': '{"something": 123, "else": true}',
            }
            status_code, data = self.simulate_request('/periodictask/',
                                                      method='POST',
                                                      data=task_dict1)
            self.assertEqual(status_code, 200)
            self.assertEqual(data['result']['status'], True)
            ptask_id1 = data['result']['value']

        # some invalid tasks
        invalid_task_dicts = [
            # invalid ordering
            {
                'name': 'some other task',
                'active': False,
                'nodes': 'a, b',
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'ordering': '-3',
                'options': '{"something": "123", "else": true}',
            },
            # no nodes
            {
                'name': 'some other task',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'options': '{"something": "123", "else": true}',
            },
            # empty nodes
            {
                'name': 'some other task',
                'active': False,
                'interval': '0 8 * * *',
                'nodes': '    ',
                'taskmodule': 'UnitTest',
                'options': '{"something": "123", "else": true}',
            },
            # unknown taskmodule
            {
                'name': 'some other task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'Unknown',
                'options': '{"something": "123"}',
            },
            # invalid interval
            {
                'name': 'some other task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': 'every day',
                'taskmodule': 'UnitTest',
                'options': '{"something": "123"}',
            },
            # invalid options
            {
                'name': 'some task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'options': '[1, 2]',
            }
        ]
        # all result in ERR905
        with self.mock_task_module():
            for invalid_task_dict in invalid_task_dicts:
                status_code, data = self.simulate_request(
                    '/periodictask/', method='POST', data=invalid_task_dict)
                self.assertEqual(status_code, 400)
                self.assertFalse(data['result']['status'])
                self.assertIn('ERR905', data['result']['error']['message'])

        # create another task
        with self.mock_task_module():
            task_dict2 = {
                'name': 'some other task',
                'nodes': 'pinode1',
                'active': False,
                'interval': '0 8 * * 0',
                'taskmodule': 'UnitTest',
                'ordering': 2,
            }
            status_code, data = self.simulate_request('/periodictask/',
                                                      method='POST',
                                                      data=task_dict2)

            self.assertEqual(status_code, 200)
            self.assertTrue(data['result']['status'])
            ptask_id2 = data['result']['value']

        # can list the periodic tasks; they are sorted by ordering
        # ('some other task' has ordering 2, 'some task' has 5)
        status_code, data = self.simulate_request('/periodictask/',
                                                  method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(len(data['result']['value']), 2)
        self.assertEqual([task['name'] for task in data['result']['value']],
                         ['some other task', 'some task'])

        # find first task
        result_dict = data['result']['value'][1]
        self.assertEqual(result_dict['id'], ptask_id1)
        self.assertEqual(result_dict['ordering'], 5)
        self.assertEqual(result_dict['name'], 'some task')
        self.assertEqual(result_dict['active'], False)
        self.assertEqual(result_dict['interval'], '0 8 * * *')
        self.assertEqual(result_dict['nodes'], ['pinode1', 'pinode2'])
        self.assertEqual(result_dict['last_runs'], {})
        last_update = parse_timestamp(result_dict['last_update'])
        self.assertIsNotNone(last_update)
        # option values come back stringified
        self.assertEqual(result_dict['options'], {
            'something': '123',
            'else': 'True'
        })

        # get one
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['id'], ptask_id1)

        # unknown ID
        status_code, data = self.simulate_request('/periodictask/4242',
                                                  method='GET')
        self.assertEqual(status_code, 404)
        self.assertFalse(data['result']['status'])

        # update existing task: POSTing with an 'id' updates in place
        task_dict1['name'] = 'new name'
        task_dict1['options'] = '{"key": "value"}'
        task_dict1['id'] = ptask_id1
        task_dict1['ordering'] = '2'
        with self.mock_task_module():
            status_code, data = self.simulate_request('/periodictask/',
                                                      method='POST',
                                                      data=task_dict1)
            self.assertEqual(status_code, 200)
            self.assertTrue(data['result']['status'])
            self.assertEqual(data['result']['value'], ptask_id1)

        # can list the periodic tasks in new order
        status_code, data = self.simulate_request('/periodictask/',
                                                  method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(len(data['result']['value']), 2)
        self.assertEqual([task['name'] for task in data['result']['value']],
                         ['new name', 'some other task'])

        # get updated task; the update must have advanced last_update
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['id'], ptask_id1)
        self.assertEqual(data['result']['value']['ordering'], 2)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['options'], {'key': 'value'})
        self.assertGreater(
            parse_timestamp(data['result']['value']['last_update']),
            last_update)
        last_update = parse_timestamp(data['result']['value']['last_update'])

        # enable
        status_code, data = self.simulate_request(
            '/periodictask/enable/{}'.format(ptask_id1), method='POST')
        self.assertEqual(status_code, 200)

        # get updated task
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['active'], True)
        self.assertGreater(
            parse_timestamp(data['result']['value']['last_update']),
            last_update)
        last_update = parse_timestamp(data['result']['value']['last_update'])

        # disable
        status_code, data = self.simulate_request(
            '/periodictask/disable/{}'.format(ptask_id1), method='POST')
        self.assertEqual(status_code, 200)

        # get updated task
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['active'], False)
        self.assertGreater(
            parse_timestamp(data['result']['value']['last_update']),
            last_update)

        # disable again without effect
        status_code, data = self.simulate_request(
            '/periodictask/disable/{}'.format(ptask_id1), method='POST')
        self.assertEqual(status_code, 200)

        # get updated task
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['active'], False)

        # delete
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id1), method='DELETE')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value'], ptask_id1)

        # get updated task impossible now
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 404)
        self.assertFalse(data['result']['status'], False)

        # only 1 task left
        status_code, data = self.simulate_request('/periodictask/',
                                                  method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(len(data['result']['value']), 1)

        # delete the second task as well
        status_code, data = self.simulate_request(
            '/periodictask/{}'.format(ptask_id2), method='DELETE')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value'], ptask_id2)

        # no tasks left
        status_code, data = self.simulate_request('/periodictask/',
                                                  method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(data['result']['value'], [])
Exemplo n.º 38
0
from dateutil.parser import parse as parse_timestamp
from math import ceil
from sys import argv


# Command-line tool: print the elapsed time between two timestamps given as
# the two program arguments, in the largest unit whose count exceeds one.
a_string, b_string = argv[1:]
# Parse each argument separately so the error message names the bad one.
try:
    a_timestamp = parse_timestamp(a_string)
except ValueError:
    exit('a_timestamp.error = could not parse timestamp (%s)' % a_string)
try:
    b_timestamp = parse_timestamp(b_string)
except ValueError:
    exit('b_timestamp.error = could not parse timestamp (%s)' % b_string)
# Derive each unit from the previous one (seconds -> minutes -> hours -> days).
second_count = (b_timestamp - a_timestamp).total_seconds()
minute_count = second_count / 60.
hour_count = minute_count / 60.
day_count = hour_count / 24.


# Report the first unit (largest first) whose count exceeds one, rounded up.
if day_count > 1:
    print('day_count = %s' % int(ceil(day_count)))
elif hour_count > 1:
    print('hour_count = %s' % int(ceil(hour_count)))
elif minute_count > 1:
    print('minute_count = %s' % int(ceil(minute_count)))
else:
    print('second_count = %s' % int(ceil(second_count)))
    def test_05_scheduling(self):
        """Check get_scheduled_periodic_tasks per node, with the local
        timezone set to Europe/Moscow (+03:00 at these dates)."""
        # this unit test operates in russian time
        tzinfo = gettz("Europe/Moscow")

        # at midnight on each 1st
        task1 = set_periodic_task("task one", "0 0 1 * *", ["pinode1"], "some.task.module", 3, {
            "key1": 1,
            "key2": False
        })
        # at 08:00 on wednesdays; created with a mocked utcnow so that
        # last_update is deterministic
        current_utc_time = parse_timestamp("2018-05-31 05:08:00")
        with mock.patch('privacyidea.models.datetime') as mock_dt:
            mock_dt.utcnow.return_value = current_utc_time
            task2 = set_periodic_task("task two", "0 8 * * WED", ["pinode2", "pinode3"], "some.task.module", 1, {
                "key1": "value",
                "key2": "foo"
            }, active=False)
        # the mocked UTC creation time is reported in local (+03:00) time
        self.assertEqual(get_periodic_task_by_id(task2)["last_update"],
                         parse_timestamp("2018-05-31 08:08:00+03:00"))
        self.assertEqual(get_periodic_task_by_id(task2)["last_runs"], {})

        # every 30 minutes, on Tuesdays
        task3 = set_periodic_task("task three", "*/30 * * * 2", ["pinode1", "pinode2"], "some.task.module", 2, {
            "key1": 1234,
            "key2": 5678,
        })
        # on each 1st of august at midnight
        task4 = set_periodic_task("task four", "0 0 1 8 *", ["pinode2"], "some.task.module", 0)

        # we need some last runs
        set_periodic_task_last_run(task1, "pinode1", parse_timestamp("2018-06-01 00:00:05+03:00"))

        # no last run for pinode3 here!
        set_periodic_task_last_run(task2, "pinode2", parse_timestamp("2018-06-20 08:00:05+03:00"))

        set_periodic_task_last_run(task3, "pinode1", parse_timestamp("2018-06-26 11:36:37+03:00"))
        set_periodic_task_last_run(task3, "pinode2", parse_timestamp("2018-06-26 11:30:33+03:00"))

        set_periodic_task_last_run(task4, "pinode2", parse_timestamp("2017-08-01 00:00:43+03:00"))

        # tasks are returned sorted by their ordering (0, 1, 2, 3)
        self.assertEqual([task["name"] for task in get_periodic_tasks()],
                         ["task four", "task two", "task three", "task one"])

        # Invalid timestamp: a naive (offset-less) timestamp is rejected
        with self.assertRaises(ParameterError):
            get_scheduled_periodic_tasks("pinode1", parse_timestamp("2017-08-01 00:00:00"), tzinfo)

        # On pinode1:
        # task1 at midnight on each 1st
        # task3 every 30 minutes on tuesdays

        # On pinode2:
        # task2 on 08:00 on wednesdays, but it is inactive
        # task3 every 30 minutes on tuesdays
        # task4 on each 1st August at midnight

        # 26th June (Tuesday), 11:59
        # No tasks on both nodes
        current_timestamp = parse_timestamp("2018-06-26 11:59+03:00")

        scheduled = get_scheduled_periodic_tasks("pinode1", current_timestamp, tzinfo)
        self.assertEqual(scheduled, [])

        scheduled = get_scheduled_periodic_tasks("pinode2", current_timestamp, tzinfo)
        self.assertEqual(scheduled, [])

        # 26th June (Tuesday), 12:00
        # Run task3 on both nodes
        current_timestamp = parse_timestamp("2018-06-26 12:00+03:00")

        scheduled = get_scheduled_periodic_tasks("pinode1", current_timestamp, tzinfo)
        self.assertEqual([task["name"] for task in scheduled], ["task three"])

        scheduled = get_scheduled_periodic_tasks("pinode2", current_timestamp, tzinfo)
        self.assertEqual([task["name"] for task in scheduled], ["task three"])

        # 1st August (Wednesday), 11:59
        # Assume task3 last ran at 2018-08-01 00:00+03:00 (still 31st July
        # in UTC, a Tuesday), so it is not due again
        set_periodic_task_last_run(task3, "pinode1", parse_timestamp("2018-08-01 00:00+03:00"))
        set_periodic_task_last_run(task3, "pinode2", parse_timestamp("2018-08-01 00:00+03:00"))

        # On pinode1, run task1
        # On pinode2, run task4
        current_timestamp = parse_timestamp("2018-08-01 11:59+03:00")

        scheduled = get_scheduled_periodic_tasks("pinode1", current_timestamp, tzinfo)
        self.assertEqual([task["name"] for task in scheduled], ["task one"])

        scheduled = get_scheduled_periodic_tasks("pinode2", current_timestamp, tzinfo)
        self.assertEqual([task["name"] for task in scheduled], ["task four"])

        # Enable task2, now we also have to run it on pinode2 and pinode3
        with mock.patch('privacyidea.models.datetime') as mock_dt:
            mock_dt.utcnow.return_value = current_utc_time
            enable_periodic_task(task2)

        scheduled = get_scheduled_periodic_tasks("pinode1", current_timestamp, tzinfo)
        self.assertEqual([task["name"] for task in scheduled], ["task one"])

        scheduled = get_scheduled_periodic_tasks("pinode2", current_timestamp, tzinfo)
        self.assertEqual([task["name"] for task in scheduled], ["task four", "task two"])

        scheduled = get_scheduled_periodic_tasks("pinode3", current_timestamp, tzinfo)
        self.assertEqual([task["name"] for task in scheduled], ["task two"])

        # Simulate runs
        set_periodic_task_last_run(task1, "pinode1", current_timestamp)
        set_periodic_task_last_run(task2, "pinode2", current_timestamp)
        set_periodic_task_last_run(task2, "pinode3", current_timestamp)
        set_periodic_task_last_run(task4, "pinode2", current_timestamp)

        # Now, we don't have to run anything
        current_timestamp += timedelta(seconds=1)

        self.assertEqual(get_scheduled_periodic_tasks("pinode1", current_timestamp, tzinfo), [])
        self.assertEqual(get_scheduled_periodic_tasks("pinode2", current_timestamp, tzinfo), [])

        delete_periodic_task(task1)
        delete_periodic_task(task2)
        delete_periodic_task(task3)
        delete_periodic_task(task4)
    def test_01_crud(self):
        """Full create/read/update/delete cycle for periodic tasks via the
        /periodictask/ REST endpoints, including validation failures,
        ordering, enable/disable, and last_update bookkeeping."""
        # no tasks yet
        status_code, data = self.simulate_request('/periodictask/', method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(data['result']['value'], [])

        # need authorization
        status_code, data = self.simulate_request('/periodictask/', method='GET', headers={'Authorization': 'ABC'})
        self.assertEqual(status_code, 401)
        self.assertFalse(data['result']['status'])

        # create task
        with self.mock_task_module():
            task_dict1 = {
                'name': 'some task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'ordering': 5,
                'options': '{"something": 123, "else": true}',
            }
            status_code, data = self.simulate_request('/periodictask/', method='POST', data=task_dict1)
            self.assertEqual(status_code, 200)
            self.assertEqual(data['result']['status'], True)
            ptask_id1 = data['result']['value']

        # some invalid tasks
        invalid_task_dicts = [
            # invalid ordering
            {
                'name': 'some other task',
                'active': False,
                'nodes': 'a, b',
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'ordering': '-3',
                'options': '{"something": "123", "else": true}',
            },
            # no nodes
            {
                'name': 'some other task',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'options': '{"something": "123", "else": true}',
            },
            # empty nodes
            {
                'name': 'some other task',
                'active': False,
                'interval': '0 8 * * *',
                'nodes': '    ',
                'taskmodule': 'UnitTest',
                'options': '{"something": "123", "else": true}',
            },
            # unknown taskmodule
            {
                'name': 'some other task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'Unknown',
                'options': '{"something": "123"}',
            },
            # invalid interval
            {
                'name': 'some other task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': 'every day',
                'taskmodule': 'UnitTest',
                'options': '{"something": "123"}',
            },
            # invalid options
            {
                'name': 'some task',
                'nodes': 'pinode1, pinode2',
                'active': False,
                'interval': '0 8 * * *',
                'taskmodule': 'UnitTest',
                'options': '[1, 2]',
            }
        ]
        # all result in ERR905
        with self.mock_task_module():
            for invalid_task_dict in invalid_task_dicts:
                status_code, data = self.simulate_request('/periodictask/', method='POST',
                                                          data=invalid_task_dict)
                self.assertEqual(status_code, 400)
                self.assertFalse(data['result']['status'])
                self.assertIn('ERR905', data['result']['error']['message'])

        # create another task
        with self.mock_task_module():
            task_dict2 = {
                'name': 'some other task',
                'nodes': 'pinode1',
                'active': False,
                'interval': '0 8 * * 0',
                'taskmodule': 'UnitTest',
                'ordering': 2,
            }
            status_code, data = self.simulate_request('/periodictask/', method='POST',
                                                      data=task_dict2)

            self.assertEqual(status_code, 200)
            self.assertTrue(data['result']['status'])
            ptask_id2 = data['result']['value']

        # can list the periodic tasks; they are sorted by ordering
        # ('some other task' has ordering 2, 'some task' has 5)
        status_code, data = self.simulate_request('/periodictask/', method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(len(data['result']['value']), 2)
        self.assertEqual([task['name'] for task in data['result']['value']],
                         ['some other task', 'some task'])

        # find first task
        result_dict = data['result']['value'][1]
        self.assertEqual(result_dict['id'], ptask_id1)
        self.assertEqual(result_dict['ordering'], 5)
        self.assertEqual(result_dict['name'], 'some task')
        self.assertEqual(result_dict['active'], False)
        self.assertEqual(result_dict['interval'], '0 8 * * *')
        self.assertEqual(result_dict['nodes'], ['pinode1', 'pinode2'])
        self.assertEqual(result_dict['last_runs'], {})
        last_update = parse_timestamp(result_dict['last_update'])
        self.assertIsNotNone(last_update)
        # option values come back stringified
        self.assertEqual(result_dict['options'], {'something': '123', 'else': 'True'})


        # get one
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['id'], ptask_id1)

        # unknown ID
        status_code, data = self.simulate_request('/periodictask/4242', method='GET')
        self.assertEqual(status_code, 404)
        self.assertFalse(data['result']['status'])

        # update existing task: POSTing with an 'id' updates in place
        task_dict1['name'] = 'new name'
        task_dict1['options'] = '{"key": "value"}'
        task_dict1['id'] = ptask_id1
        task_dict1['ordering'] = '2'
        with self.mock_task_module():
            status_code, data = self.simulate_request('/periodictask/', method='POST',
                                                      data=task_dict1)
            self.assertEqual(status_code, 200)
            self.assertTrue(data['result']['status'])
            self.assertEqual(data['result']['value'], ptask_id1)

        # can list the periodic tasks in new order
        status_code, data = self.simulate_request('/periodictask/', method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(len(data['result']['value']), 2)
        self.assertEqual([task['name'] for task in data['result']['value']],
                         ['new name', 'some other task'])

        # get updated task; the update must have advanced last_update
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['id'], ptask_id1)
        self.assertEqual(data['result']['value']['ordering'], 2)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['options'], {'key': 'value'})
        self.assertGreater(parse_timestamp(data['result']['value']['last_update']),
                           last_update)
        last_update = parse_timestamp(data['result']['value']['last_update'])

        # enable
        status_code, data = self.simulate_request('/periodictask/enable/{}'.format(ptask_id1), method='POST')
        self.assertEqual(status_code, 200)

        # get updated task
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['active'], True)
        self.assertGreater(parse_timestamp(data['result']['value']['last_update']),
                           last_update)
        last_update = parse_timestamp(data['result']['value']['last_update'])

        # disable
        status_code, data = self.simulate_request('/periodictask/disable/{}'.format(ptask_id1), method='POST')
        self.assertEqual(status_code, 200)

        # get updated task
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['active'], False)
        self.assertGreater(parse_timestamp(data['result']['value']['last_update']),
                           last_update)

        # disable again without effect
        status_code, data = self.simulate_request('/periodictask/disable/{}'.format(ptask_id1), method='POST')
        self.assertEqual(status_code, 200)

        # get updated task
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value']['name'], 'new name')
        self.assertEqual(data['result']['value']['active'], False)

        # delete
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id1), method='DELETE')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value'], ptask_id1)

        # get updated task impossible now
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id1), method='GET')
        self.assertEqual(status_code, 404)
        self.assertFalse(data['result']['status'], False)

        # only 1 task left
        status_code, data = self.simulate_request('/periodictask/', method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(len(data['result']['value']), 1)

        # delete the second task as well
        status_code, data = self.simulate_request('/periodictask/{}'.format(ptask_id2), method='DELETE')
        self.assertEqual(status_code, 200)
        self.assertEqual(data['result']['value'], ptask_id2)

        # no tasks left
        status_code, data = self.simulate_request('/periodictask/', method='GET')
        self.assertEqual(status_code, 200)
        self.assertTrue(data['result']['status'])
        self.assertEqual(data['result']['value'], [])
    def test_01_calculate_next_timestamp_utc(self):
        """Next-run calculation when everything is expressed in UTC."""
        zone = tzutc()

        # Task that fires every day at 08:00; two nodes already have a
        # recorded run, "baz" does not.
        daily_task = {
            "id": 1,
            "active": True,
            "name": "task one",
            "interval": "0 8 * * *",
            "last_update": parse_timestamp("2018-06-23 07:55:00 UTC"),
            "nodes": ["foo", "bar", "baz"],
            "taskmodule": "some.module",
            "options": {"KEY2": "value number 2",
                        "key 4": "1234"},
            "last_runs": {
                "foo": parse_timestamp("2018-06-25 08:04:30 UTC"),
                "bar": parse_timestamp("2018-06-24 07:05:37 UTC"),
            }
        }

        self.assertEqual(calculate_next_timestamp(daily_task, "foo", zone),
                         parse_timestamp("2018-06-26 08:00 UTC"))
        self.assertEqual(calculate_next_timestamp(daily_task, "bar", zone),
                         parse_timestamp("2018-06-24 08:00 UTC"))
        # "baz" never ran, so its next run derives from last_update.
        self.assertEqual(calculate_next_timestamp(daily_task, "baz", zone),
                         parse_timestamp("2018-06-23 08:00 UTC"))

        # Same schedule, but with an empty run history for every node.
        unrun_task = {
            "id": 1,
            "active": True,
            "name": "task one",
            "interval": "0 8 * * *",
            "last_update": parse_timestamp("2018-06-24 07:55:00 UTC"),
            "nodes": ["foo", "bar"],
            "taskmodule": "some.module",
            "options": {"KEY2": "value number 2",
                        "key 4": "1234"},
            "last_runs": {}
        }

        self.assertEqual(calculate_next_timestamp(unrun_task, "foo", zone),
                         parse_timestamp("2018-06-24 08:00 UTC"))
        self.assertEqual(calculate_next_timestamp(unrun_task, "bar", zone),
                         parse_timestamp("2018-06-24 08:00 UTC"))

        # Record a run for "foo" only; its next run advances a day while
        # "bar" stays anchored to last_update.
        unrun_task["last_runs"]["foo"] = parse_timestamp("2018-06-24 08:00 UTC")
        self.assertEqual(calculate_next_timestamp(unrun_task, "foo", zone),
                         parse_timestamp("2018-06-25 08:00 UTC"))
        self.assertEqual(calculate_next_timestamp(unrun_task, "bar", zone),
                         parse_timestamp("2018-06-24 08:00 UTC"))

        # Weekday-only schedule: a Friday run rolls over to Monday.
        weekday_task = {
            "id": 2,
            "active": True,
            "name": "task two",
            "interval": "0 0 * * 1-5",
            "last_update": parse_timestamp("2018-06-24 08:00:00 UTC"),
            "nodes": ["foo", "bar"],
            "taskmodule": "some.module",
            "options": {"KEY2": "value number 2",
                        "key 4": "1234"},
            "last_runs": {
                "localhost": parse_timestamp("2018-06-29 00:00 UTC"),
            }
        }
        self.assertEqual(calculate_next_timestamp(weekday_task, "localhost", zone),
                         parse_timestamp("2018-07-02 00:00 UTC"))

        # Month-restricted schedule: 00:05 every day, but only in August.
        august_task = {
            "id": 3,
            "active": True,
            "name": "task two",
            "interval": "5 0 * 8 *",
            "last_update": parse_timestamp("2018-06-24 08:00:00 UTC"),
            "nodes": ["foo", "bar"],
            "taskmodule": "some.module",
            "options": {"KEY2": "value number 2",
                        "key 4": "1234"},
            "last_runs": {
                "localhost": parse_timestamp("2017-08-31 00:06 UTC"),
            }
        }
        self.assertEqual(calculate_next_timestamp(august_task, "localhost", zone),
                         parse_timestamp("2018-08-01 00:05 UTC"))

        # A non-cron interval string must be rejected.
        bad_task = {
            "id": 3,
            "active": True,
            "name": "task two",
            "interval": "every two days",
            "last_update": parse_timestamp("2018-06-24 08:00:00 UTC"),
            "nodes": ["foo", "bar"],
            "taskmodule": "some.module",
            "options": {"KEY2": "value number 2",
                        "key 4": "1234"},
            "last_runs": {
                "localhost": parse_timestamp("2017-08-31 00:06 UTC"),
            }
        }
        with self.assertRaises(ValueError):
            calculate_next_timestamp(bad_task, "localhost")
    def test_02_calculate_next_timestamp_localtime(self):
        """Next-run calculation in a local timezone.

        Russia has no DST, so Europe/Moscow is a stable +03:00 offset and
        expected results can be written as fixed UTC instants.
        """
        moscow = gettz("Europe/Moscow")

        # 08:00 local = 05:00 UTC, every day.
        ran_after_five = {
            "interval": "0 8 * * *",
            "last_update": parse_timestamp("2018-06-24 02:30 UTC"),
            "last_runs": {
                "foo": parse_timestamp("2018-06-25 05:04:30 UTC"),
            }
        }
        self.assertEqual(calculate_next_timestamp(ran_after_five, "foo", moscow),
                         parse_timestamp("2018-06-26 05:00 UTC"))
        self.assertEqual(calculate_next_timestamp(ran_after_five, "bar", moscow),
                         parse_timestamp("2018-06-24 05:00 UTC"))
        self.assertEqual(calculate_next_timestamp(ran_after_five, "this_node_does_not_exist", moscow),
                         parse_timestamp("2018-06-24 05:00 UTC"))

        # Same schedule, but the last run fell just before 05:00 UTC, so
        # today's 05:00 slot is still due.
        ran_before_five = {
            "interval": "0 8 * * *",
            "last_update": parse_timestamp("2018-06-24 02:30 UTC"),
            "last_runs": {
                "foo": parse_timestamp("2018-06-25 04:04:30 UTC"),
            }
        }
        self.assertEqual(calculate_next_timestamp(ran_before_five, "foo", moscow),
                         parse_timestamp("2018-06-25 05:00 UTC"))

        # Local midnight = 21:00 UTC of the previous day.
        midnight_task = {
            "interval": "0 0 * * *",
            "last_update": parse_timestamp("2018-06-24 02:30 UTC"),
            "last_runs": {
                "foo": parse_timestamp("2018-06-25 21:01 UTC"),
            }
        }
        self.assertEqual(calculate_next_timestamp(midnight_task, "foo", moscow),
                         parse_timestamp("2018-06-26 21:00 UTC"))

        # Weekday match must use the *local* day of week.
        wednesday_task = {
            "interval": "0 0 * * 3",
            "last_update": parse_timestamp("2018-06-24 02:30 UTC"),
            "last_runs": {
                "foo": parse_timestamp("2018-06-24 21:00 UTC"),  # this is actually monday 00:00 in russia
            }
        }
        self.assertEqual(calculate_next_timestamp(wednesday_task, "foo", moscow),
                         parse_timestamp("2018-06-26 21:00 UTC"))

        # Day-of-month schedule: 15th at 01:00 local (= 14th 22:00 UTC).
        monthly_task_ran = {
            "interval": "0 1 15 * *",
            "last_update": parse_timestamp("2018-06-24 02:30 UTC"),
            "last_runs": {
                "foo": parse_timestamp("2018-05-15 00:00 UTC"),
            }
        }
        self.assertEqual(calculate_next_timestamp(monthly_task_ran, "foo", moscow),
                         parse_timestamp("2018-06-14 22:00 UTC"))

        # Same schedule, last run just before the slot: slot is still due.
        monthly_task_due = {
            "interval": "0 1 15 * *",
            "last_update": parse_timestamp("2018-06-24 02:30 UTC"),
            "last_runs": {
                "foo": parse_timestamp("2018-05-14 21:59 UTC"),
            }
        }
        self.assertEqual(calculate_next_timestamp(monthly_task_due, "foo", moscow),
                         parse_timestamp("2018-05-14 22:00 UTC"))
Exemplo n.º 43
0
def parse_sitemap(row, seen):
    """
    Extract article entries from one fetched sitemap document.

    Parameters
    ----------
    row : dict
        A fetched sitemap record; must contain at least "url", "content",
        "site", "name", "city", "state", "loc", "is_dumpsterfire" and
        "selector".
    seen : set
        URLs already collected; matching entries are skipped and newly
        accepted URLs are added in place.

    Returns
    -------
    (rows, seen) : tuple of (list of dict, set)
        Accepted entries sorted by "lastmod" descending, and the updated
        ``seen`` set. Returns ``([], set())`` for an empty/missing document.
    """
    if not row or not row["content"]:
        return [], set()
    sitemap_url = row["url"]

    soup = BeautifulSoup(row["content"], "xml")
    elements = soup.findAll("url")
    rows = []
    for elem in elements:
        url_node = elem.find("loc")
        lastmod_node = elem.find("lastmod")
        xmlmeta = "\n".join([str(e) for e in elem.children]).encode("utf-8")

        # <lastmod> is optional and frequently malformed in the wild; fall
        # back to the sentinel date instead of dropping the entry.
        # (Was a bare `except:`; narrowed so Ctrl-C etc. still propagate.)
        try:
            lastmod = parse_timestamp(lastmod_node.text.strip())
        except Exception:
            lastmod = NULL_DATE

        # A <url> element without a <loc> (url_node is None) is unusable.
        try:
            url = url_node.text.strip()
        except AttributeError:
            continue

        if not url or url in seen:
            continue

        # Build a fresh dict instead of rebinding `row` (the original code
        # shadowed the parameter, so later iterations read metadata from
        # the previously built entry rather than the source record).
        entry = {
            "url": url,
            "site": row["site"],
            "name": row["name"],
            "city": row["city"],
            "state": row["state"],
            "loc": row["loc"],
            "lastmod": lastmod,
            "xmlmeta": xmlmeta,
            "is_dumpsterfire": row["is_dumpsterfire"],
            "selector": row['selector']
        }
        rows.append(entry)
        print(
            blue(
                json.dumps(
                    entry,
                    indent=4,
                    default=lambda x: str(x)
                    if isinstance(x,
                                  (bytes, datetime.datetime)) else x,
                )))
        seen.add(url)
        # NOTE(review): `>` allows MAX_ARTICLES_PER_SOURCE + 1 entries;
        # kept as-is to preserve existing behavior — confirm intent.
        if len(rows) > MAX_ARTICLES_PER_SOURCE:
            break

    rows = list(
        sorted(rows,
               key=lambda row: ensure_tztime(row["lastmod"]),
               reverse=True))
    print(
        magenta("[ fetch_sitemap ] "),
        f":: Extracted {len(rows)} urls from sitemap: {sitemap_url}",
    )
    return rows, seen
Exemplo n.º 44
0
    def list_pages(self,
                   *,
                   chunk=None,
                   chunk_size=None,
                   tags=None,
                   maintainers=None,
                   url=None,
                   title=None,
                   include_versions=None,
                   include_earliest=None,
                   include_latest=None,
                   source_type=None,
                   hash=None,
                   start_date=None,
                   end_date=None,
                   active=None):
        """
        List all Pages, optionally filtered by search criteria.

        Parameters
        ----------
        chunk : integer, optional
            pagination parameter
        chunk_size : integer, optional
            number of items per chunk
        tags : list of string, optional
        maintainers : list of string, optional
        url : string, optional
        title : string, optional
        include_versions : boolean, optional
        include_earliest : boolean, optional
        include_latest : boolean, optional
        source_type : string, optional
            such as 'versionista' or 'internet_archive'
        hash : string, optional
            SHA256 hash of Version content
        start_date : datetime, optional
        end_date : datetime, optional
        active : boolean, optional

        Returns
        -------
        response : dict
        """
        query = {
            'chunk': chunk,
            'chunk_size': chunk_size,
            'tags[]': tags,
            'maintainers[]': maintainers,
            'url': url,
            'title': title,
            'include_versions': include_versions,
            'include_earliest': include_earliest,
            'include_latest': include_latest,
            'source_type': source_type,
            'hash': hash,
            'capture_time': _time_range_string(start_date, end_date),
            'active': active
        }
        # Use a distinct local name so the `url` filter parameter above is
        # not shadowed.
        endpoint = f'{self._api_url}/pages'
        res = requests.get(endpoint, auth=self._auth, params=query)
        _process_errors(res)
        result = res.json()
        # Convert timestamp strings to datetime objects, mutating in place.
        for page in result['data']:
            page['created_at'] = parse_timestamp(page['created_at'])
            page['updated_at'] = parse_timestamp(page['updated_at'])
            # Optional 'earliest'/'latest' version summaries carry the same
            # three timestamp fields.
            for summary_key in ('earliest', 'latest'):
                if summary_key in page:
                    summary = page[summary_key]
                    for field in ('capture_time', 'created_at', 'updated_at'):
                        summary[field] = parse_timestamp(summary[field])
            if 'versions' in page:
                for version in page['versions']:
                    for field in ('created_at', 'updated_at', 'capture_time'):
                        version[field] = parse_timestamp(version[field])
        return result
Exemplo n.º 45
0
    def list_versions(self,
                      *,
                      page_id=None,
                      chunk=None,
                      chunk_size=None,
                      start_date=None,
                      end_date=None,
                      source_type=None,
                      hash=None,
                      source_metadata=None,
                      different=None,
                      include_change_from_previous=None,
                      include_change_from_earliest=None):
        """
        List Versions, optionally filtered by serach criteria, including Page.

        Parameters
        ----------
        page_id : string, optional
            restricts serach to Versions of a specific Page
        chunk : integer, optional
            pagination parameter
        chunk_size : integer, optional
            number of items per chunk
        start_date : datetime, optional
        end_date : datetime, optional
        source_type : string, optional
            such as 'versionista' or 'internetarchive'
        hash : string, optional
            SHA256 hash of Version content
        source_metadata : dict, optional
            Examples:

            * ``{'version_id': 12345678}``
            * ``{'account': 'versionista1', 'has_content': True}``
        different : boolean, optional
            If False, include versions that aren't actually different from the
            previous version of the same page in the response.
        include_change_from_previous : boolean, optional
            If True, include a `change_from_previous` field in each version
            that represents a change object between it and the previous version
            of the same page.
        include_change_from_earliest : boolean, optional
            If True, include a `change_from_earliest` field in each version
            that represents a change object between it and the earliest version
            of the same page.
        Returns
        -------
        response : dict
        """
        query = {
            'chunk': chunk,
            'chunk_size': chunk_size,
            'capture_time': _time_range_string(start_date, end_date),
            'source_type': source_type,
            'hash': hash,
            'different': different,
            'include_change_from_previous': include_change_from_previous,
            'include_change_from_earliest': include_change_from_earliest
        }
        # Nested metadata filters use the rails-style `source_metadata[key]`
        # query parameter form.
        if source_metadata is not None:
            for key, value in source_metadata.items():
                query[f'source_metadata[{key}]'] = value
        endpoint = (f'{self._api_url}/versions'
                    if page_id is None
                    else f'{self._api_url}/pages/{page_id}/versions')
        res = requests.get(endpoint, auth=self._auth, params=query)
        _process_errors(res)
        result = res.json()
        # Convert timestamp strings to datetime objects, mutating in place.
        for version in result['data']:
            for field in ('created_at', 'updated_at', 'capture_time'):
                version[field] = parse_timestamp(version[field])
        return result