Пример #1
0
def main():
    """Download the current PM2.5 CSV and push its rows to the ODS realtime API.

    The ``Anfangszeit`` column is parsed and localized to Europe/Zurich into a
    new ``timestamp`` column. Rows whose ``PM25_Sensirion`` value is a single
    space are treated as missing and dropped. What remains is posted as a JSON
    list of ``{"anfangszeit": ..., "pm25": ...}`` records.
    """
    url = 'https://data-bs.ch/lufthygiene/nmbs_pm25/airmet_bs_museum_pm25_aktuell.csv'
    print(f'Downloading data from {url}...')
    urllib3.disable_warnings()
    # skiprows=range(1, 2) asks the reader to skip the line right after the header.
    df = common.pandas_read_csv(url,
                                sep=';',
                                encoding='cp1252',
                                skiprows=range(1, 2))

    print('Calculating ISO8601 time string...')
    naive_start = pd.to_datetime(df.Anfangszeit, format='%d.%m.%Y %H:%M:%S')
    df['timestamp'] = naive_start.dt.tz_localize('Europe/Zurich',
                                                 ambiguous='infer',
                                                 nonexistent='shift_forward')

    # To keep the job simple, all current data is re-pushed on every run
    # instead of filtering on the latest timestamp already present in ODS.
    realtime_df = df

    if len(realtime_df) > 0:
        print('Dropping empty values...')
        # A value of ' ' marks a missing measurement; turn it into NaN so
        # that dropna() can remove the row.
        cleaned = realtime_df.PM25_Sensirion.replace(' ', numpy.nan)
        realtime_df.PM25_Sensirion = cleaned
        realtime_df = realtime_df.dropna(subset=['PM25_Sensirion'])

        # Realtime API bootstrap data:
        # {
        #     "anfangszeit": "23.02.2021 10:30:00",
        #     "pm25": 13.3
        # }
        renamed = realtime_df.rename(columns={
            'Anfangszeit': 'anfangszeit',
            'PM25_Sensirion': 'pm25',
        })
        payload = renamed[['anfangszeit', 'pm25']].to_json(orient="records")
        print(
            f'Pushing {realtime_df.Anfangszeit.count()} rows to ODS realtime API...'
        )
        # payload is already a JSON string, so it goes in data=; json= would
        # be the right keyword for a Python object.
        r = common.requests_post(url=credentials.ods_live_push_api_url,
                                 data=payload,
                                 verify=False)
        r.raise_for_status()
    else:
        print('No rows to push to ODS... ')

    print('Job successful!')
Пример #2
0
def send(activity, inbox_url, user_domain):
    """Sends an ActivityPub request to an inbox.

    The request body is the JSON-serialized activity. The Date, Digest and
    Host headers are set explicitly because they are the headers listed for
    the HTTP Signature.

    Args:
      activity: dict, AS2 activity
      inbox_url: string
      user_domain: string, domain of the bridgy fed user sending the request

    Returns:
      requests.Response
    """
    # Pass the values as lazy %-style logging arguments. The message used to
    # be a plain (non-f) string containing {placeholders}, so the literal
    # braces were logged instead of the domain and the activity.
    logging.info('Sending AP request from %s: %s', user_domain,
                 json_dumps(activity, indent=2))

    # prepare HTTP Signature (required by Mastodon)
    # https://w3c.github.io/activitypub/#authorization
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    acct = 'acct:%s@%s' % (user_domain, user_domain)
    key = MagicKey.get_or_create(user_domain)
    auth = HTTPSignatureAuth(secret=key.private_pem(),
                             key_id=acct,
                             algorithm='rsa-sha256',
                             sign_header='signature',
                             headers=('Date', 'Digest', 'Host'))

    # deliver to inbox
    # The body is serialized once, up front, so that the Digest header is
    # computed over exactly the bytes that are sent.
    body = json_dumps(activity).encode()
    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        # required for HTTP Signature
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
        'Date':
        datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        # required by Mastodon
        # https://github.com/tootsuite/mastodon/pull/14556#issuecomment-674077648
        'Digest': 'SHA-256=' + b64encode(sha256(body).digest()).decode(),
        'Host': util.domain_from_link(inbox_url),
    }
    return common.requests_post(inbox_url,
                                data=body,
                                auth=auth,
                                headers=headers)
Пример #3
0
def send(activity, inbox_url, user_domain):
    """POSTs an AS2 activity to an ActivityPub inbox with an HTTP Signature.

    Args:
      activity: dict, AS2 activity to deliver
      inbox_url: string, URL of the recipient's inbox
      user_domain: string, domain of the bridgy fed user sending the request

    Returns:
      requests.Response
    """
    logging.info('Sending AP request from %s: %s', user_domain,
                 json.dumps(activity, indent=2))

    # HTTP Signature setup (required by Mastodon). Background:
    # https://w3c.github.io/activitypub/#authorization-lds
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    signer = HTTPSignatureAuth(
        secret=MagicKey.get_or_create(user_domain).private_pem(),
        key_id='acct:%s@%s' % (user_domain, user_domain),
        algorithm='rsa-sha256')

    # The Date header is required for HTTP Signature:
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
    now = datetime.datetime.utcnow()
    request_headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Date': now.strftime('%a, %d %b %Y %H:%M:%S GMT'),
    }

    # deliver to inbox
    return common.requests_post(inbox_url,
                                json=activity,
                                auth=signer,
                                headers=request_headers)
Пример #4
0
    def _try_salmon(self, resp):
        """Delivers self.source_obj to the target post's Salmon endpoint.

        Fetches the target HTML page (unless self.target_resp is already
        set), follows its Atom rel-alternate link, injects the first Atom
        entry's id into the source object, discovers the Salmon endpoint
        (from the feed first, then via webfinger), and posts the source as a
        signed magic envelope.

        Args:
          resp: Response

        Returns:
          True, once the Salmon slap has been posted
        """
        # fetch target HTML page, extract Atom rel-alternate link
        target = resp.target()
        if not self.target_resp:
            self.target_resp = common.requests_get(target)

        html = util.parse_html(self.target_resp)
        atom_link = html.find('link',
                              rel='alternate',
                              type=common.CONTENT_TYPE_ATOM)
        if not atom_link or not atom_link.get('href'):
            # NOTE(review): the code below assumes self.error() aborts the
            # request (i.e. raises) - confirm.
            self.error('Target post %s has no Atom link' % target, status=400)

        # resolve the Atom link against the page's <base href> (if any) and
        # then against the target URL itself
        base_url = ''
        base = html.find('base')
        if base and base.get('href'):
            base_url = base['href']
        atom_url = urllib.parse.urljoin(
            target, urllib.parse.urljoin(base_url, atom_link['href']))

        # fetch Atom target post, extract and inject id into source object
        feed = common.requests_get(atom_url).text
        parsed_feed = feedparser.parse(feed)
        logging.info('Parsed: %s', json_dumps(parsed_feed, indent=2))
        if not parsed_feed.entries:
            # report an empty feed explicitly instead of failing with an
            # IndexError on entries[0] below
            self.error('Target feed %s has no entries' % atom_url, status=400)
        entry = parsed_feed.entries[0]
        target_id = entry.id
        in_reply_to = self.source_obj.get('inReplyTo')
        source_obj_obj = self.source_obj.get('object')
        if in_reply_to:
            # only the inReplyTo element(s) pointing at this target get the id
            for elem in in_reply_to:
                if elem.get('url') == target:
                    elem['id'] = target_id
        elif isinstance(source_obj_obj, dict):
            source_obj_obj['id'] = target_id

        # Mastodon (and maybe others?) require a rel-mentioned link to the
        # original post's author to make it show up as a reply:
        #   app/services/process_interaction_service.rb
        # ...so add them as a tag, which atom renders as a rel-mention link.
        authors = entry.get('authors', None)
        if authors:
            url = entry.authors[0].get('href')
            if url:
                self.source_obj.setdefault('tags', []).append({'url': url})

        # extract and discover salmon endpoint
        logging.info('Discovering Salmon endpoint in %s', atom_url)
        endpoint = django_salmon.discover_salmon_endpoint(feed)

        if not endpoint:
            # try webfinger
            target_parts = urllib.parse.urlparse(target)
            # TODO: test missing email
            author = entry.get('author_detail', {})
            email = author.get('email') or '@'.join(
                (author.get('name', ''), target_parts.netloc))
            try:
                # TODO: always https?
                profile = common.requests_get(
                    '%s://%s/.well-known/webfinger?resource=acct:%s' %
                    (target_parts.scheme, target_parts.netloc, email),
                    verify=False)
                endpoint = django_salmon.get_salmon_replies_link(
                    profile.json())
            except requests.HTTPError as e:
                # Best effort: a failed lookup falls through to the
                # 'No salmon endpoint found!' error below, but leave a trace
                # of why.
                logging.warning('Webfinger lookup for %s failed: %s', email, e)

        if not endpoint:
            self.error('No salmon endpoint found!', status=400)
        logging.info('Discovered Salmon endpoint %s', endpoint)

        # construct reply Atom object
        self.source_url = resp.source()
        activity = self.source_obj
        if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT:
            # anything else is wrapped as the object of a bare activity
            activity = {'object': self.source_obj}
        entry = atom.activity_to_atom(activity, xml_base=self.source_url)
        logging.info('Converted %s to Atom:\n%s', self.source_url, entry)

        # sign reply and wrap in magic envelope
        domain = urllib.parse.urlparse(self.source_url).netloc
        key = MagicKey.get_or_create(domain)
        logging.info('Using key for %s: %s', domain, key)
        magic_envelope = magicsigs.magic_envelope(entry,
                                                  common.CONTENT_TYPE_ATOM,
                                                  key).decode()

        logging.info('Sending Salmon slap to %s', endpoint)
        common.requests_post(
            endpoint,
            data=common.XML_UTF8 + magic_envelope,
            headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
        return True
Пример #5
0
    def try_activitypub(self):
        """Tries to deliver the webmention source post to its target via ActivityPub.

        Reads the required `source` request parameter, fetches that page,
        converts its first h-entry to an ActivityStreams object, and POSTs it,
        signed with an HTTP Signature, to the inbox of the target's actor.

        Falls back to self.send_salmon() when the target turns out to be an
        HTML page rather than AS2, or when no inbox can be found.

        On ActivityPub delivery, the inbox's status code and response body are
        copied into self.response.

        Returns:
          the result of self.send_salmon() in the fallback cases, otherwise
          None
        """
        source = util.get_required_param(self, 'source')

        # fetch source page, convert to ActivityStreams
        source_resp = common.requests_get(source)
        # prefer the final URL reported by the response over the raw parameter
        source_url = source_resp.url or source
        source_mf2 = mf2py.parse(source_resp.text, url=source_url)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

        entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [source_url]

        source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

        # fetch target page as AS object. (target is first in-reply-to, like-of,
        # or repost-of, *not* target query param.)
        target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                              util.get_first(source_obj, 'object'))
        if not target:
            # NOTE(review): the code below assumes common.error() aborts the
            # request (i.e. raises) - confirm.
            common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                         'found in %s' % source_url)

        try:
            target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            # A 2xx HTML response attached to the exception means the target
            # was fetched fine but isn't AS2: record the response and hand off
            # to Salmon with the already-fetched page. Anything else is
            # re-raised.
            if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
                self.resp = Response.get_or_create(
                    source=source_url, target=e.response.url or target,
                    direction='out', source_mf2=json.dumps(source_mf2))
                return self.send_salmon(source_obj, target_resp=e.response)
            raise

        target_url = target_resp.url or target
        self.resp = Response.get_or_create(
            source=source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json.dumps(source_mf2))

        # find actor's inbox
        target_obj = target_resp.json()
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = target_obj.get('actor') or target_obj.get('attributedTo')
            if isinstance(actor, dict):
                # embedded actor object: use its inbox directly, and keep its
                # url in case the actor still has to be fetched below
                inbox_url = actor.get('inbox')
                actor = actor.get('url')
            if not inbox_url and not actor:
                common.error(self, 'Target object has no actor or attributedTo URL')

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return self.send_salmon(source_obj, target_resp=target_resp)

        # convert to AS2
        source_domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(source_domain)
        source_activity = common.postprocess_as2(
            as2.from_as1(source_obj), target=target_obj, key=key)

        # a Response that is already marked complete is re-sent as an Update
        if self.resp.status == 'complete':
            source_activity['type'] = 'Update'

        # prepare HTTP Signature (required by Mastodon)
        # https://w3c.github.io/activitypub/#authorization-lds
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07
        # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
        acct = 'acct:%s@%s' % (source_domain, source_domain)
        auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                                 algorithm='rsa-sha256')

        # deliver source object to target actor's inbox.
        headers = {
            'Content-Type': common.CONTENT_TYPE_AS2,
            # required for HTTP Signature
            # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
            'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        }
        # the inbox URL is resolved against the target URL before posting
        inbox_url = urlparse.urljoin(target_url, inbox_url)
        resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                    headers=headers)
        # mirror the inbox's status code and body back to our own caller
        self.response.status_int = resp.status_code
        if resp.status_code == 202:
            self.response.write('202 response! If this is Mastodon 1.x, their '
                                'signature verification probably failed. :(\n')
        self.response.write(resp.text)
Пример #6
0
    def send_salmon(self, source_obj, target_resp=None):
        """Delivers source_obj to the target post's Salmon endpoint.

        Marks self.resp as using the 'ostatus' protocol, follows the target
        page's Atom rel-alternate link, injects the first Atom entry's id into
        source_obj, discovers the Salmon endpoint (from the feed first, then
        via webfinger), and posts the source as a signed magic envelope.

        Args:
          source_obj: dict, the source object; modified in place (target id,
            and possibly a 'tags' entry for the target's author)
          target_resp: optional response for the target page; fetched from
            self.resp.target() when not given
        """
        self.resp.protocol = 'ostatus'

        # fetch target HTML page, extract Atom rel-alternate link
        if not target_resp:
            target_resp = common.requests_get(self.resp.target())

        parsed = BeautifulSoup(target_resp.content, from_encoding=target_resp.encoding)
        atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
        if not atom_url or not atom_url.get('href'):
            # NOTE(review): the code below assumes common.error() aborts the
            # request (i.e. raises) - confirm.
            common.error(self, 'Target post %s has no Atom link' % self.resp.target(),
                         status=400)

        # fetch Atom target post, extract and inject id into source object
        feed = common.requests_get(atom_url['href']).text
        parsed = feedparser.parse(feed)
        # default= substitutes '-' for any value json can't serialize
        logging.info('Parsed: %s', json.dumps(parsed, indent=2,
                                              default=lambda key: '-'))
        entry = parsed.entries[0]
        target_id = entry.id
        in_reply_to = source_obj.get('inReplyTo')
        source_obj_obj = source_obj.get('object')
        if in_reply_to:
            in_reply_to[0]['id'] = target_id
        elif isinstance(source_obj_obj, dict):
            source_obj_obj['id'] = target_id

        # Mastodon (and maybe others?) require a rel-mentioned link to the
        # original post's author to make it show up as a reply:
        #   app/services/process_interaction_service.rb
        # ...so add them as a tag, which atom renders as a rel-mention link.
        authors = entry.get('authors', None)
        if authors:
            url = entry.authors[0].get('href')
            if url:
                source_obj.setdefault('tags', []).append({'url': url})

        # extract and discover salmon endpoint
        logging.info('Discovering Salmon endpoint in %s', atom_url['href'])
        endpoint = django_salmon.discover_salmon_endpoint(feed)

        if not endpoint:
            # try webfinger
            parsed = urlparse.urlparse(self.resp.target())
            # Read the author defensively: an entry without author_detail (or
            # without a name) used to raise AttributeError here.
            author = entry.get('author_detail', {})
            email = author.get('email') or '@'.join(
                (author.get('name', ''), parsed.netloc))
            try:
                # TODO: always https?
                resp = common.requests_get(
                    '%s://%s/.well-known/webfinger?resource=acct:%s' %
                    (parsed.scheme, parsed.netloc, email), verify=False)
                endpoint = django_salmon.get_salmon_replies_link(resp.json())
            except requests.HTTPError as e:
                # Best effort: a failed lookup falls through to the
                # 'No salmon endpoint found!' error below, but leave a trace
                # of why.
                logging.warning('Webfinger lookup for %s failed: %s', email, e)

        if not endpoint:
            common.error(self, 'No salmon endpoint found!', status=400)
        logging.info('Discovered Salmon endpoint %s', endpoint)

        # construct reply Atom object
        source_url = self.resp.source()
        activity = (source_obj if source_obj.get('verb') in source.VERBS_WITH_OBJECT
                    else {'object': source_obj})
        entry = atom.activity_to_atom(activity, xml_base=source_url)
        logging.info('Converted %s to Atom:\n%s', source_url, entry)

        # sign reply and wrap in magic envelope
        domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(domain)
        logging.info('Using key for %s: %s', domain, key)
        magic_envelope = magicsigs.magic_envelope(
            entry, common.CONTENT_TYPE_ATOM, key)

        logging.info('Sending Salmon slap to %s', endpoint)
        common.requests_post(
            endpoint, data=common.XML_UTF8 + magic_envelope,
            headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
Пример #7
0
# print(f'Filtering data after {latest_ods_value} for submission to ODS via realtime API...')
# realtime_df = merged_df[merged_df['timestamp'] > latest_ods_value]
realtime_df = merged_df

if len(realtime_df) > 0:
    # Realtime API bootstrap data:
    # {
    #   "timestamp": "2020-07-28T01:35:00+02:00",
    #   "pegel": "245.16",
    #   "abfluss": "591.2"
    # }

    # Restrict the frame to the columns that are pushed; timestamp_text is
    # sent under the name "timestamp".
    realtime_df = realtime_df[['timestamp_text', 'pegel', 'abfluss']].rename(
        columns={'timestamp_text': 'timestamp'})

    payload = realtime_df.to_json(orient="records")
    print(
        f'Pushing {realtime_df.timestamp.count()} rows to ODS realtime API...')
    # payload is already a JSON string, so it goes in data=; json= would be
    # the right keyword for a Python object.
    r = common.requests_post(url=credentials.ods_live_push_api_url,
                             data=payload,
                             verify=False)
    r.raise_for_status()
else:
    print('No rows to push to ODS... ')

print('Job successful!')