def _notes_rss_poll(self):
    url_templ = 'http://api.openstreetmap.org/api/0.6/notes/%d.json'
    try:
        if not os.path.exists('notes_state.txt'):
            log.error("No notes_state file found to poll note feed.")
            return

        notes_state = self.readState('notes_state.txt')
        last_note_id = int(notes_state.get('last_note_id', None))

        while True:
            # Walk forward from the last note we know about until the API 404s.
            last_note_id += 1
            url = url_templ % last_note_id
            log.info("Requesting %s" % url)
            try:
                result = urllib2.urlopen(url)
                note = json.load(result)
                attrs = note.get('properties')
                opening_comment = attrs['comments'][0]
                author = opening_comment['user'].encode('utf-8') if 'user' in opening_comment else 'Anonymous'
                geo = note.get('geometry').get('coordinates')
                link = 'http://osm.org/browse/note/%d' % last_note_id
                location = ""
                country_code = None

                if stathat:
                    stathat.ez_post_count('*****@*****.**', 'new notes', 1, attrs['date_created'])

                try:
                    country_code, location = self.reverse_geocode(geo[1], geo[0])
                except urllib2.HTTPError as e:
                    log.warn("HTTP problem when looking for note location: %s" % (e))

                response = "%s posted a new note%s %s" % (author, location, link)
                log.info("Response is %s" % response)

                irc = world.ircs[0]
                for chan in irc.state.channels:
                    if chan == "#osm-bot" or country_code in _note_edit_region_channels.get(chan, ()):
                        msg = ircmsgs.privmsg(chan, response)
                        world.ircs[0].queueMsg(msg)
            except urllib2.URLError as e:
                # Only HTTPError carries a status code; a 404 means we've caught
                # up with the newest note, so back off by one and stop. Anything
                # else is re-raised so the outer handler logs it instead of the
                # loop spinning forever.
                if getattr(e, 'code', None) == 404:
                    log.info("%s doesn't exist. Stopping." % last_note_id)
                    last_note_id -= 1
                    break
                raise

        with open('notes_state.txt', 'w') as f:
            f.write('last_note_id=%s\n' % last_note_id)
    except Exception:
        log.error(traceback.format_exc())
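# The pollers in this plugin call a readState() helper that is not included in
# this snippet. Based on how notes_state.txt is written above (a single
# "last_note_id=..." line) and how state.txt is consumed below ('timestamp' and
# 'sequenceNumber' keys, matching the OSM replication state format), a minimal
# sketch could look like this. It is an illustrative assumption, not the
# plugin's actual implementation.
def readState(self, filename='state.txt'):
    """Parse a key=value state file into a dict, ignoring blanks and comments."""
    state = {}
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            key, _, value = line.partition('=')
            state[key.strip()] = value.strip()
    return state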
def die(self):
    log.info('Stopping D-Bus service')
    self.dbus_service.stop()

    if self.mainloop is not None:
        log.info('Stopping Glib main loop')
        self.mainloop.quit()
        self.mainloop_thread.join(timeout=1.0)
        if self.mainloop_thread.is_alive():
            log.warn('Glib main loop thread is still alive.')
        self.mainloop = None
        self.mainloop_thread = None

    for source in self.data_sources:
        try:
            schedule.removePeriodicEvent(source.NAME)
        except KeyError:
            pass

    super().die()
def _minutely_diff_poll(self):
    try:
        if not os.path.exists('state.txt'):
            log.error("No state file found to poll minutelies.")
            return

        log.info("Looking for new users.")

        seen_uids = {}
        keep_updating = True
        while keep_updating:
            state = self.readState()
            minuteNumber = int(isoToTimestamp(state['timestamp'])) / 60

            # Grab the next sequence number and build a URL out of it
            sqnStr = state['sequenceNumber'].zfill(9)
            url = "http://planet.openstreetmap.org/redaction-period/minute-replicate/%s/%s/%s.osc.gz" % (sqnStr[0:3], sqnStr[3:6], sqnStr[6:9])

            log.debug("Downloading change file (%s)." % (url))
            content = urllib2.urlopen(url)
            content = StringIO.StringIO(content.read())
            gzipper = gzip.GzipFile(fileobj=content)

            handler = OscHandler()
            parseOsm(gzipper, handler)

            for (id, prim) in itertools.chain(handler.nodes.iteritems(), handler.ways.iteritems(), handler.relations.iteritems()):
                uid = str(prim['uid'])
                if uid not in seen_uids:
                    seen_uids[uid] = {'changeset': prim['changeset'], 'username': prim['user']}
                if 'lat' in prim and 'lat' not in seen_uids[uid]:
                    seen_uids[uid]['lat'] = prim['lat']
                    seen_uids[uid]['lon'] = prim['lon']

            keep_updating = self.fetchNextState(state)

        log.info("There were %s users editing this time." % len(seen_uids))

        # Drop any users we've already written to uid.txt in a previous run.
        f = open('uid.txt', 'r')
        for line in f:
            for uid in seen_uids.keys():
                if uid in line:
                    seen_uids.pop(uid)
                    continue
            if len(seen_uids) == 0:
                break
        f.close()

        f = open('uid.txt', 'a')
        for (uid, data) in seen_uids.iteritems():
            f.write('%s\t%s\n' % (data['username'], uid))

            location = ""
            if 'lat' in data:
                try:
                    urldata = urllib2.urlopen('http://nominatim.openstreetmap.org/reverse?format=json&lat=%s&lon=%s' % (data['lat'], data['lon']))
                    info = json.load(urldata)
                    if 'address' in info:
                        address = info.get('address')
                        if 'country' in address:
                            location = address.get('country')
                        if 'state' in address:
                            location = "%s, %s" % (address.get('state'), location)
                        if 'county' in address:
                            location = "%s, %s" % (address.get('county'), location)
                        location = " near %s" % (location)
                except urllib2.HTTPError as e:
                    log.warn("HTTP problem when looking for edit location: %s" % (e))

            log.info("%s just started editing%s with changeset http://osm.org/browse/changeset/%s!" % (data['username'], location, data['changeset']))
        f.close()
    except Exception:
        log.error(traceback.format_exc())
def _minutely_diff_poll(self):
    try:
        if not os.path.exists('state.txt'):
            log.error("No state file found to poll minutelies.")
            return

        seen_uids = {}
        seen_changesets = self.seen_changesets

        state = self.readState('state.txt')
        while self.fetchNextState(state):
            state = self.readState('state.txt')

            # Grab the next sequence number and build a URL out of it
            sqnStr = state['sequenceNumber'].zfill(9)
            url = "http://planet.openstreetmap.org/replication/minute/%s/%s/%s.osc.gz" % (sqnStr[0:3], sqnStr[3:6], sqnStr[6:9])

            log.info("Downloading change file (%s)." % (url))
            content = urllib2.urlopen(url)
            content = StringIO.StringIO(content.read())
            gzipper = gzip.GzipFile(fileobj=content)

            handler = OscHandler()
            parseOsm(gzipper, handler)

            for (id, prim) in itertools.chain(handler.nodes.iteritems(), handler.ways.iteritems(), handler.relations.iteritems()):
                changeset_id = str(prim['changeset'])
                action = prim['action']
                prim_type = prim['type']

                # Tally per-changeset counts of each action and primitive type.
                changeset_data = seen_changesets.get(changeset_id, {})
                cs_type_data = changeset_data.get(prim_type, {})
                cs_type_data[action] = cs_type_data.get(action, 0) + 1
                cs_type_data['total_changes'] = cs_type_data.get('total_changes', 0) + 1
                changeset_data[prim_type] = cs_type_data
                changeset_data['total_changes'] = changeset_data.get('total_changes', 0) + 1
                changeset_data['last_modified'] = prim['timestamp']
                seen_changesets[changeset_id] = changeset_data

                uid = str(prim['uid'])
                if uid in seen_uids:
                    continue
                else:
                    seen_uids[uid] = {'changeset': prim['changeset'], 'username': prim['user']}

                if 'lat' in prim and 'lat' not in seen_uids[uid]:
                    seen_uids[uid]['lat'] = prim['lat']
                    seen_uids[uid]['lon'] = prim['lon']

            #log.info("Changeset actions: %s" % json.dumps(seen_changesets))

        # Check the changesets for anomalies
        now = datetime.datetime.utcnow()
        cs_flags = []
        for (id, cs_data) in seen_changesets.items():
            last_modified = datetime.datetime.utcfromtimestamp(cs_data['last_modified'])
            # total_seconds() so changesets idle for more than a day age out too.
            age = (now - last_modified).total_seconds()
            if age > 3600:
                del seen_changesets[id]
                continue

            total_changes = cs_data['total_changes']
            node_changes = cs_data.get('node', {}).get('total_changes', 0)
            way_changes = cs_data.get('way', {}).get('total_changes', 0)
            relation_changes = cs_data.get('relation', {}).get('total_changes', 0)
            node_pct = node_changes / float(total_changes)
            way_pct = way_changes / float(total_changes)
            relation_pct = relation_changes / float(total_changes)

            # Flag a changeset that's big and made up of all one primitive type
            if total_changes > 2000 and (node_pct > 0.97 or way_pct > 0.97 or relation_pct > 0.97):
                cs_flags.append((id, "it is mostly changes to one data type"))

            creates = cs_data.get('node', {}).get('create', 0) + cs_data.get('way', {}).get('create', 0) + cs_data.get('relation', {}).get('create', 0)
            mods = cs_data.get('node', {}).get('modify', 0) + cs_data.get('way', {}).get('modify', 0) + cs_data.get('relation', {}).get('modify', 0)
            deletes = cs_data.get('node', {}).get('delete', 0) + cs_data.get('way', {}).get('delete', 0) + cs_data.get('relation', {}).get('delete', 0)
            create_pct = creates / float(total_changes)
            mod_pct = mods / float(total_changes)
            delete_pct = deletes / float(total_changes)

            # Flag a changeset that's big and made up of only one change type
            if total_changes > 2000 and (create_pct > 0.97 or mod_pct > 0.97 or delete_pct > 0.97):
                cs_flags.append((id, "it is mostly creates, modifies, or deletes"))

        # Tell the channel about these problems
        irc = world.ircs[0]
        for (cs_id, reason) in cs_flags:
            if cs_id in seen_changesets and seen_changesets[cs_id].get('alerted_already'):
                continue

            response = "Changeset %s is weird because %s. http://osm.org/browse/changeset/%s" % (cs_id, reason, cs_id)
            log.info(response)
            for chan in irc.state.channels:
                if chan == "#osm-bot":
                    msg = ircmsgs.privmsg(chan, response)
                    world.ircs[0].queueMsg(msg)

            seen_changesets[cs_id]['alerted_already'] = True

        log.info("There were %s users editing this time." % len(seen_uids))

        if stathat:
            stathat.ez_post_value('*****@*****.**', 'users editing this minute', len(seen_uids), state['timestamp'])

        # Drop any users we've already written to uid.txt in a previous run.
        f = open('uid.txt', 'r')
        for line in f:
            for uid in seen_uids.keys():
                if uid in line:
                    seen_uids.pop(uid)
                    continue
            if len(seen_uids) == 0:
                break
        f.close()

        if stathat:
            stathat.ez_post_value('*****@*****.**', 'new users this minute', len(seen_uids), state['timestamp'])

        f = open('uid.txt', 'a')
        for (uid, data) in seen_uids.iteritems():
            f.write('%s\t%s\n' % (data['username'], uid))

            location = ""
            country_code = None
            if 'lat' in data:
                try:
                    country_code, location = self.reverse_geocode(data['lat'], data['lon'])
                except urllib2.HTTPError as e:
                    log.warn("HTTP problem when looking for edit location: %s" % (e))

            response = "%s just started editing%s with changeset http://osm.org/browse/changeset/%s" % (data['username'], location, data['changeset'])
            log.info(response)

            irc = world.ircs[0]
            for chan in irc.state.channels:
                if chan == "#osm-bot" or country_code in _new_uid_edit_region_channels.get(chan, ()):
                    msg = ircmsgs.privmsg(chan, response)
                    world.ircs[0].queueMsg(msg)
        f.close()
    except Exception:
        log.error("Exception processing new users: %s" % traceback.format_exc())
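# Both _notes_rss_poll() and the newer _minutely_diff_poll() call a
# reverse_geocode() helper that is not part of this snippet. Judging from the
# older poller above, which queries Nominatim directly and folds county, state,
# and country into a " near ..." suffix, the helper probably looks roughly like
# the sketch below. The exact behaviour (including the use of Nominatim's
# address.country_code field) is an assumption, not the plugin's confirmed
# implementation; it reuses the module's existing urllib2/json imports.
def reverse_geocode(self, lat, lon):
    """Return (country_code, location) for a coordinate via Nominatim.

    location is "" or a human-readable " near ..." suffix that can be appended
    directly to a message; country_code is a lowercase ISO 3166-1 alpha-2 code
    or None.
    """
    url = ('http://nominatim.openstreetmap.org/reverse?format=json&lat=%s&lon=%s'
           % (lat, lon))
    info = json.load(urllib2.urlopen(url))
    address = info.get('address', {})

    location = ""
    if 'country' in address:
        location = address.get('country')
    if 'state' in address:
        location = "%s, %s" % (address.get('state'), location)
    if 'county' in address:
        location = "%s, %s" % (address.get('county'), location)
    if location:
        location = " near %s" % location

    return (address.get('country_code'), location)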