def handle(self, *args, **options):
    with requests.Session() as session:
        session.post('https://geocaching.su', data=LOGIN_DATA)
        r = session.get('https://geocaching.su')
        if not logged(r.text):
            print('Authorization failed')
        else:
            for cache in Cach.objects.filter(author__isnull=True):
                r = session.get(
                    'http://www.geocaching.su/',
                    params={'pn': 101, 'cid': cache.pid}
                )
                author_uid = get_author(r.text)
                if author_uid:
                    author = Geocacher.objects.filter(
                        uid=int(author_uid)).first()
                    if author:
                        cache.author = author
                        cache.save()
                        print('saved', cache.pid, author_uid)
                    else:
                        print('author not found', author_uid)
            log(UPDATE_TYPE.set_caches_authors, 'OK')
    return 'Authors of caches have been updated'
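# `logged` is not defined in this section; judging by the older yplib-based
# scripts further down, a successful geocaching.su login is detected by the
# presence of the account's profile link in the returned HTML. A minimal
# sketch under that assumption (the username here is illustrative):
from bs4 import BeautifulSoup

def logged(html, username='galdor'):
    soup = BeautifulSoup(html, 'lxml')
    link = soup.find('a', attrs={'class': 'profilelink'}, text=username)
    return link is not None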
def main():
    start = time()
    for k, v in OPENSITES.items():
        print 'OPENSITE', k
        geosite = Geosite.objects.get(code=k)
        for rec in v.get('RECTANGLES'):
            process(rec, geosite, v)
        log(v.get('log_key'), 'OK')
    # count codes that occur more than once
    sql = """
        SELECT COUNT(*) FROM (
            SELECT g.code AS code, COUNT(id) AS cnt
            FROM geothing g
            GROUP BY g.code
            HAVING cnt > 1
        ) AS tbl
    """
    dc = sql2val(sql)
    message = 'doubles %s' % dc
    log('map_opencaching', message)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
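# `sql2val` and `execute_query` (used by several commands below) are assumed
# helpers around Django's raw cursor; a minimal sketch inferred from usage:
from django.db import connection

def execute_query(sql):
    with connection.cursor() as cursor:
        cursor.execute(sql)
        return cursor.rowcount

def sql2val(sql):
    # return the first column of the first row, or None if nothing matched
    with connection.cursor() as cursor:
        cursor.execute(sql)
        row = cursor.fetchone()
    return row[0] if row else None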
def handle(self, *args, **options):
    with requests.Session() as session:
        session.post('https://geocaching.su', data=LOGIN_DATA)
        r = session.get('https://geocaching.su')
        if not logged(r.text):
            print('Authorization failed')
        else:
            ids = LogRecommendCach.objects.all().values_list(
                'cacher_uid', flat=True)
            for uid in Geocacher.objects.exclude(uid__in=ids).values_list(
                    'uid', flat=True):
                r = session.get(
                    'http://www.geocaching.su/site/popup/userstat.php',
                    params={'s': 3, 'uid': uid})
                # only the cache id is needed from each data tuple
                for cid, _, _, _ in get_caches_data(uid, r.text):
                    cache = get_object_or_none(Cach, pid=cid)
                    if cache:
                        LogRecommendCach.objects.get_or_create(
                            cacher_uid=uid, cach_pid=cid)
            log(UPDATE_TYPE.gcsu_logs_recommended, 'OK')
    return 'List of recommended caches has been updated'
def handle(self, *args, **options):
    with requests.Session() as session:
        session.post('https://geocaching.su', data=LOGIN_DATA)
        r = session.get('https://geocaching.su')
        if not logged(r.text):
            print('Authorization failed')
        else:
            r = session.get('http://www.geocaching.su/', params={'pn': 107})
            for uid in get_geocachers_uids(r.text):
                r = session.get(
                    'http://www.geocaching.su/site/popup/userstat.php',
                    params={'s': 2, 'uid': uid})
                for cid, found_date, grade, _ in get_caches_data(uid, r.text):
                    cache = get_object_or_none(Cach, pid=cid)
                    if cache and found_date:
                        the_log, created = LogSeekCach.objects.get_or_create(
                            cacher_uid=uid, cach_pid=cid)
                        if created:
                            the_log.found_date = found_date
                            the_log.grade = grade
                            the_log.save()
            log(UPDATE_TYPE.gcsu_new_logs_found, 'OK')
    return 'List of found caches has been updated'
def main():
    updated = get_object_or_none(Variable, name='map_updated')
    if not updated:
        return
    updated.value = 'successful'
    updated.save()
    log('map', 'success')
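# `get_object_or_none` is used throughout these commands but never defined in
# this section; a minimal sketch of the usual Django helper of this name:
def get_object_or_none(model, **kwargs):
    # like Model.objects.get(), but returns None instead of raising
    try:
        return model.objects.get(**kwargs)
    except model.DoesNotExist:
        return None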
def handle(self, *args, **options):
    sql_batches = ('sql/calculate_geocacher_statistics.sql',)
    for name in sql_batches:
        patch_it(name)
        print(name, 'processed')
    log(UPDATE_TYPE.geocacher_statistics, 'OK')
    return 'Geocacher statistics are updated'
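# `patch_it` is not shown here; its call sites pass either a path to an .sql
# batch file (as above) or a raw SQL string (in the search-statistics command
# below). A hypothetical sketch under that assumption; the naive split on ';'
# assumes no semicolons inside string literals:
import os
from django.db import connection

def patch_it(name_or_sql):
    sql = name_or_sql
    if os.path.exists(name_or_sql):  # treat an existing path as a batch file
        with open(name_or_sql) as f:
            sql = f.read()
    with connection.cursor() as cursor:
        for statement in sql.split(';'):
            if statement.strip():
                cursor.execute(statement)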
def handle(self, *args, **options):
    with requests.Session() as session:
        session.post('https://geocaching.su', data=LOGIN_DATA)
        r = session.get('https://geocaching.su')
        if not logged(r.text):
            print('Authorization failed')
        else:
            get_caches()
            log(UPDATE_TYPE.gcsu_caches, 'OK')
    return 'List of caches has been updated'
def main():
    updated = get_object_or_none(Variable, name='updated')
    if not updated:
        return
    if updated.value != 'successful':
        return
    is_updating = get_object_or_none(Variable, name='updating')
    if not is_updating:
        return
    is_updating.value = '0'
    is_updating.save()
    log('gcsu', 'success')
def handle(self, *args, **options):
    since_date = datetime.now() - timedelta(days=7)
    since_date_str = since_date.strftime('%Y%m%d%H%M%S')
    with requests.Session() as session:
        r = session.get(
            'http://geokrety.org/export_oc.php',
            params={'modifiedsince': since_date_str}
        )
    soup = BeautifulSoup(r.text, 'lxml')
    for kret in soup.find_all('geokret'):
        gkid = int(kret.get('id') or 0)
        if not gkid:
            continue
        name = kret.find('name').text
        distance = kret.distancetravelled.text
        position = kret.position
        latitude = float(position.get('latitude') or 0)
        longitude = float(position.get('longitude') or 0)
        wp = kret.waypoints.find_all('waypoint')
        waypoint = wp[0].text if wp else None
        state = kret.state.text
        geokret, created = GeoKret.objects.get_or_create(gkid=gkid)
        if name:
            geokret.name = name
        geokret.distance = distance
        if geokret.location is None:
            geokret.location = Location.objects.create(
                NS_degree=latitude, EW_degree=longitude)
        else:
            geokret.location.NS_degree = latitude
            geokret.location.EW_degree = longitude
            geokret.location.save()
        geokret.waypoint = waypoint
        geokret.state = int(state or 0)
        geokret.save()
    log(UPDATE_TYPE.geokrety_updated, 'OK')
    return 'Geokrety are updated'
def main():
    #if not switch_off_status_updated():
    #    return False
    start = time()
    patch_it()
    print ' calculated'
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    switch_on_status_updated()
    log('gcsu_cashstat', 'OK')
def main():
    start = time()
    uc = 0
    nc = 0
    geosite = Geosite.objects.get(code='OC_COM')
    for rec in RECTANGLES:
        uc, nc = process(rec, geosite, uc, nc)
    message = 'OK. updated %s, new %s' % (uc, nc)
    log('map_occom_caches', message)
    print message
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def handle(self, *args, **options):
    with requests.Session() as session:
        session.post('https://geocaching.su', data=LOGIN_DATA)
        r = session.get('https://geocaching.su')
        if not logged(r.text):
            print('Authorization failed')
        else:
            last_cid = Cach.objects.all().aggregate(
                last_pid=Max('pid'))['last_pid']
            get_caches(last_cid)
            log(UPDATE_TYPE.gcsu_new_caches, 'OK')
    return 'List of caches has been updated'
def main():
    start = time()
    for cache in Cach.objects.all():
        cache_stat, created = CachStat.objects.get_or_create(
            cach=cache, cach_pid=cache.pid)
        cache_stat.calculate_points()
    sql = """
        INSERT INTO geocacher_search_stat
            (geocacher_id, geocacher_pid, country, region)
        SELECT g.id, g.pid, c.name, gcs.name
        FROM geocacher g
        LEFT JOIN geocacher_search_stat gss ON g.pid = gss.geocacher_pid
        LEFT JOIN geo_country c ON g.country_iso3 = c.iso3
        LEFT JOIN geo_country_subject gcs
            ON c.iso = gcs.country_iso AND g.admin_code = gcs.code
        WHERE gss.geocacher_pid IS NULL
    """
    execute_query(sql)
    sql = """
        UPDATE geocacher_search_stat gss
        SET points=(
            SELECT ROUND(SUM(IFNULL(cs.points, 0))) AS points_sum
            FROM log_seek_cach lsc
            LEFT JOIN cach_stat cs ON lsc.cach_pid = cs.cach_pid
            WHERE lsc.cacher_pid = gss.geocacher_pid
        ),
        year_points=(
            SELECT ROUND(SUM(IFNULL(cs.points, 0))) AS points_sum
            FROM log_seek_cach lsc
            LEFT JOIN cach_stat cs ON lsc.cach_pid = cs.cach_pid
            WHERE YEAR(lsc.found_date)=%s
                AND lsc.cacher_pid = gss.geocacher_pid
        )
    """ % date.today().year
    execute_query(sql)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    log('gcsu_rating', 'OK')
def main():
    if not switch_off_status_updated():
        return False
    LOAD_CACHES = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'),
                     ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'),
                     ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    if LOAD_CACHES:
        #Cach.objects.all().delete()
        cntr_list = []
        t = re.compile(r'\<td\>(\w\w\d+)\<\/td\>')
        for p in range(120):
            item_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=101',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('finded', '1'),
                             ('y', '0'), ('x', '0'), ('updown', '1')))
            html = yplib.show()
            code_list = t.findall(html)
            for code in code_list:
                pid = code[2:]
                item_list.append({'id': pid, 'code': code})
            # stop when a page repeats the previous one
            if item_list == cntr_list:
                break
            else:
                cntr_list = item_list
                check_cach_list(item_list)
    switch_on_status_updated()
    log('gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def handle(self, *args, **options):
    for cache in Cach.objects.exclude(author__isnull=True):
        cache_stat, created = CachStat.objects.get_or_create(
            cach=cache, cach_pid=cache.pid, geocacher=cache.author)
        cache_stat.calculate_points()
    queries = [
        """
        INSERT INTO geocacher_search_stat
            (geocacher_id, geocacher_uid, country, region)
        SELECT g.id, g.uid, c.name, gcs.name
        FROM geocacher g
        LEFT JOIN geocacher_search_stat gss ON g.uid = gss.geocacher_uid
        LEFT JOIN geo_country c ON g.country_iso3 = c.iso3
        LEFT JOIN geo_country_subject gcs
            ON c.iso = gcs.country_iso AND g.admin_code = gcs.code
        WHERE gss.geocacher_uid IS NULL
        """,
        """
        UPDATE geocacher_search_stat gss
        SET points=(
            SELECT ROUND(SUM(IFNULL(cs.points, 0))) AS points_sum
            FROM log_seek_cach lsc
            LEFT JOIN cach_stat cs ON lsc.cach_pid = cs.cach_pid
            WHERE lsc.cacher_uid = gss.geocacher_uid
        ),
        year_points=(
            SELECT ROUND(SUM(IFNULL(cs.points, 0))) AS points_sum
            FROM log_seek_cach lsc
            LEFT JOIN cach_stat cs ON lsc.cach_pid = cs.cach_pid
            WHERE YEAR(lsc.found_date)=%s
                AND lsc.cacher_uid = gss.geocacher_uid
        )
        """ % date.today().year,
    ]
    for sql in queries:
        patch_it(sql)
    log(UPDATE_TYPE.search_statistics, 'OK')
    return 'Geocacher search statistics are updated'
def main():
    #if not switch_off_status_updated():
    #    return False
    start = time()
    sql_batches = ('set_cach_country_code.sql',
                   'set_cach_oblast_code.sql',
                   'set_country_iso_for_geocachers.sql',
                   'set_admin_code_to_geocacher.sql',
                   'set_geonames_to_cach.sql',
                   'set_cache_country_subject.sql',
                   'crimea.sql')
    for name in sql_batches:
        patch_it('sql/' + name)
        print name, 'processed'
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    #switch_on_status_updated()
    log('gcsu_patch', 'OK')
def handle(self, *args, **options):
    sql_batches = (
        'set_cach_country_code.sql',
        'set_cach_oblast_code.sql',
    )
    for name in sql_batches:
        patch_it('sql/' + name)
        print(name, 'processed')
    # with requests.Session() as session:
    #     session.post('https://geocaching.su', data=LOGIN_DATA)
    #     r = session.get('https://geocaching.su')
    #     if not logged(r.text):
    #         print('Authorization failed')
    #     else:
    #         for uid in Geocacher.objects.filter(
    #                 country_iso3__isnull=True).values_list('uid', flat=True):
    #             r = session.get(
    #                 'http://www.geocaching.su/site/popup/userstat.php',
    #                 params={'s': 2, 'uid': uid})
    #             country = get_found_caches_countries(uid, r.text)
    #             set_country_code(uid, country)
    #         names = {''}
    #         for uid in Geocacher.objects.filter(
    #                 admin_code__isnull=True).values_list('uid', flat=True):
    #             r = session.get(
    #                 'http://www.geocaching.su/site/popup/userstat.php',
    #                 params={'s': 2, 'uid': uid})
    #             oblast = get_found_caches_oblast(uid, r.text)
    #             names.add(oblast)
    #             set_oblast_code(uid, oblast)
    log(UPDATE_TYPE.geocacher_patch, 'OK')
    return 'Geocacher data are updated'
def handle(self, *args, **options):
    with requests.Session() as session:
        session.post('https://geocaching.su', data=LOGIN_DATA)
        r = session.get('https://geocaching.su')
        if not logged(r.text):
            print('Authorization failed')
        else:
            for uid in range(200000):
                r = session.get(
                    'http://www.geocaching.su/profile.php?uid=%d' % uid)
                geocacher = get_user_profile(uid, r.text)
                if geocacher:
                    print(uid, geocacher.id, geocacher.nickname)
            log(UPDATE_TYPE.gcsu_geocachers, 'OK')
    return 'List of geocachers has been updated'
def handle(self, *args, **options):
    sql_batches = ('set_country_iso_for_geocachers.sql',
                   'set_admin_code_to_geocacher.sql',
                   'crimea.sql')
    for name in sql_batches:
        patch_it('sql/' + name)
        print(name, 'processed')
    with requests.Session() as session:
        session.post('https://geocaching.su', data=LOGIN_DATA)
        r = session.get('https://geocaching.su')
        if not logged(r.text):
            print('Authorization failed')
        else:
            for uid in Geocacher.objects.filter(
                    country_iso3__isnull=True).values_list('uid', flat=True):
                r = session.get(
                    'http://www.geocaching.su/site/popup/userstat.php',
                    params={'s': 2, 'uid': uid})
                country = get_found_caches_countries(uid, r.text)
                set_country_code(uid, country)
            names = {''}
            for uid in Geocacher.objects.filter(
                    admin_code__isnull=True).values_list('uid', flat=True):
                r = session.get(
                    'http://www.geocaching.su/site/popup/userstat.php',
                    params={'s': 2, 'uid': uid})
                oblast = get_found_caches_oblast(uid, r.text)
                names.add(oblast)
                set_oblast_code(uid, oblast)
    log(UPDATE_TYPE.geocacher_patch, 'OK')
    return 'Geocacher data are updated'
def handle(self, *args, **options):
    for geocacher in Geocacher.objects.filter(
            country_iso3__isnull=True, admin_code__isnull=True):
        country = get_subdiv_data(geocacher.latitude, geocacher.longitude)
        print(country)
        if country and country.get('status') == 'ok':
            c = get_object_or_none(GeoCountry, iso=country.get('country_id'))
            if c is not None:
                geocacher.country_iso3 = c.iso3
                geocacher.country = c.name
                geocacher.admin_code = country['sub_id']
                geocacher.oblast = country['sub_name']
                geocacher.save()
        elif country and country.get('status') == 'limit':
            # stop once the geocoding service reports its request limit
            break
    log(UPDATE_TYPE.set_geocachers_locations, 'OK')
    return 'Locations of geocachers have been updated'
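# `get_subdiv_data` is assumed to wrap the GeoNames countrySubdivision service
# (the same one the older location scripts below call directly); a hypothetical
# sketch returning the dict shape the caller expects ('status', 'country_id',
# 'sub_id', 'sub_name'); the endpoint and username are assumptions:
import requests

def get_subdiv_data(lat, lng, username='demo'):
    r = requests.get('http://api.geonames.org/countrySubdivisionJSON',
                     params={'lat': lat, 'lng': lng, 'username': username})
    data = r.json()
    if 'status' in data:  # GeoNames reports errors and rate limits here
        return {'status': 'limit'}
    if not data.get('countryCode'):
        return {'status': 'unknown'}
    return {
        'status': 'ok',
        'country_id': data.get('countryCode'),
        'sub_id': data.get('adminCode1'),
        'sub_name': data.get('adminName1'),
    }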
def main():
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'),
                     ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'),
                     ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    excluded_id = [118575, 111821, 109578, 96417]
    all_id = []
    t = re.compile(r'\?pid=(\d+)')
    for k in range(10):
        r = yplib.post2('http://www.geocaching.su/?pn=108',
                        (('sort', '2'), ('page', str(k)),
                         ('in_page', '1000'), ('updown', '2')))
        soup = yplib.soup()
        a_list = soup.findAll('a', {'class': "profilelink"})
        for a in a_list[:-1]:
            if a.get('onclick'):
                # cast to int so the comparison against excluded_id works
                user_id = int(t.findall(a['onclick'])[0])
                if user_id not in all_id and user_id not in excluded_id:
                    all_id.append(user_id)
    check_id_list(all_id)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    log('upd_gcsu_cachers', 'OK')
def main():
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'),
                     ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'),
                     ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    t = re.compile(r'\<td\>(\w\w\d+)\<\/td\>')
    for p in range(30):
        item_list = []
        r = yplib.post2('http://www.geocaching.su/?pn=101',
                        (('sort', '1'), ('page', str(p)),
                         ('in_page', '1000'), ('finded', '1'),
                         ('y', '0'), ('x', '0'), ('updown', '1')))
        html = yplib.show()
        code_list = t.findall(html)
        for code in code_list:
            pid = code[2:]
            item_list.append({'id': pid, 'code': code})
        print 'count %s' % len(item_list)
        check_cach_list(item_list)
    log('upd_gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def main(processed_pid):
    #if not switch_off_status_updated():
    #    return False
    LOAD_GEO_LOCATION = True
    start = time()
    if LOAD_GEO_LOCATION:
        #for cach in Cach.objects.all().filter(pid__gt=processed_pid).order_by('pid')[:1990]:
        caches = Cach.objects.all().extra(
            where=["country_code IS NULL OR admin_code IS NULL "
                   "OR admin_code='777'"]).order_by('pid')[:1000]
        for cach in caches:
            lat = cach.latitude_degree
            lng = cach.longitude_degree
            if lat is None or lng is None:
                continue
            # retry with small coordinate offsets when the exact point
            # yields no subdivision
            d = ((0, 0), (0.01, 0), (-0.01, 0), (0, 0.01), (0, -0.01))
            cnt = 0
            soup = None
            while cnt < 5:
                url = ('http://api.geonames.org/countrySubdivision'
                       '?username=galdor&lat=%s&lng=%s&lang=en'
                       % (lat + d[cnt][0], lng + d[cnt][1]))
                print
                print cach.pid, url
                yplib.get(url)
                try:
                    soup = yplib.soup()
                except:
                    # one retry on a parse failure
                    yplib.get(url)
                    try:
                        soup = yplib.soup()
                    except:
                        soup = None
                if soup and soup.find('countrysubdivision'):
                    break
                cnt += 1
            if soup is None:
                print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, \
                    cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, \
                    cach.loc_EW_minute
                continue
            item = soup.find('countrycode')
            if item and item.text:
                cach.country_code = item.text.encode('utf8')
            if soup.admincode1 and soup.admincode1.text:
                cach.admin_code = soup.admincode1.text
            item = soup.find('code', {'type': 'FIPS10-4'})
            if item:
                cach.code_fips10_4 = item.text
            item = soup.find('code', {'type': 'ISO3166-2'})
            if item:
                cach.code_iso3166_2 = item.text
            item = soup.find('countryname')
            if item:
                cach.country_name = item.text.encode('cp1251')
            if soup.adminname1:
                cach.oblast_name = soup.adminname1.text.encode('cp1251')
            if cach.country_code and len(cach.country_code) == 2:
                cach.save()
            else:
                print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, \
                    cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, \
                    cach.loc_EW_minute
    count_without_country = Cach.objects.filter(
        country_code__isnull=True).count()
    count_without_subject = Cach.objects.filter(
        admin_code__isnull=True).count()
    print '%s have no country' % count_without_country
    print '%s have no country subject' % count_without_subject
    sql = ("UPDATE cach SET admin_code='777', "
           "oblast_name='undefined subject' "
           "WHERE country_code IS NOT NULL AND admin_code IS NULL")
    execute_query(sql)
    sql = "SELECT COUNT(*) FROM cach WHERE country_code IS NULL"
    undefined_country_count = sql2val(sql)
    sql = ("SELECT COUNT(*) FROM cach "
           "WHERE admin_code IS NULL OR admin_code = '777'")
    undefined_subject_count = sql2val(sql)
    undefined_count = '%s/%s' % (undefined_country_count,
                                 undefined_subject_count)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    #switch_on_status_updated()
    log('gcsu_location', 'OK %s' % undefined_count)
def main():
    from StringIO import StringIO
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    geosite = Geosite.objects.get(code='OCDE')
    countries = GeoCountry.objects.all().values_list('iso', flat=True)
    sql = "SELECT `value` FROM variables WHERE `name`='last_ocde_updated'"
    lastdate = sql2val(sql)
    if not lastdate:
        lastdate = '20000101000000'
    statuses = []
    types = []
    oc_count = 0
    gc_count = 0
    nc_count = 0
    k = 0
    uc = 0
    nc = 0
    for country in countries:
        url = ('http://opencaching.de/xml/ocxml11.php'
               '?modifiedsince=%s&cache=1&country=%s' % (lastdate, country))
        response = urllib2.urlopen(url)
        xml = response.read()
        try:
            root = ET.XML(xml)
        except Exception as e:
            print 'PARSING ERROR', country, e
            continue
        session_id = root[0].text
        records = root[1]
        caches_count = int(records.get("cache") or 0)
        if not caches_count:
            continue
        page_count = int(round(caches_count * 1.0 / CACHES_PER_PAGE, 0)) + 1
        for p in range(page_count):
            page_url = ('http://www.opencaching.de/xml/ocxml11.php'
                        '?sessionid=%s&file=%s' % (session_id, p + 1))
            page_response = urllib2.urlopen(page_url).read()
            zipdata = StringIO()
            zipdata.write(page_response)
            try:
                zf = zipfile.ZipFile(zipdata)
            except:
                continue
            for name in zf.namelist():
                uncompressed = zf.read(name)
                cache_root = ET.XML(uncompressed)
                for cache in cache_root.getchildren():
                    k += 1
                    if cache.tag != 'cache':
                        continue
                    # reset per-cache state so values cannot leak between
                    # cache elements in the same file
                    latitude = None
                    longitude = None
                    status = None
                    the_geothing = TheGeothing()
                    the_location = TheLocation()
                    for param in cache:
                        if param.tag == 'id':
                            the_geothing.pid = param.get('id')
                        elif param.tag == 'userid':
                            the_geothing.author = param.text
                        elif param.tag == 'name':
                            the_geothing.name = param.text
                        elif param.tag == 'longitude':
                            longitude = param.text
                        elif param.tag == 'latitude':
                            latitude = param.text
                        elif param.tag == 'type':
                            cache_type = param.get('short')
                            the_geothing.type_code = OCDE_TYPES.get(cache_type)
                            type_ = (param.get('id'), param.get('short'))
                            if type_ not in types:
                                types.append(type_)
                        elif param.tag == 'status':
                            status = int(param.get('id') or 0)
                            status_ = (status, param.text)
                            if status_ not in statuses:
                                statuses.append(status_)
                        elif param.tag == 'waypoints':
                            the_geothing.code = param.get('oc')
                            if the_geothing.code:
                                oc_count += 1
                            if param.get('gccom'):
                                gc_count += 1
                            if param.get('nccom'):
                                nc_count += 1
                        elif param.tag == 'datecreated':
                            parts = strptime(param.text, '%Y-%m-%d %H:%M:%S')
                            the_geothing.created_date = datetime(*parts[:6])
                    if latitude and longitude and status == 1:
                        the_location.NS_degree = float(latitude)
                        the_location.EW_degree = float(longitude)
                        if (the_geothing.code and
                                the_geothing.type_code in GEOCACHING_ONMAP_TYPES):
                            geothing = get_object_or_none(
                                Geothing, pid=the_geothing.pid, geosite=geosite)
                            if geothing is not None:
                                uc += update_geothing(
                                    geothing, the_geothing, the_location) or 0
                            else:
                                create_new_geothing(
                                    the_geothing, the_location, geosite)
                                nc += 1
    message = 'OK. updated %s, new %s' % (uc, nc)
    log('map_ocde_caches', message)
    print message
    sql = ("UPDATE `variables` SET `value`='%s' "
           "WHERE `name`='last_ocde_updated'" % ocde_timestamp())
    execute_query(sql)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def handle(self, *args, **options):
    url = 'https://www.shukach.com/ru/karta?destination=karta'
    with requests.Session() as session:
        r = session.post(url, data={
            'name': 'gps-fun',
            'pass': '******',
            'form_id': 'user_login_block',
        }).text
        if 'gps-fun' not in r:
            print('Authorization failed')
            return
        execute_query("DELETE FROM _temp_geothing")
        all_points_count = 0
        updated_things = 0
        updated_points = 0
        new_count = 0
        removed = []
        geosite = Geosite.objects.get(code='SHUKACH')
        for k in range(100):
            # request waypoints in batches of 1000 ids
            ids = range(k * 1000, (k + 1) * 1000)
            ids_str = ','.join([str(id) for id in ids])
            url = 'https://www.shukach.com/export_wpt'
            r = session.post(url, data={'wptnids': ids_str}).text
            wpt = r.split('\n')
            print(k, len(wpt))
            if len(wpt) < 6:
                continue
            for point in wpt:
                pid = code = None
                name = ''
                created_date = None
                author = type_code = ''
                NS_degree = EW_degree = None
                fields = point.split(',')
                if len(fields) > WPT_TITLE and fields[0].isdigit():
                    all_points_count += 1
                    p = re.compile(r'(\D+)(\d+)')
                    code = fields[WPT_CODE]
                    dgs = p.findall(code)
                    if dgs:
                        type_code = dgs[0][0]
                        pid = int(dgs[0][1])
                    if type_code in GEOCACHING_ONMAP_TYPES:
                        NS_degree = float(fields[WPT_LAT])
                        EW_degree = float(fields[WPT_LON])
                        # titles look like '<name> от <author>'
                        p = re.compile(r'(.+)от(.+)')
                        dgs = p.findall(fields[WPT_TITLE])
                        if dgs:
                            title = dgs[0]
                            name = title[0].strip()
                            author = title[1].strip()
                        else:
                            name = fields[WPT_TITLE]
                        d = float(fields[WPT_DATE])
                        created_date = Dephi_date_to_python_date(d)
                        date_str = created_date.strftime('%Y-%m-%d %H:%M')
                        ns_str = '{0:.9}'.format(NS_degree)
                        ew_str = '{0:.9}'.format(EW_degree)
                        sql = """
                            INSERT INTO _temp_geothing
                                (pid, code, name, created_date, author,
                                 type_code, NS_degree, EW_degree)
                            VALUES ({}, '{}', '{}', '{}', '{}', '{}', {}, {})
                        """.format(pid, code, name.replace("'", "\\'"),
                                   date_str, author, type_code, ns_str, ew_str)
                        execute_query(sql)
        shukach_id = sql2val("SELECT id FROM geosite WHERE code='SHUKACH'")
        # update existing geothings
        sql = """
            UPDATE geothing gt
            LEFT JOIN _temp_geothing AS t ON gt.pid=t.pid
            SET gt.created_date=t.created_date, gt.name=t.name,
                gt.author=t.author, gt.type_code=t.type_code
            WHERE gt.geosite_id={} AND t.code IS NOT NULL
                AND (gt.name != t.name OR gt.author != t.author
                     OR gt.type_code != t.type_code)
        """.format(shukach_id)
        updated_things = exec_sql(sql)
        # update locations that moved by more than ~1e-5 degrees
        sql = """
            UPDATE location AS l
            LEFT JOIN geothing AS gt ON l.id=gt.location_id
            LEFT JOIN _temp_geothing AS t ON gt.pid=t.pid
            SET l.NS_degree=t.NS_degree, l.EW_degree=t.EW_degree
            WHERE gt.geosite_id={} AND t.code IS NOT NULL
                AND ((ABS(l.NS_degree - t.NS_degree) > 0.00001)
                     OR (ABS(l.EW_degree - t.EW_degree) > 0.00001))
        """.format(shukach_id)
        updated_points = exec_sql(sql)
        # ids of geothings that disappeared from the export
        sql = """
            SELECT gt.id FROM geothing gt
            LEFT JOIN _temp_geothing AS t ON gt.pid=t.pid
            WHERE gt.geosite_id={} AND t.code IS NULL
        """.format(shukach_id)
        removed = sql2table(sql)
        new_count = 0
        # insert new geothings
        sql = """
            SELECT t.pid, t.code, t.name, t.created_date, t.author,
                   t.country_code, t.type_code, t.NS_degree, t.EW_degree
            FROM _temp_geothing AS t
            LEFT JOIN geothing gt ON gt.pid=t.pid AND gt.geosite_id={}
            WHERE gt.pid IS NULL
        """.format(shukach_id)
        cursor = get_cursor(sql)
        while True:
            row = cursor.fetchone()
            if row is None:
                break
            sql = """
                INSERT INTO location (NS_degree, EW_degree)
                VALUES ({}, {})
            """.format(row[7], row[8])
            execute_query(sql)
            location_id = sql2val("SELECT LAST_INSERT_ID()")
            sql = """
                INSERT INTO geothing
                    (geosite_id, pid, code, name, created_date, author,
                     type_code, location_id, admin_code)
                SELECT {}, t.pid, t.code, t.name, t.created_date, t.author,
                       t.type_code, {}, '777'
                FROM _temp_geothing AS t
                WHERE t.pid={}
            """.format(shukach_id, location_id, row[0])
            execute_query(sql)
            new_count += 1
        message = ('OK. %s waypoints, updated %s waypoints, '
                   'updated %s locations, new %s, removed %s' % (
                       all_points_count, updated_things or 0,
                       updated_points or 0, new_count, len(removed)))
        print(message)
        log('map_shukach', message)
    return 'List of caches from shukach.com has been updated'
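# `Dephi_date_to_python_date` (sic) is assumed to convert the WPT file's
# Delphi TDateTime value (a float: days since 1899-12-30, with the fraction
# encoding time of day) into a Python datetime; a sketch under that assumption:
from datetime import datetime, timedelta

def Dephi_date_to_python_date(d):
    return datetime(1899, 12, 30) + timedelta(days=d)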
def handle(self, *args, **options):
    url = 'https://cdn.geokrety.org/rzeczy/xml/export2-full.xml.bz2'
    filename = '/tmp/export2-full.xml.bz2'
    urllib.request.urlretrieve(url, filename)
    # unpack the bz2 archive next to the download
    with bz2.BZ2File(filename) as archive:
        data = archive.read()
    newfilepath = filename[:-4]
    with open(newfilepath, 'wb') as fh:
        fh.write(data)
    with open(newfilepath) as fh:
        xml = fh.read()
    soup = BeautifulSoup(xml, 'lxml')
    for kret in soup.find_all('geokret'):
        gkid = int(kret.get('id') or 0)
        if not gkid:
            continue
        name = kret.get('name')
        distance = int(kret.get('dist') or 0)
        latitude = float(kret.get('lat') or 0)
        longitude = float(kret.get('lon') or 0)
        waypoint = kret.get('waypoint')
        state = int(kret.get('state')) if kret.get('state') else None
        kret_type = kret.get('type') or None
        geokret, created = GeoKret.objects.get_or_create(gkid=gkid)
        if name:
            geokret.name = name
        geokret.distance = distance
        if geokret.location is None:
            geokret.location = Location.objects.create(
                NS_degree=latitude, EW_degree=longitude)
        else:
            geokret.location.NS_degree = latitude
            geokret.location.EW_degree = longitude
            geokret.location.save()
        geokret.waypoint = waypoint
        geokret.state = state
        geokret.type_code = kret_type
        geokret.save()
    # fetch names missing from the bulk export, reusing one session
    with requests.Session() as session:
        for kret in GeoKret.objects.filter(name__isnull=True):
            r = session.get('https://geokrety.org/konkret.php',
                            params={'id': kret.gkid})
            soup = BeautifulSoup(r.text, 'lxml')
            for cell in soup.find_all('td', attrs={'class': 'heading1'}):
                if cell.text and cell.text.strip().startswith('GeoKret'):
                    strong = cell.find('strong')
                    if strong:
                        kret.name = strong.text
                        kret.save()
                        print(kret.name)
    log(UPDATE_TYPE.geokrety_imported, 'OK')
    return 'Geokrety are imported'
def main():
    #if not switch_off_status_updated():
    #    return False
    LOAD_CACHES = True
    LOAD_GEO_LOCATION = False
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'),
                     ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'),
                     ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print 'OK'
    if LOAD_CACHES:
        r = yplib.get('http://www.geocaching.su/site/popup/selex.php')
        soup = yplib.soup()
        chbox_list = soup.findAll('input', type='checkbox')
        regions = []
        for chbox in chbox_list:
            v = chbox.get('value')
            if v and chbox.get('name', '') == 'point[]':
                regions.append(v)
        print regions
        data = [
            ('translit', '0'),
            ('fmt', 'wpt'),
            ('code_to_name', '1'),
            ('finded', '2'),
        ]
        for r in regions:
            data.append(('point[]', r))
        print data
        r = yplib.post2('http://www.geocaching.su/site/popup/export.php',
                        data)
        soup = yplib.soup()
        txt = soup.text
        print txt
        return  # everything below this early return never runs
        Cach.objects.all().delete()
        cntr_list = []
        t = re.compile(r'\<td\>(\w\w\d+)\<\/td\>')
        for p in range(100):
            item_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=101',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('finded', '1'),
                             ('y', '0'), ('x', '0'), ('updown', '1')))
            html = yplib.show()
            code_list = t.findall(html)
            for code in code_list:
                pid = code[2:]
                item_list.append({'id': pid, 'code': code})
            if item_list == cntr_list:
                break
            else:
                cntr_list = item_list
                check_cach_list(item_list)
    if LOAD_GEO_LOCATION:
        for cach in Cach.objects.all():
            lat = cach.latitude_degree
            lng = cach.longitude_degree
            if lat is not None and lng is not None:
                url = ('http://ws.geonames.org/countrySubdivision'
                       '?lat=%s&lng=%s&lang=ru' % (lat, lng))
                print cach.pid, url
                yplib.get(url)
                try:
                    soup = yplib.soup()
                except:
                    # retry in English if the Russian response fails to parse
                    url = ('http://ws.geonames.org/countrySubdivision'
                           '?lat=%s&lng=%s&lang=en' % (lat, lng))
                    yplib.get(url)
                    soup = yplib.soup()
                item = soup.find('countrycode')
                if item:
                    cach.country_code = item.text.encode('utf8')
                if soup.admincode1:
                    cach.admin_code = soup.admincode1.text
                item = soup.find('code', {'type': 'FIPS10-4'})
                if item:
                    cach.code_fips10_4 = item.text
                item = soup.find('code', {'type': 'ISO3166-2'})
                if item:
                    cach.code_iso3166_2 = item.text
                item = soup.find('countryname')
                if item:
                    cach.country_name = item.text.encode('cp1251')
                if soup.adminname1:
                    cach.oblast_name = soup.adminname1.text.encode('cp1251')
                print cach.pid, cach.country_name, cach.oblast_name
                cach.save()
            else:
                print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, \
                    cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, \
                    cach.loc_EW_minute
    switch_on_status_updated()
    log('gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def main():
    if not switch_off_status_updated():
        return False
    LOAD_GEOCACHERS = False
    LOAD_ABSENT_GEOCACHERS = False
    start = time()
    cursor = connection.cursor()
    cursor.execute('select * from geocacher')
    yplib.setUp()
    yplib.set_debugging(False)
    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'),
                     ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'),
                     ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print('Authorization failed')
        return False
    if LOAD_GEOCACHERS:
        Geocacher.objects.all().delete()
        cntr_list = []
        all_id = []
        t = re.compile(r'\?pid=(\d+)')
        for p in range(2500):
            print('page', p + 1)
            user_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=108',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('updown', '1')))
            soup = yplib.soup()
            a_list = soup.findAll('a', {'class': "profilelink"})
            for a in a_list[:-1]:
                if a.get('onclick'):
                    user_id = t.findall(a['onclick'])[0]
                    login = a.text.encode('utf8')
                    if user_id not in all_id:
                        user_list.append({'id': user_id, 'login': login})
                        all_id.append(user_id)
            # stop when a page repeats the previous one
            if user_list == cntr_list:
                break
            else:
                cntr_list = user_list
                check_id_list(user_list)
    if LOAD_ABSENT_GEOCACHERS:
        pid_list = (469, 406, 1224, 4400, 11910, 4456, 13439,
                    7707, 8887, 3156, 8094)
        user_list = [{'id': pid, 'login': u''} for pid in pid_list]
        check_id_list(user_list)
    elapsed = time() - start
    print("Elapsed time -->", elapsed)
    switch_on_status_updated()
    log('gcsu_geocachers', 'OK')
def main():
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    url = ('http://www.geocaching.su/rss/geokrety/api.php'
           '?interval=1y&ctypes=1,2,3,7&changed=1')
    f = urllib2.urlopen(url)
    xml = f.read()
    try:
        sxml = ET.XML(xml)
    except Exception as e:
        print type(e)
        print e
        return
    cnt_new = 0
    cnt_upd = 0
    geosite = Geosite.objects.get(code='GC_SU')
    for cache in sxml.getchildren():
        if cache.tag != 'cache':
            continue
        the_geothing = TheGeothing()
        the_location = TheLocation()
        for tag_ in cache.getchildren():
            if tag_.tag == 'code':
                the_geothing.code = tag_.text
            elif tag_.tag == 'autor':
                the_geothing.author = tag_.text
            elif tag_.tag == 'name':
                the_geothing.name = tag_.text
            elif tag_.tag == 'position':
                the_location.NS_degree = float(tag_.get('lat'))
                the_location.EW_degree = float(tag_.get('lon'))
            elif tag_.tag == 'cdate':
                date_ = tag_.text.split('-')
                if len(date_) == 3:
                    the_geothing.created_date = datetime(
                        int(date_[0]), int(date_[1]), int(date_[2]))
        if the_geothing.code:
            # split a code like 'TR1234' into type prefix and numeric pid
            p = re.compile(r'(\D+)(\d+)')
            dgs = p.findall(the_geothing.code)
            if dgs:
                code_data = dgs[0]
                the_geothing.pid = int(code_data[1])
                the_geothing.type_code = code_data[0]
                if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                    geothing = get_object_or_none(
                        Geothing, pid=the_geothing.pid, geosite=geosite)
                    if geothing is not None:
                        cnt_upd += update_geothing(
                            geothing, the_geothing, the_location) or 0
                    else:
                        create_new_geothing(
                            the_geothing, the_location, geosite)
                        cnt_new += 1
    message = 'OK %s/%s' % (cnt_new, cnt_upd)
    log('map_gcsu_caches', message)
    print message
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def process(rectangle, geosite, params):
    print geosite
    bbox = [str(x) for x in rectangle]
    url = params.get('url_pattern') % (params['MY_CONSUMER_KEY'],
                                       '|'.join(bbox), params['FIELDS'])
    try:
        response = urllib2.urlopen(url)
    except Exception as e:
        print 'exception', e
        return
    data = response.read()
    caches_data = json.loads(data)
    caches = caches_data.get('results')
    if caches_data.get('more'):
        # remember blocks that exceed the per-request limit so they can be
        # split and re-fetched later
        BlockNeedBeDivided.objects.create(geosite=geosite,
                                          bb='|'.join(bbox),
                                          added=datetime.now())
    if not caches:
        return
    uc = 0
    nc = 0
    for code, cache in caches.iteritems():
        the_geothing = TheGeothing()
        the_location = TheLocation()
        locations = cache.get('location').split('|')
        the_location.NS_degree = float(locations[0])
        the_location.EW_degree = float(locations[1])
        the_geothing.code = cache.get('code')
        the_geothing.name = cache.get('name')
        if cache.get('status') != 'Available':
            continue
        the_geothing.type_code = OCPL_TYPES.get(cache.get('type'))
        cache_url = cache.get('url')
        if not cache_url:
            continue
        p = re.compile(params['code_re'])
        dgs = p.findall(cache_url)
        if not dgs:
            continue
        # opencaching cache ids are hexadecimal
        the_geothing.pid = int(dgs[0], 16)
        if cache.get('owner'):
            the_geothing.author = cache.get('owner').get('username')
        date_created = cache.get('date_created')
        if date_created:
            parts = date_created[:10].split('-')
            if parts and len(parts) == 3:
                the_geothing.created_date = datetime(
                    int(parts[0]), int(parts[1]), int(parts[2]))
        if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
            geothing = get_object_or_none(Geothing, pid=the_geothing.pid,
                                          geosite=geosite)
            if geothing is not None:
                uc += update_geothing(geothing, the_geothing,
                                      the_location) or 0
            else:
                create_new_geothing(the_geothing, the_location, geosite)
                nc += 1
    message = 'OK. updated %s, new %s' % (uc, nc)
    log(params.get('log_key'), message)