Example #1
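All eight examples are Python 2 management scripts from the same geocaching map project, shown without their module preamble. A minimal sketch of the imports they appear to rely on; yplib is the project's own browser wrapper, and the model and helper names are inferred from the calls below (assumptions, not a verified header):

import re
import json
import urllib2
from time import time, strptime
from datetime import datetime
import xml.etree.ElementTree as ET

from mechanize import BrowserStateError  # raised by the yplib.get() calls below
import yplib  # project-specific mechanize/BeautifulSoup wrapper (assumed)
# Project models, helpers and constants used in the snippets (assumed to come
# from the app's own modules):
# Geosite, Geothing, TheGeothing, TheLocation, Cach, Geocacher,
# LogCreateCach, LogSeekCach, LogRecommendCach, LogPhotoAlbum,
# GEOCACHING_ONMAP_TYPES, OCCZ_TYPES, OCPL_TYPES,
# get_object_or_none, update_geothing, create_new_geothing, check_cach_list,
# date_or_none, int_or_none, Dephi_date_to_python_date, location_was_changed,
# execute_query, exec_sql, sql2val, sql2table, get_cursor, log, log_error,
# switch_off_status_updated, switch_on_status_updated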
def main():
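    # Sync caches from the geocaching.su XML feed into the local DB: parse
    # each <cache> element and create or update the matching Geothing.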
    LOAD_CACHES = True
    
    start = time() 
    
    yplib.setUp()
    yplib.set_debugging(False)
    
    url = 'http://www.geocaching.su/rss/geokrety/api.php?interval=1y&ctypes=1,2,3,7&changed=1'
    
    f = urllib2.urlopen(url)
    xml = f.read()
    
    try:
        sxml = ET.XML(xml)
    except Exception as e:
        print type(e)
        print e
        return
    
    cnt_new = 0
    cnt_upd = 0
    caches = sxml.getchildren()     
    
    geosite = Geosite.objects.get(code='GC_SU')
    
    for cache in caches:
        if cache.tag == 'cache':
            the_geothing = TheGeothing()
            the_location = TheLocation()
            for tag_ in cache.getchildren():
                if tag_.tag == 'code':
                    the_geothing.code = tag_.text
                if tag_.tag == 'autor':
                    the_geothing.author = tag_.text
                if tag_.tag == 'name':
                    the_geothing.name = tag_.text
                if tag_.tag == 'position':
                    lat_degree = float(tag_.get('lat'))
                    the_location.NS_degree = lat_degree
                    lon_degree = float(tag_.get('lon'))
                    the_location.EW_degree = lon_degree                    
                if tag_.tag == 'cdate':
                    date_str = tag_.text
                    date_ = date_str.split('-')
                    if len(date_) == 3:
                        the_geothing.created_date = datetime(int(date_[0]), int(date_[1]), int(date_[2]))
            if the_geothing.code:
                p = re.compile('(\D+)(\d+)') 
                dgs = p.findall(the_geothing.code)
                if dgs:
                    code_data = dgs[0]
                    the_geothing.pid = int(code_data[1])
                    the_geothing.type_code = code_data[0]

            if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                geothing = get_object_or_none(Geothing, pid=the_geothing.pid, geosite=geosite)
                if geothing is not None:
                    cnt_upd += update_geothing(geothing, the_geothing, the_location) or 0

                else:
                    create_new_geothing(the_geothing, the_location, geosite)
                    cnt_new += 1

    message = 'OK %s/%s' % (cnt_new, cnt_upd)
    log('map_gcsu_caches', message)
    print message
    elapsed = time() - start
    print "Elapsed time -->", elapsed
Example #2
def main():
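    # Rebuild the user log tables (created/found/recommended caches, photo
    # albums) by scraping per-user statistics pages on geocaching.su.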
    if not switch_off_status_updated():
        return False

    LOAD_CREATED_CACHE_LOGS = False
    LOAD_SEEK_CACHE_LOGS = False
    LOAD_RECOMMEND_CACHE_LOGS = False
    LOAD_PHOTOALBUM_LOGS = False

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)


    r = yplib.post2('http://www.geocaching.su/?pn=108',
            (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
             ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print
    print 'BEGIN'
    if LOAD_CREATED_CACHE_LOGS:
        LogCreateCach.objects.all().delete()
        print 'create logs deleted'
        cachers = Geocacher.objects.all()
        print cachers.count()
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=1&uid=%s' % cacher.uid
                try:
                    yplib.get(url)
                except BrowserStateError:
                    continue
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    #print tbl
                    rows = tbl.findAll('tr')
                    #print len(rows)
                    for row in rows:
                        cach_pid = created_date = None
                        coauthor = False
                        cell = row.find('td')
                        if cell:
                            #print cell
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            if u'(соавтор)' in txt:
                                coauthor = True
                            found = t1.findall(txt)
                            if found:
                                created_date = found[0]
                                created_date = date_or_none(created_date)
                            if cach_pid:
                                the_log = LogCreateCach(
                                    author_pid=cacher.pid,
                                    cach_pid=cach_pid)
                                the_log.created_date = created_date
                                the_log.coauthor = coauthor
                                the_log.save()
                                print 'saved'

    if LOAD_SEEK_CACHE_LOGS:
        LogSeekCach.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        t2 = re.compile(u'найден\s+(\d\d\.\d\d\.\d\d\d\d)')
        t3 = re.compile(u'оценен\s+на\s+(\d)')

        fh = open('cant_open_userstat.txt', 'w')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=2&uid=%s' % cacher.uid

                loaded = False
                cnter = 0
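                # retry: the stats page request intermittently raises BrowserStateError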

                while not loaded and cnter < 100:
                    try:
                        yplib.get(url)
                        soup = yplib.soup()
                        loaded = True
                    except BrowserStateError:
                        cnter += 1
                if not loaded:
                    print 'cannot go to %s' % url
                    fh.write(url + '\n')
                    continue

                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            found = t3.findall(txt)
                            if found:
                                g = found[0]
                                grade = int_or_none(g)
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            found = t2.findall(txt)
                            if found:
                                found_date = found[0]
                                found_date = date_or_none(found_date)
                            if cach_pid:
                                the_log = LogSeekCach(
                                    cacher_pid=cacher.pid,
                                    cach_pid=cach_pid)
                                the_log.found_date = found_date
                                the_log.grade = grade
                                the_log.save()
                                print 'saved'
        fh.close()

    if LOAD_RECOMMEND_CACHE_LOGS:
        LogRecommendCach.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile('\?pn\=101\&cid=(\d+)')

        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=3&uid=%s' % cacher.uid
                yplib.get(url)
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            if cach_pid:
                                the_log = LogRecommendCach(
                                    cacher_pid=cacher.pid,
                                    cach_pid=cach_pid)
                                the_log.save()
                                print 'saved'

    if LOAD_PHOTOALBUM_LOGS:
        LogPhotoAlbum.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile('showmemphotos\.php\?cid=(\d+)')

        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=4&uid=%s' % cacher.uid
                yplib.get(url)
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            print cacher.pid, cach_pid, txt.encode('utf8')
                            if cach_pid:
                                the_log = LogPhotoAlbum(
                                    cacher_pid=cacher.pid, cach_pid=cach_pid)
                                the_log.save()
                                print 'saved'

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    switch_on_status_updated()
    log('gcsu_logs', 'OK')
Example #3
def main():
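    # Export the cache list from geocaching.su (region-by-region WPT export)
    # and optionally resolve country/region codes via the geonames.org API.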
    #if not switch_off_status_updated():
    #    return False

    LOAD_CACHES = True
    LOAD_GEO_LOCATION = False

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print 'OK'

    if LOAD_CACHES:

        r = yplib.get('http://www.geocaching.su/site/popup/selex.php')
        soup = yplib.soup()
        #print soup
        #html = yplib.show()

        chbox_list = soup.findAll('input', type='checkbox')
        regions = []
        #print chbox_list
        print
        for chbox in chbox_list:
            #print chbox.get('value')
            v = chbox.get('value')
            if v and chbox.get('name', '') == 'point[]':
                regions.append(v)
        print
        print regions
        data = [
            ('translit', '0'),
            ('fmt', 'wpt'),
            ('code_to_name', '1'),
            ('finded', '2'),
        ]
        for r in regions:
            data.append(('point[]', r))
        print
        print data
        print
        r = yplib.post2('http://www.geocaching.su/site/popup/export.php', data)
        soup = yplib.soup()
        txt = soup.text
        print txt
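        # NOTE: debugging early return; the rest of this branch is unreachable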
        return

        Cach.objects.all().delete()
        cntr_list = []
        t = re.compile('\<td\>(\w\w\d+)\<\/td\>')
        for p in range(100):
            item_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=101',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('finded', '1'), ('y', '0'),
                             ('x', '0'), ('updown', '1')))
            html = yplib.show()
            code_list = t.findall(html)
            for code in code_list:
                pid = code[2:]
                item_list.append({'id': pid, 'code': code})

            if item_list == cntr_list:
                break
            else:
                cntr_list = item_list
                check_cach_list(item_list)
                #check_cach_list([{'id': 2746, 'code': 'EX2746'}])
            #break
    if LOAD_GEO_LOCATION:
        #.filter(pid=5408)
        for cach in Cach.objects.all():
            lat = cach.latitude_degree
            lng = cach.longitude_degree

            if lat is not None and lng is not None:
                url = 'http://ws.geonames.org/countrySubdivision?lat=%s&lng=%s&lang=ru' % (
                    lat, lng)
                print
                print cach.pid, url
                yplib.get(url)
                try:
                    soup = yplib.soup()
                except:
                    url = 'http://ws.geonames.org/countrySubdivision?lat=%s&lng=%s&lang=en' % (
                        lat, lng)
                    yplib.get(url)
                    soup = yplib.soup()
                item = soup.find('countrycode')
                if item:
                    cach.country_code = item.text.encode('utf8')

                if soup.admincode1:
                    cach.admin_code = soup.admincode1.text
                item = soup.find('code', {'type': 'FIPS10-4'})
                if item:
                    cach.code_fips10_4 = item.text
                item = soup.find('code', {'type': 'ISO3166-2'})
                if item:
                    cach.code_iso3166_2 = item.text
                item = soup.find('countryname')
                if item:
                    cach.country_name = item.text.encode('cp1251')
                if soup.adminname1:
                    cach.oblast_name = soup.adminname1.text.encode('cp1251')
                print cach.pid, cach.country_name, cach.oblast_name
                #print soup
                #print
                #print cach.pid
                cach.save()
            else:
                print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, cach.loc_EW_minute

    switch_on_status_updated()
    log('gcsu_caches', 'OK')

    elapsed = time() - start
    print "Elapsed time -->", elapsed
Example #4
def main():
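    # Import caches from the opencaching.cz XML search API and create or
    # update the matching Geothing records for the OCCZ geosite.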
    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    geosite = Geosite.objects.get(code='OCCZ')

    statuses = []
    types = []
    oc_count = 0

    k = 0
    uc = 0
    nc = 0

    url = 'http://www.opencaching.cz/search.php?searchto=searchbydistance&showresult=1&output=XML&sort=byname&latNS=N&lat_h=50&lat_min=5.123&lonEW=E&lon_h=14&lon_min=20.123&distance=1500&unit=km&count=500&startat=0'

    response = urllib2.urlopen(url).read()

    cache_root = ET.XML(response)

    docinfo = cache_root.getchildren()[0]
    result_count = 0
    for tag in docinfo.getchildren():
        if tag.tag == 'results':
            result_count = int(tag.text or 0)
    if result_count:
        for cache in cache_root.getchildren()[1:]:
            latitude = None
            longitude = None
            status = None
            created_date_str = ''
            k += 1
            if cache.tag == 'cache':
                the_geothing = TheGeothing()
                the_location = TheLocation()

                for param in cache:
                    if param.tag == 'id':
                        the_geothing.pid = param.text
                    if param.tag == 'owner':
                        the_geothing.author = param.text
                    if param.tag == 'name':
                        the_geothing.name = param.text
                    if param.tag == 'lon':
                        longitude = param.text
                    if param.tag == 'lat':
                        latitude = param.text
                    if param.tag == 'type':
                        cache_type = param.text
                        the_geothing.type_code = OCCZ_TYPES.get(cache_type)
                        if cache_type not in types:
                            types.append(cache_type)
                    if param.tag == 'status':
                        status = param.text
                        if status not in statuses:
                            statuses.append(status)
                    if param.tag == 'waypoint':
                        the_geothing.code = param.text
                        if the_geothing.code:
                            oc_count += 1
                    if param.tag == 'hidden':
                        created_date_str = param.text
                        parts = strptime(created_date_str, '%d.%m.%Y')
                        dt = datetime(parts[0], parts[1], parts[2], parts[3],
                                      parts[4], parts[5])
                        the_geothing.created_date = dt

                if latitude and longitude:

                    the_location.NS_degree = get_degree(latitude)
                    the_location.EW_degree = get_degree(longitude)
                    if the_geothing.code and the_geothing.pid and \
                       the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                        geothing = get_object_or_none(Geothing,
                                                      pid=the_geothing.pid,
                                                      geosite=geosite)
                        if geothing is not None:
                            uc += update_geothing(geothing, the_geothing,
                                                  the_location) or 0
                        else:
                            create_new_geothing(the_geothing, the_location,
                                                geosite)
                            nc += 1

    message = 'OK. updated %s, new %s' % (uc, nc)
    log('map_occz_caches', message)
    print message

    print
    print types
    print statuses

    elapsed = time() - start
    print "Elapsed time -->", elapsed
Example #5
def main():
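    # Full WPT export from geocaching.su: parse the CSV waypoint lines and
    # create or update Geothing records for the GC_SU geosite.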
    LOAD_CACHES = True

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    r = yplib.get('http://www.geocaching.su/site/popup/selex.php')
    soup = yplib.soup()

    chbox_list = soup.findAll('input', type='checkbox')
    regions = []

    for chbox in chbox_list:
        v = chbox.get('value')
        if v and chbox.get('name', '') == 'point[]':
            regions.append(v)

    data = [
        ('translit', '0'),
        ('fmt', 'wpt'),
        ('code_to_name', '1'),
        ('finded', '2'),
    ]
    for r in regions:
        data.append(('point[]', r))

    r = yplib.post2('http://www.geocaching.su/site/popup/export.php', data)
    soup = yplib.soup()
    wpt = soup.text.split('\n')

    WPT_CODE = 1
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 10
    WPT_DATE = 4

    geosite = Geosite.objects.get(code='GC_SU')

    print len(wpt), 'points'
    k = 0
    for point in wpt:
        k += 1
        fields = point.split(',')
        if fields[0].isdigit():
            the_geothing = TheGeothing()
            the_location = TheLocation()

            lat_degree = float(fields[WPT_LAT])
            the_location.NS_degree = lat_degree
            #the_location.NS_minute = (abs(lat_degree) - abs(the_location.NS_degree)) * 60
            lon_degree = float(fields[WPT_LON])
            the_location.EW_degree = lon_degree
            #the_location.EW_minute = (abs(lon_degree) - abs(the_location.EW_degree)) * 60

            p = re.compile('(\D+)(\d+)')
            dgs = p.findall(fields[WPT_CODE])
            if dgs:
                code_data = dgs[0]
                the_geothing.code = fields[WPT_CODE]
                the_geothing.pid = int(code_data[1])
                the_geothing.type_code = code_data[0]

            p = re.compile(u'(.+)от(.+)')
            dgs = p.findall(fields[WPT_TITLE])
            if dgs:
                title = dgs[0]
                the_geothing.name = title[0]
                the_geothing.author = title[1]

            d = float(fields[WPT_DATE])

            the_geothing.created_date = Dephi_date_to_python_date(d)

            if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                geothing = get_object_or_none(Geothing,
                                              pid=the_geothing.pid,
                                              geosite=geosite)
                if geothing is not None:
                    update_geothing(geothing, the_geothing, the_location)
                else:
                    create_new_geothing(the_geothing, the_location, geosite)

    log('map_gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
Example #6
def main():
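    # Import caches from opencaching.pl: the WPT export supplies the cache
    # list, the OKAPI geocache service fills in type, owner and creation date.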
    LOAD_CACHES = True

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    # log in
    r = yplib.post2('http://opencaching.pl/login.php',
                    (('LogMeIn', 'zaloguj'), ('email', 'kurianin'),
                     ('password', 'gjhjkjy'), ('action', 'login'),
                     ('target', 'index.php')))

    soup = yplib.soup()

    a = soup.find('a', text='kurianin')
    if not a:
        print 'Authorization failed'
        return False
    print 'OK'

    ## search page
    #r = yplib.get('http://opencaching.pl/search.php')
    #soup = yplib.soup()

    # get wpt file
    r = yplib.get(
        'http://opencaching.pl/search.php?searchto=searchbyname&showresult=1&expert=0&output=HTML&sort=bycreated&f_inactive=1&f_ignored=1&f_userfound=1&f_userowner=1&f_watched=0&f_geokret=0&country=PL&region=&cachetype=1111111110&cache_attribs=&cache_attribs_not=&cachesize_1=1&cachesize_2=1&cachesize_3=1&cachesize_4=1&cachesize_5=1&cachesize_6=1&cachesize_7=1&cachevote_1=-3&cachevote_2=3.000&cachenovote=1&cachedifficulty_1=1&cachedifficulty_2=5&cacheterrain_1=1&cacheterrain_2=5&cacherating=0&cachename=%25&cachename='
    )
    soup = yplib.soup(cp='utf8')
    link_to_wpt = ''

    #the_div = soup.find('div', {'class':"content2-pagetitle"})

    wpt_link = re.compile('ocpl\d+\.wpt\?.+count\=max.*')
    a_list = soup.findAll('a', {'class': "links", 'title': "Oziexplorer .wpt"})
    if a_list:
        for a in a_list:
            if a.get('href') and wpt_link.match(a.get('href')):
                link_to_wpt = a.get('href')
                break
    print link_to_wpt

    if link_to_wpt:
        r = yplib.get(link_to_wpt)
        soup = yplib.soup(cp='utf8')
        wpt = soup.text.split('\n')
    else:
        print 'no wpt link found'
        return

    WPT_CODE = 10
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 1
    WPT_DATE = 4
    MY_CONSUMER_KEY = 'fky3LF9xvWz9y7Gs3tZ6'
    FIELDS = 'code|name|location|type|status|url|owner|date_created'
    geocach_api_request = 'http://opencaching.pl/okapi/services/caches/geocache?cache_code=%s&consumer_key=%s&fields=%s'

    geosite = Geosite.objects.get(code='OCPL')
    print geosite
    print len(wpt), 'points'
    k = 0
    uc = 0
    nc = 0
    for point in wpt:
        k += 1
        fields = point.split(',')
        if fields[0] == '-1':
            the_geothing = TheGeothing()
            the_geothing.pid = 1
            the_location = TheLocation()

            lat_degree = float(fields[WPT_LAT])
            the_location.NS_degree = lat_degree
            #the_location.NS_minute = (abs(lat_degree) - abs(the_location.NS_degree)) * 60
            lon_degree = float(fields[WPT_LON])
            the_location.EW_degree = lon_degree
            #the_location.EW_minute = (abs(lon_degree) - abs(the_location.EW_degree)) * 60

            code_str = fields[WPT_CODE]
            parts = code_str.split('/')
            if len(parts) == 4:
                cache_code = parts[0]
                the_geothing.code = cache_code
                the_geothing.name = fields[WPT_TITLE]
                geothing_items = Geothing.objects.filter(
                    code=the_geothing.code, geosite=geosite)
                if geothing_items.count() > 0:
                    geothing = geothing_items[0]
                    if the_geothing.name == geothing.name and not location_was_changed(
                            geothing.location, the_location):
                        continue

                url = geocach_api_request % (cache_code, MY_CONSUMER_KEY,
                                             FIELDS)
                cache_data = None
                try:
                    response = urllib2.urlopen(url)
                    json_str = response.read()
                    cache_data = json.loads(json_str)
                    if cache_data.get('status') != 'Available':
                        continue
                    #print cache_data.get('type')
                    the_geothing.type_code = OCPL_TYPES.get(
                        cache_data.get('type'))
                    #print the_geothing.type_code
                    cache_url = cache_data.get('url')
                    if not cache_url:
                        continue
                    p = re.compile(u'OP([\dA-F]+)$')
                    dgs = p.findall(cache_url)
                    the_geothing.pid = int(dgs[0], 16)
                    owner_name = ''
                    if cache_data.get('owner'):
                        owner_name = cache_data.get('owner').get('username')
                    the_geothing.author = owner_name

                    date_created = cache_data.get('date_created')
                    if date_created:
                        date_created = date_created[:10]
                        parts = date_created.split('-')
                        if parts and len(parts) == 3:
                            dt = datetime(int(parts[0]), int(parts[1]),
                                          int(parts[2]))
                            the_geothing.created_date = dt

                except Exception:
                    print
                    print 'exception.'
                    print url
                    print cache_data
                    #break
                    continue

            if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                geothing = get_object_or_none(Geothing,
                                              pid=the_geothing.pid,
                                              geosite=geosite)
                if geothing is not None:
                    update_geothing(geothing, the_geothing, the_location)
                    uc += 1
                else:
                    create_new_geothing(the_geothing, the_location, geosite)
                    nc += 1
            #break

    sql = """
    select COUNT(*)  
    FROM
    (
    select g.code as code, count(id) as cnt 
    from geothing g 
    group by g.code
    having cnt > 1
    ) as tbl 
    """
    dc = sql2val(sql)
    message = 'OK. updated %s, new %s, doubles %s' % (uc, nc, dc)
    log('map_ocpl_caches', message)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
Example #7
def main():
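    # Import waypoints from shukach.com into the _temp_geothing staging table,
    # then sync the geothing/location tables with raw SQL (update/insert/diff).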
    WPT_CODE = 1
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 10
    WPT_DATE = 4

    start = time()

    geosite = Geosite.objects.get(code='SHUKACH')

    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.shukach.com/ru/karta?destination=karta',
            (('form_build_id', 'form-ce43c02c68d4d8db1cb0e91745797d06'),
             ('name', 'gps-fun'),
             ('pass', 'vjlthybpfwbzwbz'),
             ('form_id', 'user_login_block')))

    sql = """
    DELETE FROM _temp_geothing
    """
    execute_query(sql)

    all_points_count = 0
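    # request the WPT export in batches of 1000 ids (ids 0..49999)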
    for k in range(50):
        ids = range(k*1000, (k+1)*1000)
        #print k*1000, (k+1)*1000
        ids_str = ','.join([str(id) for id in ids])
        r = yplib.post2('http://www.shukach.com/export_wpt',
                (('wptnids', ids_str), ))

        wpt = yplib.cmd.show()

        wpt = wpt.split('\n')
        #print len(wpt)
        if len(wpt) < 6:
            continue
        for point in wpt:
            point = point.decode('cp1251').encode('utf-8')
            pid = code = None
            name = ''
            created_date = None
            author = type_code = ''
            NS_degree = EW_degree = None

            fields = point.split(',')
            if fields[0].isdigit():
                all_points_count += 1
                p = re.compile('(\D+)(\d+)')
                code = fields[WPT_CODE]
                dgs = p.findall(code)
                if dgs:
                    type_code = dgs[0][0]
                    pid = int(dgs[0][1])
                    if type_code in GEOCACHING_ONMAP_TYPES:
                        NS_degree = float(fields[WPT_LAT])
                        EW_degree = float(fields[WPT_LON])
                        p = re.compile(r'(.+)от(.+)')
                        dgs = p.findall(fields[WPT_TITLE])
                        if dgs:
                            title = dgs[0]
                            name = title[0].strip()
                            author = title[1].strip()
                        else:
                            name = fields[WPT_TITLE]
                        d = float(fields[WPT_DATE])
                        created_date = Dephi_date_to_python_date(d)
                        date_str = created_date.strftime('%Y-%m-%d %H:%M')
                        ns_str = '{0:.9}'.format(NS_degree)
                        ew_str = '{0:.9}'.format(EW_degree)
                        sql = """
                        INSERT INTO _temp_geothing
                        (pid, code, name, created_date, author,
                        type_code, NS_degree, EW_degree)
                        VALUES
                        ({},'{}','{}','{}', '{}', '{}', {}, {})
                        """.format(
                               pid, code, name.replace("'", "\\'"),
                               date_str, author, type_code,
                               ns_str, ew_str)

                        execute_query(sql)

    sql = "SELECT id FROM geosite WHERE code='SHUKACH'"
    shukach_id = sql2val(sql)

    # update existent geothings
    sql = """
    UPDATE geothing gt
         LEFT JOIN _temp_geothing as t
         ON gt.pid=t.pid
    SET gt.created_date=t.created_date,
        gt.name=t.name,
        gt.author=t.author,
        gt.type_code=t.type_code
    WHERE gt.geosite_id={} AND
        t.code IS NOT NULL AND
        (gt.name != t.name OR
        gt.author != t.author OR
        gt.type_code != t.type_code)
    """.format(shukach_id)
    #print sql
    updated_things = exec_sql(sql)

    sql = """
    UPDATE location as l
        LEFT JOIN geothing as gt ON l.id=gt.location_id
        LEFT JOIN _temp_geothing as t
         ON gt.pid=t.pid
    SET l.NS_degree=t.NS_degree,
        l.EW_degree=t.EW_degree
    WHERE gt.geosite_id={} AND
        t.code IS NOT NULL AND
        ((ABS(l.NS_degree - t.NS_degree) > 0.00001) OR
         (ABS(l.EW_degree - t.EW_degree) > 0.00001))
    """.format(shukach_id)
    updated_points = exec_sql(sql)

    # list of id of removed geothings
    sql = """
    SELECT gt.id
    FROM geothing gt
         LEFT JOIN _temp_geothing as t
         ON gt.pid=t.pid
    WHERE gt.geosite_id={} AND t.code IS NULL
    """.format(shukach_id)
    removed = sql2table(sql)

    new_count = 0
    # insert new geothings
    sql = """
    SELECT t.pid, t.code, t.name, t.created_date, t.author,
           t.country_code, t.type_code, t.NS_degree, t.EW_degree
    FROM _temp_geothing as t
         LEFT JOIN geothing gt ON gt.pid=t.pid AND gt.geosite_id={}
    WHERE gt.pid IS NULL
    """.format(shukach_id)
    cursor = get_cursor(sql)
    while True:
        row = cursor.fetchone()
        if row is None:
            break
        else:
            sql = """
            INSERT INTO location
            (NS_degree, EW_degree)
            VALUES
            ({}, {})
            """.format(row[7], row[8])

            execute_query(sql)
            sql = "SELECT LAST_INSERT_ID()"
            location_id = sql2val(sql)

            sql = """
            INSERT INTO geothing
            (geosite_id, pid, code, name, created_date, author,
            type_code, location_id, admin_code)
            SELECT {}, t.pid, t.code, t.name, t.created_date, t.author,
            t.type_code, {}, '777'
            FROM _temp_geothing as t
            WHERE t.pid={}
            """.format(shukach_id, location_id, row[0])
            execute_query(sql)
            new_count += 1

    message = 'OK. %s waypoints, updated %s waypoints, updated %s locations, new %s, removed %s' % (
        all_points_count,
        updated_things or 0,
        updated_points or 0,
        new_count,
        len(removed))
    print(message)
    log('map_shukach', message)
    elapsed = time() - start
    print "Elapsed time -->", elapsed

    return True
Example #8
def main():
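    # Incremental refresh of the geocaching.su log tables: mark everything
    # updated=False, re-scrape, then delete rows that were not seen again.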
    #if not switch_off_status_updated():
    #return False

    LOAD_CREATED_CACHE_LOGS = True
    LOAD_SEEK_CACHE_LOGS = True
    LOAD_RECOMMEND_CACHE_LOGS = True
    LOAD_PHOTOALBUM_LOGS = True

    start = time()

    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))

    soup = yplib.soup()

    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print
    print 'BEGIN'
    fh = open('cant_open_user_profile.txt', 'w')
    if LOAD_CREATED_CACHE_LOGS:
        LogCreateCach.objects.all().update(updated=False)
        print 'updating create-cache logs'
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=1&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = created_date = None
                        coauthor = False
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            if u'(соавтор)' in txt:
                                coauthor = True
                            found = t1.findall(txt)
                            if found:
                                created_date = found[0]
                                created_date = date_or_none(created_date)
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogCreateCach.objects.\
                                    get_or_create(
                                        author_pid=cacher[0],
                                        cach_pid=cach_pid)
                                the_log.created_date = created_date
                                the_log.coauthor = coauthor
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npc')
        LogCreateCach.objects.filter(updated=False).delete()

    if LOAD_SEEK_CACHE_LOGS:
        LogSeekCach.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list(
            'pid', 'uid')  #.filter(pid=18849)
        t = re.compile('\?pn\=101\&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        t2 = re.compile(u'найден\s+(\d\d\.\d\d\.\d\d\d\d)')
        t3 = re.compile(u'оценен\s+на\s+(\d)')

        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=2&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue

                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            found = t3.findall(txt)
                            if found:
                                g = found[0]
                                grade = int_or_none(g)
                            found = t2.findall(txt)
                            if found:
                                found_date = found[0]
                                found_date = date_or_none(found_date)
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogSeekCach.objects.\
                                    get_or_create(
                                        cacher_pid=cacher[0],
                                        cach_pid=cach_pid, )
                                the_log.found_date = found_date
                                the_log.grade = grade
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npf')
        LogSeekCach.objects.filter(updated=False).delete()

    if LOAD_RECOMMEND_CACHE_LOGS:
        LogRecommendCach.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile('\?pn\=101\&cid=(\d+)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=3&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogRecommendCach.\
                                    objects.get_or_create(
                                        cacher_pid=cacher[0],
                                        cach_pid=cach_pid)
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npr')
        LogRecommendCach.objects.filter(updated=False).delete()

    if LOAD_PHOTOALBUM_LOGS:
        LogPhotoAlbum.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile('showmemphotos\.php\?cid=(\d+)')

        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=4&uid=%s' % cacher[
                    1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])

                            txt = cell.text
                            if cach_pid:
                                print cacher[0], cach_pid, txt.encode('utf8')
                                the_log, created = LogPhotoAlbum.\
                                    objects.get_or_create(
                                        cacher_pid=cacher[0],
                                        cach_pid=cach_pid)
                                the_log.updated = True
                                the_log.save()
                else:
                    log_error(fh, cacher[1], 'npp')
        LogPhotoAlbum.objects.filter(updated=False).delete()

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    #switch_on_status_updated()
    log('gcsu_logs', 'OK')
    fh.close()