Exemplo n.º 1
0
def get_imdb_film():
    """Collect IMDb release data and write not-found dumps for persons and distributors."""
    nof_persons, nof_distributors, dump, good = get_imdb_data(None, True, 1)

    # Both not-found payloads go to the same dump folder with different suffixes.
    for suffix, payload in (('nof_person', nof_persons),
                            ('nof_distributor', nof_distributors)):
        create_dump_file('%s_%s' % (dump, suffix), settings.NOF_DUMP_PATH,
                         '<data>%s</data>' % payload)

    cron_success('html', dump, 'films_data', 'Данные релизов')
Exemplo n.º 2
0
def imdb_film_ident():
    """Match local IMDb film records to films in the 'afisha' DB by imdb id.

    Films whose imdb id is already known in the 'afisha' database get
    their ``kid`` saved; the rest are appended to a not-found XML dump.
    """
    source = ImportSources.objects.get(url='http://www.imdb.com/')

    films = Films.objects.filter(kid=None)
    films_ids = [i.imdb_id for i in films]

    # Map idalldvd -> afisha film id for every already-known film.
    exist_films = Film.objects.using('afisha').filter(idalldvd__in=films_ids)
    exist_ids = {}
    for i in exist_films:
        exist_ids[i.idalldvd] = i.id

    data_nof_film = ''

    for i in films:
        name = None
        # Last matching name wins (status=1, language id 2).
        for j in i.name.filter(status=1, language__id=2):
            name = j.name.encode('utf-8')

        # BUGFIX: a film with no matching name previously crashed in
        # del_separator(None); skip such records instead.
        if name is None:
            continue

        slug = low(del_separator(name))

        kid = exist_ids.get(long(i.imdb_id))

        if kid:
            i.kid = kid
            i.save()
        else:
            full_url = '%stitle/tt%s/' % (source.url, i.imdb_id)
            data_nof_film += xml_noffilm(name, slug, None, None, i.imdb_id, 'Фильм не найден', full_url.encode('utf-8'), source.id)

    create_dump_file('%s_nof_film' % source.dump, settings.NOF_DUMP_PATH, '<data>%s</data>' % data_nof_film)
    cron_success('html', source.dump, 'films_ident', 'Идентификация')
Exemplo n.º 3
0
def nowru_ident():
    """Identify Now.ru films against our film base and dump the misses."""
    source = ImportSources.objects.get(url='http://www.now.ru/')
    ignored = get_ignored_films()

    data_nof_film = ''

    for record in Nowru.objects.filter(kid=None):
        ru_slug = low(del_separator(record.name_ru.encode('utf-8')))
        # Skip films explicitly marked as ignored.
        if ru_slug.decode('utf-8') in ignored:
            continue

        en_slug = low(del_separator(record.name_en.encode('utf-8')))
        kid, info = film_identification(ru_slug, en_slug, {}, {},
                                        year=record.year, source=source)
        if kid:
            record.kid = kid
            record.save()
        elif 'slug="%s"' % ru_slug not in data_nof_film:
            # Only report each unidentified slug once.
            data_nof_film += xml_noffilm(record.name_ru.encode('utf-8'),
                                         ru_slug,
                                         record.name_en.encode('utf-8'),
                                         en_slug,
                                         record.nowru_id, info, None,
                                         source.id)

    create_dump_file('%s_nof_film' % source.dump, settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % data_nof_film)
    cron_success('xml', source.dump, 'players', 'Онлайн плееры')
Exemplo n.º 4
0
def cron_dump_schedules_v4():
    """Dump v4 schedules to XML and JSON files and log the run."""
    # Use ``with`` so the log handle is closed (the original leaked it).
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('schedules *') + '\n')
    res = query_schedule_v4(None, None)
    result_xml, result_json = get_schedule_v4(res, None, True)
    save_dump(result_xml, None, None, 'schedule_v4')
    save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'schedule_v4', '', 'json')
    # BUGFIX: the original referenced an undefined ``source`` here
    # (NameError); pass the dump name directly.  TODO confirm the
    # expected label against other cron_success callers.
    cron_success('api', 'schedule_v4', 'schedules_v4', 'Сеансы v4')
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('schedules') + '\n')
Exemplo n.º 5
0
def cron_dump_releases_ua():
    """Dump Ukrainian releases to XML and JSON files and log the run."""
    # ``with`` closes the log handle (the original leaked it).
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('releases_ua *') + '\n')
    res = query_releases_ua(None)
    result_xml, result_json = get_releases_ua(res, None, True)
    save_dump(result_xml, None, None, 'releases_ua')
    save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'releases_ua', '', 'json')
    # BUGFIX: ``source`` was undefined here (NameError); pass the dump
    # name directly.  TODO confirm the expected label.
    cron_success('api', 'releases_ua', 'releases_ua', 'Укр. релизы')
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('releases_ua') + '\n')
Exemplo n.º 6
0
def cron_dump_film_trailers():
    """Dump film trailers to XML and JSON files and log the run."""
    # ``with`` closes the log handle (the original leaked it).
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('film_trailers *') + '\n')
    res, version = query_film_trailers(None, None)
    result_xml, result_json = get_film_trailers(res, None, True)
    save_dump(result_xml, None, None, 'film_trailers')
    save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'film_trailers', '', 'json')
    # BUGFIX: ``source`` was undefined here (NameError); pass the dump
    # name directly.  TODO confirm the expected label.
    cron_success('api', 'film_trailers', 'film_trailers', 'Трейлеры')
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('film_trailers') + '\n')
Exemplo n.º 7
0
def cron_dump_movie_reviews():
    """Dump movie reviews to XML and JSON files and log the run."""
    # ``with`` closes the log handle (the original leaked it).
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('movie_reviews *') + '\n')
    res = query_movie_reviews(None, None)
    result_xml, result_json = get_movie_reviews(res, None, True)
    save_dump(result_xml, None, None, 'movie_reviews')
    save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'movie_reviews', '', 'json')
    # BUGFIX: ``source`` was undefined here (NameError); pass the dump
    # name directly.  TODO confirm the expected label.
    cron_success('api', 'movie_reviews', 'movie_reviews', 'Рецензии')
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('movie_reviews') + '\n')
Exemplo n.º 8
0
def cron_dump_imdb_rate():
    """Dump IMDb ratings to XML and JSON files and log the run."""
    # ``with`` closes the log handle (the original leaked it).
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('imdb_rate *') + '\n')
    res = query_imdb_rate(None, None)
    result_xml, result_json = get_imdb_rate(res, None, True)
    save_dump(result_xml, None, None, 'imdb_rate')
    save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'imdb_rate', '', 'json')
    # BUGFIX: ``source`` was undefined here (NameError); pass the dump
    # name directly.  TODO confirm the expected label.
    cron_success('api', 'imdb_rate', 'imdb_rate', 'IMDB рейтинги')
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('imdb_rate') + '\n')
Exemplo n.º 9
0
def cron_dump_films_name():
    """Dump film names to XML and JSON files and log the run."""
    # ``with`` closes the log handle (the original leaked it).
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('films_name *') + '\n')
    res = query_films_name(None)
    result_xml, result_json = get_films_name(res, None, True)
    save_dump(result_xml, None, None, 'films_name')
    save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'films_name', '', 'json')
    # BUGFIX: ``source`` was undefined here (NameError); pass the dump
    # name directly.  TODO confirm the expected label.
    cron_success('api', 'films_name', 'films_name', 'Названия фильмов')
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('films_name') + '\n')
Exemplo n.º 10
0
def vkinocomua_schedules_export_to_kinoafisha():
    """Export vkino.com.ua schedules to kinoafisha and store the run log."""
    from release_parser.views import schedules_export

    source = ImportSources.objects.get(url='http://vkino.com.ua/')
    export_params = (source.code, 0, 75, 100)
    log = schedules_export(source, export_params, False)

    # Write the export log into an xml dump file.
    create_dump_file('%s_export_to_kinoafisha_log' % source.dump,
                     settings.LOG_DUMP_PATH, '<data>%s</data>' % log)
    cron_success('export', source.dump, 'schedules', 'Сеансы')
Exemplo n.º 11
0
def cron_dump_cinemas():
    """Dump cinemas to XML and JSON files and log the run."""
    # ``with`` closes the log handle (the original leaked it).
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('cinemas *') + '\n')
    res = query_cinema(None)
    result_xml, result_json = get_cinema(res, None, True)
    save_dump(result_xml, None, None, 'cinema')
    save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'cinema', '', 'json')
    # BUGFIX: ``source`` was undefined here (NameError); pass the dump
    # name directly.  TODO confirm the expected label.
    cron_success('api', 'cinema', 'cinemas', 'Кинотеатры')
    with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
        log.write(str(datetime.datetime.now()) + '\t' + str('cinemas') + '\n')
Exemplo n.º 12
0
def get_okinoua_links():
    '''
    Fetch the URLs of Ukrainian releases from okino.ua.

    Scrapes the "coming soon" page, keeps only entries that declare a
    distributor and writes the collected links as an XML dump file.
    '''
    links = []

    def get_link_from_tag(i):
        # The film id is embedded in the href as /film/<id>/.
        tag = i.find('p', {'class': 'name'})
        film_id = tag.a.get('href').replace('/film/', '').replace('/', '')
        link = 'http://www.okino.ua%s' % tag.a.get('href')
        return link, film_id

    url = 'http://www.okino.ua/comingsoon/'
    req = urllib.urlopen(url)
    if req.getcode() == 200:
        html_data = BeautifulSoup(req.read(), from_encoding="utf-8")
        # The listing uses several CSS classes for film cards.
        divs = [
            {'class': 'film'},
            {'class': 'film last'},
            {'class': 'film film-s'},
            {'class': 'film last film-s'},
        ]

        for div in divs:
            for i in html_data.findAll('div', div):
                link, film_id = get_link_from_tag(i)
                for j in i.findAll('p'):
                    # Only releases with a distributor line are kept.
                    if u'Дистрибьютор:' in j.text:
                        distr = j.text.split(':')[1].strip()
                        links.append({
                            'link': link,
                            'distr': distr,
                            'id': film_id,
                        })

    # Build the XML with join() instead of quadratic += concatenation.
    parts = []
    for i in links:
        parts.append('<release>')
        parts.append('<link value="%s"></link>' % i['link'])
        parts.append('<distr value="%s"></distr>' % i['distr'].replace('&', '&amp;'))
        parts.append('<id value="%s"></id>' % i['id'])
        parts.append('</release>')
    xml = ''.join(parts)

    # BUGFIX: the output file handle was previously left unclosed if the
    # write raised; ``with`` guarantees it is closed.
    with open('%s/dump_okino.ua.links.xml' % settings.API_DUMP_PATH, 'w') as f:
        f.write('<data>%s</data>' % xml.encode('utf-8'))

    cron_success('html', 'okino.ua', 'links', 'Ссылки укр. релизов')
Exemplo n.º 13
0
def get_rutracker_topics_closed():
    """Delete SourceFilms entries whose rutracker topics were closed.

    Scrapes the "closed" listing on rutracker.org, matches each closed
    topic title (reduced to an alphanumeric slug) against films
    previously imported from this source, and deletes the matches.
    """

    # Matches a size tag like "[1.4 GB]" (defined but not used below).
    REG_SIZE = re.compile(r'\[\d+\.?\d+?\s?\w+\]')
    # Keeps only latin/cyrillic letters and digits for slug comparison.
    REG_SLUG = re.compile(ur'[a-zа-я0-9]+')

    source = ImportSources.objects.get(url='http://rutracker.org/')

    films = SourceFilms.objects.filter(source_obj=source)
    films_dict = {}
    for i in films:
        # Index imported films by their alternative (slugified) name.
        films_dict[i.name_alter] = i

    url = 'http://rutracker.org/forum/index.php?closed=1'
    req = urllib.urlopen(url)
    for_del = []
    if req.getcode() == 200:
        # The page is served in windows-1251 encoding.
        data = BeautifulSoup(req.read(), from_encoding="windows-1251")

        nav = data.find('ul')
        if nav:
            for i in nav.findAll('li'):
                title = i.b.text.strip().encode('utf-8')
                # Topic titles look like "Russian name / Original name ...".
                if ' / ' in title:
                    name_alt = re.findall(REG_SLUG, low(title).decode('utf-8'))
                    name_alt = ''.join(name_alt)
                    obj = films_dict.get(name_alt)
                    if obj:
                        for_del.append(obj.id)

    # One bulk delete instead of per-object deletes.
    SourceFilms.objects.filter(pk__in=set(for_del)).delete()
    '''
    if req.getcode() == 200:
        data = BeautifulSoup(req.read(), from_encoding="utf-8")
        return HttpResponse(str(data))
        nav = data.find('div', {'class': 'cl-pg'})
        for a in nav.findAll('a'):
            link = a.get('href').encode('utf-8')
            if 'start' in link:
                new_url = '%sforum/%s' % (source.url, link)
                links.append(new_url)
    
    for url in links:
        req = urllib.urlopen(url)
        if req.getcode() == 200:
            data = BeautifulSoup(req.read(), from_encoding="utf-8")
            for i in data.findAll('b'):
                title = i.text.encode('utf-8').strip()
                if ' / ' in title:
                
                    name_alt = re.findall(REG_SLUG, low(title).decode('utf-8'))
                    name_alt = ''.join(name_alt)
                    
                    obj = films_dict.get(name_alt)
                    if obj:
                        obj.delete()
    '''
    cron_success('xml', source.dump, 'films_closed', 'Закрытые фильмы')
Exemplo n.º 14
0
def cron_dump_screens():
    """Dump screenings in both API versions (v1 and v2) to XML and JSON."""
    vers = [{'ver': 1, 'name': 'screens'}, {'ver': 2, 'name': 'screens_v2'}]
    qresult = query_screens(None)
    for i in vers:
        # ``with`` closes the log handle (the original leaked it).
        with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
            log.write(str(datetime.datetime.now()) + '\t' + str('Сеансы v.%s (Дания) *' % i['ver']) + '\n')
        result_xml, result_json = get_screens(qresult, i['ver'], None, True)
        save_dump(result_xml, None, None, i['name'])
        save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, i['name'], '', 'json')
        # BUGFIX: ``source`` was undefined here (NameError); use the dump
        # name of the version being exported.  TODO confirm the label.
        cron_success('api', i['name'], i['name'], 'Сеансы v.%s (Дания)' % i['ver'])
        with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
            log.write(str(datetime.datetime.now()) + '\t' + str('Сеансы v.%s (Дания)' % i['ver']) + '\n')
Exemplo n.º 15
0
def cron_dump_films():
    """Dump films grouped by year ranges to XML and JSON files."""
    # Fixed historical buckets plus one bucket per year from 2012 on.
    years_list = ['1990', '1990_1999', '2000_2009', '2010_2011'] + map(str, range(2012, datetime.date.today().year + 1))
    for i in years_list:
        # ``with`` closes the log handle (the original leaked it).
        with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
            log.write(str(datetime.datetime.now()) + '\t' + str('films_%s *' % i) + '\n')
        res = get_year_films(i)
        result_xml, result_json = get_film(res, None, None, True)
        save_dump(result_xml, None, None, 'film', i, 'xml')
        save_dump(simplejson.dumps(result_json, ensure_ascii=False).encode('utf-8'), None, None, 'film', i, 'json')
        # BUGFIX: ``source`` was undefined here (NameError); use the dump
        # base name passed to save_dump.  TODO confirm the label.
        cron_success('api', 'film', 'films_%s' % i, 'Фильмы %s' % i)
        with open('%s/api_time_log.txt' % settings.API_DUMP_PATH, 'a') as log:
            log.write(str(datetime.datetime.now()) + '\t' + str('films_%s' % i) + '\n')
Exemplo n.º 16
0
def get_tvzavr_dump():
    '''
    Download the tvzavr film dump (sitemap XML) into the API dump folder.
    '''
    source = ImportSources.objects.get(url='http://www.tvzavr.ru/')

    dump_url = '%sapi/mgm/sitemap-video.xml' % source.url
    target_path = '%s/dump_%s_index.xml' % (settings.API_DUMP_PATH, source.dump)

    opener = urllib.URLopener()
    opener.retrieve(dump_url, target_path)

    cron_success('xml', source.dump, 'index', 'Дамп с фильмами')
Exemplo n.º 17
0
def get_kinoteatrua_releases():
    '''
    Fetch Ukrainian release dates from kino-teatr.ua.

    Walks the "films near" listing and, for every film we already track,
    opens its page, reads the Ukrainian premiere date and creates or
    updates the corresponding SourceReleases row.
    '''
    opener = give_me_cookie()

    source = ImportSources.objects.get(url='http://kino-teatr.ua/')

    films_dict = get_source_data(source, 'film', 'dict')

    # Index already-known releases by the film's source id.
    releases = SourceReleases.objects.select_related('film').filter(
        source_obj=source)
    releases_dict = {}
    for i in releases:
        releases_dict[i.film.source_id] = i

    url = '%sfilms-near.phtml' % source.url

    req = opener.open(urllib2.Request(url))
    if req.getcode() == 200:
        data = BeautifulSoup(req.read(), from_encoding="utf-8")
        for ind, i in enumerate(data.findAll('a',
                                             {'class': 'searchItemLink'})):
            film_url = i.get('href')
            # The numeric film id is the URL tail between /film/ and .phtml.
            film_id = film_url.replace('http://kino-teatr.ua/film/',
                                       '').replace('.phtml',
                                                   '').encode('utf-8')
            film_obj = films_dict.get(film_id)
            if film_obj:
                req2 = opener.open(urllib2.Request(film_url))
                if req2.getcode() == 200:
                    data2 = BeautifulSoup(req2.read(), from_encoding="utf-8")
                    block = data2.find('div', id='filmInfo')
                    strong = block.find('strong',
                                        text=u"Премьера (в Украине): ")
                    # The date follows the label as a dd.mm.yyyy link.
                    day, month, year = strong.find_next_sibling(
                        "a").text.strip().split('.')
                    showdate = datetime.date(int(year), int(month), int(day))
                    release_obj = releases_dict.get(film_id)
                    if release_obj:
                        if release_obj.release != showdate:
                            release_obj.release = showdate
                            release_obj.save()
                    else:
                        release_obj = SourceReleases.objects.create(
                            source_obj=source,
                            film=film_obj,
                            release=showdate,
                        )
                        releases_dict[film_id] = release_obj

            # Throttle between requests.  BUGFIX: the original guard was
            # ``if ind % 1 == 0``, which is always true — the modulus was
            # dead code and the sleep already ran on every iteration
            # (presumably ``% 10`` was intended; confirm before changing
            # the cadence).
            time.sleep(random.uniform(1.0, 3.0))

    cron_success('html', source.dump, 'releases', 'Укр.релизы')
Exemplo n.º 18
0
def get_imdb_film_list():
    """Scrape the IMDb US release calendar into an XML dump.

    Collects upcoming theatrical releases (skipping documentaries and
    most limited/festival entries) together with their imdb ids, and
    writes the result with create_dump_file.
    """
    source = ImportSources.objects.get(url='http://www.imdb.com/')

    url = '%scalendar/?region=us' % source.url

    opener = give_me_cookie()
    req = opener.open(urllib2.Request(url))

    xml = ''
    ids = []
    if req.getcode() == 200:
        data = BeautifulSoup(req.read(), from_encoding="utf-8")
        div = data.find('div', id="main")
        old_date = ''
        for h4 in div.findAll('h4'):
            # Each h4 holds a release date like "21 June 2013".
            release = h4.string.encode('utf-8')
            day, month, year = release.split()

            month = get_month_en(low(month))

            rel_date = '%s-%s-%s' % (year, month, day)

            xml += '<date v="%s">' % rel_date

            ul = h4.find_next('ul')

            for li in ul.findAll('li'):
                year = li.find('span', {'class': "year_type"}).string.encode('utf-8')
                if 'documentary' not in low(year):
                    year = re.findall(r'\d+', year)
                    if year:
                        details = li.find('i')
                        if details:
                            details = str(details).encode('utf-8').replace('<i>','').replace('</i>','')
                            details = details.replace('(','').replace(')','')
                        else:
                            details = ''

                        # NOTE(review): mixed and/or without parentheses —
                        # the trailing 'tv premiere' clause makes this
                        # guard true in most cases; confirm intended logic.
                        if 'limited' not in low(details) and 'fest' not in low(details) or 'tv premiere' not in low(details):
                            film_name = li.a.string.encode('utf-8').replace('"', '&quot;').replace('&','&amp;')
                            film_slug = low(del_separator(film_name))
                            full_url = li.a.get('href').encode('utf-8')
                            imdb_id = full_url.replace('/title/tt', '').replace('/', '')

                            xml += '<film n="%s" s="%s" y="%s" id="%s" d="%s" r="%s"></film>' % (film_name, film_slug, year[0], imdb_id, details, rel_date)
                            ids.append(imdb_id)

            xml += '</date>'
    ids = ';'.join(set(ids))
    # BUGFIX: the original line ended with a stray ``time`` token, which
    # made this function a SyntaxError.
    xml = '<data><ids value="%s">%s</ids></data>' % (ids, xml)

    create_dump_file('%s_film_list' % source.dump, settings.API_DUMP_PATH, xml)
    cron_success('html', source.dump, 'films_list', 'Список релизов')
Exemplo n.º 19
0
def get_rambler_indexfile():
    """Fetch the Rambler movie export index and store it as a dump file."""
    source = ImportSources.objects.get(url='http://www.rambler.ru/')

    # dump_rambler_index.xml
    url = 'http://api.kassa.rambler.ru/v2/%s/xml/Movie/export/sale/' % RAMBLER_API_KEY

    req = urllib.urlopen(url)
    if req.getcode() == 200:
        payload = req.read()
        # The API reports client-IP rejection inside the body, not via HTTP.
        if 'InvalidClientIp' in payload:
            return HttpResponse(str('InvalidClientIp'))
        create_dump_file('%s_index' % source.dump, settings.API_DUMP_PATH,
                         payload)

    cron_success('xml', source.dump, 'index', 'Индексный файл')
    return HttpResponse(str('OK'))
Exemplo n.º 20
0
def cinemate_cc_get_links():
    """Collect torrent download links from cinemate.cc for tracked films.

    For up to 50 films of this source, opens each film's links tab,
    reads every tracker row and stores previously unseen links as
    CinemateTorrents rows.
    """
    source = ImportSources.objects.get(url='http://cinemate.cc/')

    films = {}
    source_films = SourceFilms.objects.filter(source_obj=source)[:50]
    for i in source_films:
        films[int(i.source_id)] = i

    # go_link_id values we already saved, used to skip known links.
    torrents = list(
        CinemateTorrents.objects.filter(
            film__source_id__in=films.keys()).values_list('go_link_id',
                                                          flat=True))

    opener = give_me_cookie()

    for source_id, film in films.iteritems():

        url = '%smovie/%s/links/#tabs' % (source.url, source_id)
        req = opener.open(urllib2.Request(url))
        data = BeautifulSoup(req.read(), from_encoding="utf-8")
        table = data.find('div', {'class': "table"})
        for div in table.findAll('div', {'class': "row delimiter"}):
            td_div = div.findAll('div')
            tracker = td_div[2].text.strip().encode('utf-8')
            quality = td_div[3].text.strip().encode('utf-8')
            size = td_div[-1].text.strip().encode('utf-8')
            # The /go/s/<id>/ redirect id identifies the link.
            link_id = div.find('a', {
                'class': "icon_t download-link"
            }).get('href', '').replace('/go/s/', '').replace('/', '')

            # NOTE(review): link_id is a string scraped from the page
            # while ``torrents`` holds DB values — confirm the DB column
            # is a matching string type, otherwise this membership test
            # never matches and links are re-created every run.
            if link_id not in torrents:

                # Resolve the redirect page to the actual tracker URL.
                go_url = '%sgo/s/%s' % (source.url, link_id)
                go_req = opener.open(urllib2.Request(go_url))
                go_data = BeautifulSoup(go_req.read(), from_encoding="utf-8")

                main = go_data.find('div', {'class': "main"})

                a = main.find('a', rel="nofollow").get('href')

                CinemateTorrents.objects.create(
                    film=film,
                    go_link_id=link_id,
                    link=a,
                    tracker=tracker,
                    quality=quality,
                    file_size=size,
                )

        # Be polite to the site between films.
        time.sleep(random.uniform(0.8, 1.2))

    cron_success('html', source.dump, 'links', 'Ссылки на трекеры')
Exemplo n.º 21
0
def nowru_player_to_kinoafisha():
    """Push online-player embed codes (now.ru, ivi, megogo) into afisha.

    Collects player HTML from three sources with now.ru taking priority
    over ivi, and ivi over megogo, then writes each player into the
    FilmsCodes table of the 'afisha' database (update or create).
    """
    source = ImportSources.objects.get(url='http://www.now.ru/')

    nowru_data = Nowru.objects.exclude(kid=None)
    nowru_ids = [i.kid for i in nowru_data]

    # ivi films not already covered by now.ru.
    ivi_data = SourceFilms.objects.exclude(kid__in=set(nowru_ids)).filter(
        source_obj__url="http://antipiracy.ivi.ru/")
    ivi_ids = [i.kid for i in ivi_data]

    nowru_ivi = nowru_ids + ivi_ids

    # megogo films with a usable afisha id, not covered by the above.
    megogo_data = MovieMegogo.objects.exclude(
        Q(afisha_id=0) | Q(afisha_id=None) | Q(afisha_id__in=set(nowru_ivi)))
    megogo_ids = [i.afisha_id for i in megogo_data]

    nowru_ivi_megogo = set(nowru_ivi + megogo_ids)

    # Existing non-empty player rows in afisha, indexed by film id.
    afisha_code = FilmsCodes.objects.using('afisha').exclude(player='').filter(
        film__id__in=nowru_ivi_megogo)

    afisha_code_dict = {}
    for i in afisha_code:
        afisha_code_dict[i.film_id] = i

    # The dataset index (0/1/2) determines where kid and player come from.
    for ind, data in enumerate((nowru_data, ivi_data, megogo_data)):
        for i in data:
            # now.ru
            if ind == 0:
                kid = i.kid
                player = i.player_code
            # ivi
            elif ind == 1:
                kid = i.kid
                player = i.text
            # megogo
            elif ind == 2:
                kid = i.afisha_id
                player = '<iframe width="607" height="360" \
                        src="http://megogo.net/e/%s" frameborder="0" \
                        allowfullscreen></iframe>' % i.megogo_id

            if kid:
                afisha_obj = afisha_code_dict.get(kid)
                if afisha_obj:
                    # Overwrite the stored player markup.
                    afisha_obj.player = player
                    afisha_obj.save()
                else:
                    FilmsCodes.objects.using('afisha').create(
                        film_id=kid,
                        player=player,
                    )
    cron_success('export', source.dump, 'players', 'Онлайн плееры')
Exemplo n.º 22
0
def get_luxor_schedules():
    """Import Luxor showtimes via the socket API into SourceSchedules."""
    raw = get_luxor_data_by_socket('QueryCode=GetSessions')

    source = ImportSources.objects.get(url='http://luxor.ru/')

    # create_dump_file('%s_schedules' % source.dump, settings.API_DUMP_PATH, raw)
    # (The dump file above can also be read back locally for debugging.)

    films = get_source_data(source, 'film', 'dict')
    cinemas = get_source_data(source, 'cinema', 'dict')
    halls = get_source_data(source, 'hall', 'dict')
    known_schedules = get_source_data(source, 'schedule', 'list')

    xml_data = BeautifulSoup(raw, from_encoding="utf-8")

    for session in xml_data.findAll('session'):
        sch_id = session['id']
        if sch_id in known_schedules:
            continue

        cinema_obj = cinemas.get(session.theatre['id'].encode('utf-8'))
        hall_obj = halls.get(session.theatre.hall['id'].encode('utf-8'))
        film_obj = films.get(session.movie['id'].encode('utf-8'))

        # Only store sessions whose cinema, film and hall are all known.
        if not (cinema_obj and film_obj and hall_obj):
            continue

        day, month, year = session.date.string.encode('utf-8').split('.')
        hours, minutes = session.time.string.encode('utf-8').split(':')
        dtime = datetime.datetime(int(year), int(month), int(day),
                                  int(hours), int(minutes))

        SourceSchedules.objects.create(
            source_id=sch_id,
            source_obj=source,
            film=film_obj,
            cinema=cinema_obj,
            dtime=dtime,
            hall=hall_obj.kid,
        )

    cron_success('xml', source.dump, 'schedules', 'Сеансы')
Exemplo n.º 23
0
def get_currency_rate():
    """Scrape RUB exchange rates (USD, AUD) from cbrf.magazinfo.ru.

    For each currency pair the latest published rate is parsed from the
    first data row of the rates table and upserted into CurrencyRate;
    the NZD rate is fetched separately once at the end.
    """
    source = ImportSources.objects.get(url='http://cbrf.magazinfo.ru/')

    pairs = [
        {
            'url': '%srur/USD' % source.url,
            'cur_1': '4',  # rouble
            'cur_2': '1',  # US dollar
        },
        {
            'url': '%srur/AUD' % source.url,
            'cur_1': '4',  # rouble
            'cur_2': '3',  # Australian dollar
        },
    ]

    for i in pairs:
        req = urllib.urlopen(i['url'])
        if req.getcode() == 200:
            # BUGFIX: the page soup previously shadowed the list being
            # iterated (both were named ``data``).
            page = BeautifulSoup(req.read(), from_encoding="utf-8")
            table = page.find('table',
                              border="1",
                              cellspacing="0",
                              cellpadding="5")
            # The second row of the table holds the newest rate.
            tr = table.findAll('tr', limit=2)
            td = tr[1].findAll('td')
            cur_day, cur_month, cur_year = td[0].text.split('.')
            cur_date = datetime.datetime(int(cur_year), int(cur_month),
                                         int(cur_day))
            value = td[1].text.encode('utf-8')

            obj, created = CurrencyRate.objects.get_or_create(
                currency=i['cur_1'],
                by_currency=i['cur_2'],
                defaults={
                    'currency': i['cur_1'],
                    'by_currency': i['cur_2'],
                    'country_id': 2,
                    'date': cur_date,
                    'value': value,
                })

            # BUGFIX: was ``if obj:``, which is always true and re-saved
            # freshly created rows; only update pre-existing ones.
            if not created:
                obj.value = value
                obj.date = cur_date
                obj.save()

    # BUGFIX: was called inside the loop, fetching the NZD rate once per
    # currency pair; once per run is enough.
    get_currency_rate_NZD()

    cron_success('html', source.dump, 'currency_rate', 'Курс валют')
Exemplo n.º 24
0
def raspishi_relations():
    """Link films from распиши.рф to our film base.

    Downloads the source's film XML feed, identifies each previously
    unseen film by its slug and stores the relation; unidentified films
    are written to a not-found XML dump.
    """
    source = ImportSources.objects.get(url='http://распиши.рф/')

    ignored = get_ignored_films()
    data_nof_film = ''

    # The IDN domain must be punycode-encoded for urllib.
    domain = u'распиши.рф'
    url = 'http://%s/getfilmxml.php' % domain.encode('idna')

    req = urllib.urlopen(url)
    if req.getcode() == 200:
        # rids already linked to a kid — skip those below.
        films_rid = list(
            RaspishiRelations.objects.exclude(kid=0).values_list('rid',
                                                                 flat=True))

        xml_data = BeautifulSoup(req.read(), from_encoding="utf-8")
        for i in xml_data.findAll('movie'):
            id = int(i['id'])
            if id not in films_rid:
                name_ru = i.find('name').text.encode('utf-8')
                name_en = i.find('nameeng').text.encode('utf-8')

                # Strip any parenthesised qualifiers from the titles.
                name_ru = re.sub(r'\(.*?\)', '', name_ru).strip()
                name_en = re.sub(r'\(.*?\)', '', name_en).strip()

                name_slug = low(del_separator(del_screen_type(name_ru)))
                name_en_slug = low(del_separator(del_screen_type(name_en)))

                if name_slug.decode('utf-8') not in ignored:
                    try:
                        kid, info = film_identification(name_slug,
                                                        None, {}, {},
                                                        source=source)

                        if kid:
                            created = RaspishiRelations.objects.create(
                                rid=id,
                                kid=kid,
                                name_ru=name_ru,
                                name_en=name_en,
                            )
                        else:
                            data_nof_film += xml_noffilm(
                                name_ru, name_slug, name_en, name_en_slug, id,
                                info, None, source.id)
                    # NOTE(review): this silently drops DB errors for a
                    # single film; the dotted exception path looks unusual
                    # — confirm it actually resolves at runtime.
                    except db.backend.Database._mysql.OperationalError:
                        pass
    create_dump_file('%s_nof_film' % source.dump, settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % data_nof_film)
    cron_success('xml', source.dump, 'films', 'Укр. сеансы')
Exemplo n.º 25
0
def get_ktmir_and_ktrussia_schedules():
    """Parse schedules for the Mir (ktmir) and Rossiya (ktrussia) cinemas.

    Both sites share the same page layout, so each is handled by the
    same page_parser call; films the parser could not identify are
    written to a per-source not-found dump.  The body was previously
    duplicated verbatim for each cinema — folded into one loop.
    """
    targets = (
        # (city, cinema, source url, dump prefix)
        ('Балаково', 'Мир', 'http://ktmir.ru/', 'ktmir'),
        ('Балаково', 'Россия', 'http://kt-russia.ru/', 'ktrussia'),
    )
    for city_name, cinema_name, source, dump in targets:
        data_nof_film = page_parser(city_name, cinema_name, source)
        create_dump_file('%s_nof_film' % dump, settings.NOF_DUMP_PATH,
                         '<data>%s</data>' % data_nof_film)
        cron_success('html', dump, 'schedules', 'Сеансы')
Exemplo n.º 26
0
def get_okinoua_cities():
    """
    Parse the list of Ukrainian cities from okino.ua.

    New cities are matched against the local City table; matches are
    stored as SourceCities, the rest go into a not-found XML dump.
    """
    source = ImportSources.objects.get(url='http://www.okino.ua/')

    # Ids of cities already imported for this source.
    known_ids = get_source_data(source, 'city', 'list')
    data_nof_city = ''

    # Open the page that lists all cities.
    req = urllib.urlopen('%skinoafisha-kiev/' % source.url)
    if req.getcode() == 200:
        page = BeautifulSoup(req.read(), from_encoding="utf-8")
        # Each city sits in an <li> inside ul.blist with its id in the href.
        for ul in page.findAll('ul', {'class': 'blist'}):
            for li in ul.findAll('li'):
                city_id = li.a.get('href').replace('/', '')
                name = li.a.string.encode('utf-8').strip()
                name_slug = low(del_separator(name))

                if city_id in known_ids:
                    continue

                # Try to identify the new city by its slug in our DB.
                city = City.objects.filter(name__name=name_slug,
                                           name__status=2).distinct('pk')
                if city.count() == 1:
                    SourceCities.objects.create(
                        source_id=city_id,
                        source_obj=source,
                        city=city[0],
                        name=name,
                    )
                elif 'slug="%s"' % name_slug not in data_nof_city:
                    # Record unidentified cities once for later review.
                    data_nof_city += '<city name="%s" slug="%s"></city>' % (
                        name, name_slug)

    create_dump_file('okinoua_nof_city', settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % data_nof_city)
    cron_success('html', 'okinoua', 'cities', 'Укр. города')
Exemplo n.º 27
0
def get_rambler_cities():
    """Fetch the city list from the Rambler kassa API and register new ones.

    Cities already present in SourceCities are skipped; cities that cannot
    be matched to exactly one local City are written to the *_nof_city XML
    dump for manual review.
    """
    source = ImportSources.objects.get(url='http://www.rambler.ru/')

    known_ids = get_source_data(source, 'city', 'list')
    unresolved = ''

    api_url = 'http://api.kassa.rambler.ru/v2/%s/xml/cities/' % RAMBLER_API_KEY
    response = urllib.urlopen(api_url)
    if response.getcode() == 200:
        tree = BeautifulSoup(response.read(),
                             from_encoding="utf-8")

        for node in tree.findAll('city'):
            city_source_id = node.cityid.string
            title = node.find('name').string.encode('utf-8')
            slug = low(del_separator(title))
            if city_source_id in known_ids:
                continue
            # Match against the local reference table; require exactly one hit.
            matches = City.objects.filter(name__name=slug,
                                          name__status=2).distinct('pk')
            if matches.count() == 1:
                SourceCities.objects.create(
                    source_id=city_source_id,
                    source_obj=source,
                    city=matches[0],
                    name=title,
                )
            elif 'slug="%s"' % slug not in unresolved:
                unresolved += '<city name="%s" slug="%s"></city>' % (
                    title, slug)

    create_dump_file('%s_nof_city' % source.dump, settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % unresolved)
    cron_success('xml', source.dump, 'cities', 'Города')
Exemplo n.º 28
0
def get_premierzal_cities():
    """Scrape the premierzal.ru city drop-down and register unknown cities.

    Each city's slug doubles as its source_id.  Unidentified cities go to
    the *_nof_city XML dump; every processed slug is appended to the known
    list so it is handled only once per run.
    """
    source = ImportSources.objects.get(url='http://www.premierzal.ru/')

    known = get_source_data(source, 'city', 'list')

    unresolved = ''

    response = urllib.urlopen(source.url)
    if response.getcode() == 200:
        page = BeautifulSoup(response.read())

        dropdown = page.find('div', {'class': 'drop'})

        for anchor in dropdown.findAll('a'):
            title = anchor.text.encode('utf-8').strip()
            slug = low(del_separator(title))
            key = slug.decode('utf-8')

            if key in known:
                continue

            # Require exactly one match in the local reference table.
            matches = City.objects.filter(name__name=slug,
                                          name__status=2).distinct('pk')
            if matches.count() == 1:
                SourceCities.objects.create(
                    source_id=slug,
                    source_obj=source,
                    city=matches[0],
                    name=title,
                )
            else:
                unresolved += '<city name="%s" slug="%s"></city>' % (
                    title, slug)

            known.append(key)

    create_dump_file('%s_nof_city' % source.dump, settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % unresolved)
    cron_success('html', source.dump, 'cities', 'Города')
Exemplo n.º 29
0
def get_ivi_file():
    """Download the ivi antipiracy txt dump and store it.

    Scans the directory listing at ``<source.url>-/`` for a link whose text
    contains "in one file.txt", strips the HTML wrapper BeautifulSoup adds,
    and saves the payload as a txt dump file.
    """
    source = ImportSources.objects.get(url='http://antipiracy.ivi.ru/')
    # NOTE(review): fetched but unused in the original; kept for identical behavior.
    films = get_source_data(source, 'film', 'list')
    listing_url = '%s-/' % source.url

    listing = urllib.urlopen(listing_url)
    if listing.getcode() == 200:
        index = BeautifulSoup(listing.read(), from_encoding="windows-1251")

        for anchor in index.findAll('a'):
            label = anchor.string.encode('utf-8')
            if 'in one file.txt' not in label:
                continue
            payload = urllib.urlopen('%s%s' % (listing_url, anchor.get('href')))
            parsed = BeautifulSoup(payload.read(), from_encoding="windows-1251")
            # Strip the synthetic wrapper tags added when parsing plain text.
            content = str(parsed).replace('<html><head></head><body>',
                                          '').replace('</body></html>', '')
            create_dump_file(source.dump, settings.API_DUMP_PATH, content,
                             'txt')

    cron_success('html', source.dump, 'file', 'txt файл с данными')
Exemplo n.º 30
0
def get_zapad24ru():
    """Import film showtimes from http://zapad24.ru/afisha/.

    Each <table> on the listing page describes one cinema; its <strong>
    caption carries the cinema name in quotes and the city in parentheses.
    Cities, cinemas and films are identified against local reference data
    (creating SourceCities / SourceCinemas / SourceFilms as needed), then
    each film's detail page is fetched to extract showtimes, stored as
    SourceSchedules.  Anything unidentified is written to *_nof_* XML
    dumps for manual review.

    Bug fix: the not-found-film accumulator was appended to as
    ``data_nof_film`` while being initialized and dumped as
    ``data_nof_films``, raising UnboundLocalError on the first
    unidentified film.
    """
    ignored = get_ignored_films()
    ignored_cinemas = get_ignored_cinemas()

    source = ImportSources.objects.get(url='http://zapad24.ru/')
    sfilm_clean(source)

    # Caches of already-imported source objects, keyed by source_id.
    cities_dict = get_source_data(source, 'city', 'dict')
    cinemas_dict = get_source_data(source, 'cinema', 'dict')
    schedules = get_source_data(source, 'schedule', 'list')

    films = {}
    source_films = SourceFilms.objects.filter(source_obj=source)
    for i in source_films:
        films[i.source_id] = i
    fdict = get_all_source_films(source, source_films)

    today = datetime.datetime.now()
    # Schedules whose run ends beyond ~40 days from now are skipped.
    next_month = datetime.date.today() + datetime.timedelta(days=40)

    data_nof_films = ''
    data_nof_cinema = ''
    data_nof_city = ''
    noffilms = []

    req = urllib.urlopen('%safisha/' % source.url)
    if req.getcode() == 200:
        data = BeautifulSoup(req.read())  #, from_encoding="utf-8"
        div = data.find('div', align="left")
        # One table per cinema; `ind` matters: table 0 uses a different
        # row/showtime layout than the rest (see below).
        for ind, table in enumerate(
                div.findAll('table',
                            border="0",
                            cellpadding="0",
                            cellspacing="0",
                            width="100%")):
            # Caption looks like: <strong>Кинотеатр "Name" (г. City)</strong>
            cinema_tag = table.find('strong').string.encode('utf-8')
            cinema_name = re.findall(r'\".+\"',
                                     cinema_tag)[0].replace('"', '').strip()
            cinema_slug = low(del_separator(cinema_name))
            cinema_id = cinema_slug.decode('utf-8')

            city_name = re.findall(r'\(.+\)', cinema_tag)[0].replace(
                '(г. ', '').replace(')', '').strip()
            city_slug = low(del_separator(city_name))
            city_id = city_slug.decode('utf-8')

            city_obj = cities_dict.get(city_id)

            if not city_obj:
                # Identify the city; require exactly one local match.
                city = City.objects.filter(name__name=city_slug,
                                           name__status=2).distinct('pk')
                if city.count() == 1:
                    city_obj = SourceCities.objects.create(
                        source_id=city_id,
                        source_obj=source,
                        city=city[0],
                        name=city_name,
                    )
                    cities_dict[city_id] = city_obj
                else:
                    if 'slug="%s"' % city_slug not in data_nof_city:
                        data_nof_city += '<city name="%s" slug="%s"></city>' % (
                            city_name, city_slug)

            if city_obj:
                # Ignore-list key is "<cinema_slug>__<city_kid>".
                cinema_ig_id = u'%s__%s' % (cinema_slug.decode('utf-8'),
                                            city_obj.city.kid)

                if cinema_ig_id not in ignored_cinemas:
                    cinema_obj = cinemas_dict.get(cinema_id)
                    if not cinema_obj:
                        filter1 = {
                            'name__name': cinema_slug,
                            'name__status': 2,
                            'city': city_obj.city
                        }
                        cinema_kid = cinema_identification(
                            cinema_slug, filter1)
                        if cinema_kid:
                            try:
                                cinema = Cinema.objects.get(code=cinema_kid)
                                cinema_obj = SourceCinemas.objects.create(
                                    source_id=cinema_id,
                                    source_obj=source,
                                    city=city_obj,
                                    cinema=cinema,
                                    name=cinema_name,
                                )
                                cinemas_dict[cinema_id] = cinema_obj
                            except Cinema.DoesNotExist:
                                pass
                        else:
                            if 'slug="%s"' % cinema_slug not in data_nof_cinema:
                                data_nof_cinema += '<cinema name="%s" slug="%s" city="%s" city_kid="%s"></cinema>' % (
                                    cinema_name, cinema_slug, city_name,
                                    city_obj.city.kid)

                    if cinema_obj:
                        film_table = table.find('table')
                        date_from = None
                        date_to = None
                        for tr in film_table.findAll('tr'):
                            film_name, film_slug, film_id = (None, None, None)
                            if ind == 0:
                                # First table: plain film title in <b>.
                                film_name = tr.find('b').string.encode(
                                    'utf-8').strip()
                                film_slug = low(del_separator(film_name))
                                film_id = film_slug.decode('utf-8')
                            else:
                                # Other tables: <b><span> holds the date range,
                                # a bare <b> holds the «quoted» film title.
                                showdate = ''
                                for f in tr.findAll('b'):
                                    if f.find('span'):
                                        showdate = f.find(
                                            'span').string.encode(
                                                'utf-8').strip()
                                    else:
                                        film_name = f.string.encode(
                                            'utf-8').strip()
                                        film_name = re.findall(
                                            r'\«.+\»', film_name)[0]
                                        film_name = film_name.replace(
                                            '«', '').replace('»', '').strip()
                                        film_slug = low(
                                            del_separator(film_name))
                                        film_id = film_slug.decode('utf-8')

                                if showdate and film_name:
                                    # Two observed formats:
                                    #   "DD.MM - DD.MM"  (numeric)
                                    #   "DD month – DD month" (named month)
                                    try:
                                        date_from, date_to = showdate.split(
                                            '-')
                                        date_from_day, date_from_month = date_from.strip(
                                        ).split('.')
                                        date_to_day, date_to_month = date_to.strip(
                                        ).split('.')
                                    except ValueError:
                                        date_from, date_to = showdate.split(
                                            ' – ')
                                        date_from_day, date_from_month = date_from.strip(
                                        ).split()
                                        date_from_month = get_month(
                                            date_from_month)
                                        date_to_day, date_to_month = date_to.strip(
                                        ).split()
                                        date_to_month = get_month(
                                            date_to_month)

                                    # NOTE(review): assumes the run does not
                                    # cross a year boundary — both ends get
                                    # the current year.
                                    date_from = datetime.date(
                                        today.year, int(date_from_month),
                                        int(date_from_day))
                                    date_to = datetime.date(
                                        today.year, int(date_to_month),
                                        int(date_to_day))

                            full_url = tr.find('a').get('href').encode('utf-8')

                            if film_id not in noffilms and film_id not in ignored:
                                obj = films.get(film_id)
                                next_step = checking_obj(obj)

                                if next_step:
                                    if obj:
                                        kid = obj.kid
                                    else:
                                        kid, info = film_identification(
                                            film_slug,
                                            None, {}, {},
                                            source=source)

                                    objt = None
                                    if kid:
                                        create_new, objt = unique_func(
                                            fdict, kid, obj)
                                        if create_new:
                                            objt = create_sfilm(
                                                film_id, kid, source,
                                                film_name)
                                            films[film_id] = objt
                                            if not fdict.get(kid):
                                                fdict[kid] = {
                                                    'editor_rel': [],
                                                    'script_rel': []
                                                }
                                            fdict[kid]['script_rel'].append(
                                                objt)
                                    elif not obj:
                                        # FIX: was `data_nof_film`, an
                                        # undefined name (UnboundLocalError).
                                        data_nof_films += xml_noffilm(
                                            film_name, film_slug, None, None,
                                            film_id.encode('utf-8'), info,
                                            full_url, source.id)
                                        noffilms.append(film_id)

                                    if objt:
                                        # Fetch the film's page for showtimes.
                                        req_film = urllib.urlopen(full_url)
                                        if req_film.getcode() == 200:
                                            data_film = BeautifulSoup(
                                                req_film.read()
                                            )  #, from_encoding="utf-8"

                                            td = data_film.find(
                                                'td', {
                                                    'class': 'news'
                                                }).div.text.encode('utf-8')

                                            showtime = []

                                            if ind == 0:
                                                # First cinema: bare "HH:MM"
                                                # times, assumed for today.
                                                showtime = re.findall(
                                                    r'\d+\:\d+\s\s?', td)
                                            else:
                                                if date_from and date_to:
                                                    if date_to < next_month:
                                                        # "Начало сеансов: HH-MM, HH-MM"
                                                        showtimes = re.findall(
                                                            r'Начало сеансов:\s?[\d+\-\d+\,?\s?]+',
                                                            td)
                                                        times = []
                                                        for t in showtimes:
                                                            t = t.replace(
                                                                'Начало сеансов:',
                                                                '').split(',')
                                                            times = [
                                                                i.strip()
                                                                for i in t
                                                                if i.strip()
                                                            ]

                                                        # Expand the times over
                                                        # every day of the run.
                                                        delta = date_to - date_from
                                                        for day in range(
                                                                delta.days +
                                                                1):
                                                            d = date_from + datetime.timedelta(
                                                                days=day)
                                                            for t in times:
                                                                hours, minutes = t.split(
                                                                    '-')
                                                                dtime = datetime.datetime(
                                                                    d.year,
                                                                    d.month,
                                                                    d.day,
                                                                    int(hours),
                                                                    int(minutes
                                                                        ))
                                                                showtime.append(
                                                                    dtime)

                                            for t in showtime:
                                                if ind == 0:
                                                    hours, minutes = t.strip(
                                                    ).split(':')
                                                    dtime = datetime.datetime(
                                                        today.year,
                                                        today.month, today.day,
                                                        int(hours),
                                                        int(minutes))
                                                else:
                                                    dtime = t

                                                # Dedup key: datetime + cinema +
                                                # city + film, spaces removed.
                                                sch_id = '%s%s%s%s' % (
                                                    dtime, cinema_slug,
                                                    city_slug,
                                                    film_id.encode('utf-8'))
                                                sch_id = sch_id.replace(
                                                    ' ', '').decode('utf-8')

                                                if sch_id not in schedules:
                                                    SourceSchedules.objects.create(
                                                        source_id=sch_id,
                                                        source_obj=source,
                                                        film=objt,
                                                        cinema=cinema_obj,
                                                        dtime=dtime,
                                                    )
                                                    schedules.append(sch_id)

    create_dump_file('%s_nof_city' % source.dump, settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % data_nof_city)
    create_dump_file('%s_nof_cinema' % source.dump, settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % data_nof_cinema)
    create_dump_file('%s_nof_film' % source.dump, settings.NOF_DUMP_PATH,
                     '<data>%s</data>' % data_nof_films)
    cron_success('html', source.dump, 'schedules', 'Сеансы')