Пример #1
0
def extract_details1(html, url):
    """Build a ``Film`` from a detail page using the ``r_list``/``key_list`` tables.

    Args:
        html: Page source to scan. When falsy, only ``uid`` is populated.
        url: Page URL. The last path segment, up to its first ``.``, must
            parse as an integer and becomes ``film.uid``.

    Returns:
        The ``Film`` instance that was populated.

    Raises:
        ValueError: If the last URL segment is not an integer.

    Each regex in ``r_list`` is paired with the key at the same position in
    ``key_list``. Matched text has spaces removed, ``;`` turned into ``,`` and
    surrounding whitespace stripped. A key whose regex does not match is set
    to ``''`` on the film and passed as ``None`` in the dict handed to
    ``film_dict2model``.
    NOTE(review): ``film_dict2model`` is defined elsewhere; whether it
    overwrites the ``''`` default with ``None`` is not visible here -- confirm.
    """
    film = Film()
    # e.g. ".../12345.html" -> 12345
    film.uid = int(url.split('/')[-1].split('.')[0])
    if not html:
        return film

    m = re_image_url.search(html)
    if m:
        image_url = m.group(m.lastindex).replace(' ', '')
        image_url = image_url.replace(';', ',').strip()
        download_image(image_url)
        # Only the basename of the image URL is stored on the film.
        film.image_name = image_url.split('/')[-1]

    tmp = {}
    for regex, _key in zip(r_list, key_list):
        m = regex.search(html)
        if m:
            text = m.group(m.lastindex).replace(' ', '')
            tmp[_key] = text.replace(';', ',').strip()
        else:
            tmp[_key] = None
            # Default the field named by _key; the previous code assigned
            # to a literal attribute called "_key" instead.
            setattr(film, _key, '')
    film_dict2model(film, tmp)

    # join() puts separators only between items, so repeated URLs no longer
    # leave a trailing comma the way the index()-based check did.
    links = r_download_url.findall(html)
    film.download_url = ','.join(link.strip() for link in links)
    return film
Пример #2
0
def extract_details(html, url):
    """Build a ``Film`` from a detail page using the per-field regexes.

    Args:
        html: Page source to scan. When falsy, only ``uid`` is populated.
        url: Page URL. The last path segment, up to its first ``.``, must
            parse as an integer and becomes ``film.uid``.

    Returns:
        The ``Film`` instance that was populated.

    Raises:
        ValueError: If the last URL segment is not an integer.

    Every text field is taken from the last matched group of its regex, with
    spaces removed, ``;`` turned into ``,`` and surrounding whitespace
    stripped. A field whose regex does not match is set to ``''``.
    ``download_url`` is the comma-separated list of all download links found.
    """
    film = Film()
    # e.g. ".../12345.html" -> 12345
    film.uid = int(url.split('/')[-1].split('.')[0])
    if not html:
        return film

    def clean(match):
        # Normalise the last matched group of a successful search.
        text = match.group(match.lastindex).replace(' ', '')
        return text.replace(';', ',').strip()

    m = re_image_url.search(html)
    if m:
        image_url = clean(m)
        download_image(image_url)
        # Only the basename of the image URL is stored on the film.
        film.image_name = image_url.split('/')[-1]

    # (attribute on Film, regex that extracts it), in assignment order.
    field_patterns = (
        ('name_cn', r_name_cn),
        ('name', r_name),
        ('year', r_year),
        ('country', r_country),
        ('category', r_category),
        ('language', r_language),
        ('subtitle', r_subtitle),
        ('release_date', r_release_date),
        ('score', r_score),
        ('file_size', r_file_size),
        ('movie_duration', r_movie_duration),
        ('director', r_director),
    )
    for attr, pattern in field_patterns:
        m = pattern.search(html)
        setattr(film, attr, clean(m) if m else '')

    # join() puts separators only between items, so repeated URLs no longer
    # leave a trailing comma the way the index()-based check did.
    links = r_download_url.findall(html)
    film.download_url = ','.join(link.strip() for link in links)
    return film