def extract_details1(html, url):
    """Build a ``Film`` from a detail page using the table-driven field list.

    ``film.uid`` is the integer found in the last ``/``-separated segment of
    ``url``, taken up to its first ``.``.  When ``html`` is non-empty:

    * the capture of ``re_image_url`` is cleaned, passed to
      ``download_image`` and its last path segment stored as
      ``film.image_name``;
    * every pattern in ``r_list`` is searched and its cleaned capture is
      collected under the matching name from ``key_list`` (``None`` when the
      pattern does not match), then handed to ``film_dict2model``;
    * all ``r_download_url`` hits are stripped and comma-joined into
      ``film.download_url``.

    Args:
        html: Page markup.  A falsy value leaves every field except ``uid``
            untouched.
        url: Page address whose final segment starts with the numeric id.

    Returns:
        The ``Film`` instance that was created and filled in.

    Raises:
        ValueError: If the id portion of ``url`` is not an integer.
    """
    film = Film()
    film.uid = int(url.split('/')[-1].split('.')[0])
    if not html:
        return film

    def clean(match):
        # Last captured group, spaces removed, ';' normalised to ','.
        text = match.group(match.lastindex)
        return text.replace(' ', '').replace(';', ',').strip()

    image_match = re_image_url.search(html)
    if image_match:
        image_url = clean(image_match)
        download_image(image_url)
        film.image_name = image_url.split('/')[-1]

    values = {}
    for regex, key in zip(r_list, key_list):
        match = regex.search(html)
        if match:
            values[key] = clean(match)
        else:
            values[key] = None
            # The previous `film._key = ''` wrote to a literal attribute
            # called "_key"; the empty default belongs on the attribute
            # named by `key`.
            # NOTE(review): film_dict2model may overwrite this with the
            # None stored above -- confirm against its implementation.
            setattr(film, key, '')
    film_dict2model(film, values)

    # str.join puts separators strictly between items.  The earlier
    # `urls.index(url)` test located the *first* occurrence, so a repeated
    # link produced a trailing comma (and the scan was quadratic).
    links = r_download_url.findall(html)
    film.download_url = ','.join(link.strip() for link in links)
    return film
def extract_details(html, url):
    """Build a ``Film`` from a detail page, one regex per attribute.

    ``film.uid`` is the integer found in the last ``/``-separated segment of
    ``url``, taken up to its first ``.``.  When ``html`` is non-empty:

    * the capture of ``re_image_url`` is cleaned, passed to
      ``download_image`` and its last path segment stored as
      ``film.image_name``;
    * each descriptive attribute (``name_cn``, ``name``, ``year``,
      ``country``, ``category``, ``language``, ``subtitle``,
      ``release_date``, ``score``, ``file_size``, ``movie_duration``,
      ``director``) is set to the cleaned capture of its ``r_<attr>``
      pattern, or ``''`` when the pattern does not match;
    * all ``r_download_url`` hits are stripped and comma-joined into
      ``film.download_url``.

    Args:
        html: Page markup.  A falsy value leaves every field except ``uid``
            untouched.
        url: Page address whose final segment starts with the numeric id.

    Returns:
        The ``Film`` instance that was created and filled in.

    Raises:
        ValueError: If the id portion of ``url`` is not an integer.
    """
    film = Film()
    film.uid = int(url.split('/')[-1].split('.')[0])
    if not html:
        return film

    def clean(match):
        # Last captured group, spaces removed, ';' normalised to ','.
        text = match.group(match.lastindex)
        return text.replace(' ', '').replace(';', ',').strip()

    image_match = re_image_url.search(html)
    if image_match:
        image_url = clean(image_match)
        download_image(image_url)
        film.image_name = image_url.split('/')[-1]

    # Attribute name -> pattern, in the order the fields were originally
    # extracted.  Built inside the function so the module-level patterns are
    # only looked up at call time.
    field_patterns = (
        ('name_cn', r_name_cn),
        ('name', r_name),
        ('year', r_year),
        ('country', r_country),
        ('category', r_category),
        ('language', r_language),
        ('subtitle', r_subtitle),
        ('release_date', r_release_date),
        ('score', r_score),
        ('file_size', r_file_size),
        ('movie_duration', r_movie_duration),
        ('director', r_director),
    )
    for attr, pattern in field_patterns:
        match = pattern.search(html)
        setattr(film, attr, clean(match) if match else '')

    # str.join puts separators strictly between items.  The earlier
    # `urls.index(url)` test located the *first* occurrence, so a repeated
    # link produced a trailing comma (and the scan was quadratic).
    links = r_download_url.findall(html)
    film.download_url = ','.join(link.strip() for link in links)
    return film