Example #1
import re

import requests
from lxml import html
from lxml.html import tostring as htmlstring


def fetchProvince(id):
    site = "https://lobbycanada.gc.ca"
    uri = "/app/secure/ocl/lrs/do/cmmLgPblcVw?comlogId=" + str(id)

    # Hop 1: the communication-log page links to the registration record.
    page = requests.get(site + uri)
    tree = html.fromstring(page.content)
    table = tree.xpath('//table[@class="table"]')[0]
    uri = re.search(b'(?<=<a href=")(.*)(?=" target)',
                    htmlstring(table)).group(1).decode("utf-8")

    # Hop 2: take the first registration id option from the registration page.
    # './/option' keeps the search inside the regId element; a bare '//option'
    # would scan the whole document.
    page = requests.get(site + uri)
    tree = html.fromstring(page.content)
    uri = tree.get_element_by_id("regId").xpath('.//option')[0].get("value")

    # Hop 3: the registration's "#indirect" section holds the address block;
    # the province/state code follows the semicolon after the city line.
    page = requests.get((site + uri).replace("#regStart", "") + "#indirect")
    tree = html.fromstring(page.content)
    addressHtml = tree.get_element_by_id("indirect").xpath(
        './/div[@class="col-sm-5"]')[0]
    province = re.search(b'(?<=\\n)[^\\n]*(?=,)[^;]*;([A-Z][A-Z0-9]*)',
                         htmlstring(addressHtml)).group(1).decode("utf-8")

    return province
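
# A quick usage sketch (12345 is a made-up communication-log id):
if __name__ == "__main__":
    print(fetchProvince(12345))  # prints a province/state code such as "ON"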
Example #2
import re

import requests
import lxml.html as LH
from lxml.html import tostring as htmlstring


def parse_booking_hotel_page(url):
    '''
    Receive a URL corresponding to a hotel webpage,
    parse information about the hotel and
    return a tuple with this information.

    css_select, get_grades, log_influxdb and BOOKING_URL_PREFIX come from
    the surrounding module (see the sketch after this example).
    '''
    #  get html
    try:
        content = requests.get(
            url.replace("en-gb", "en-us"),  # en-us so prices come back in US dollars
            headers={'Accept-Encoding': 'identity'}).text
        log_influxdb("HOTELS")
    except Exception:
        return ("#", float(0), float(0), float(0), '', [], '', '', {}, 'N/A')

    try:
        dom = LH.fromstring(content)
        #  get latitude
        latitude = re.findall(
            r'booking.env.b_map_center_latitude = ([-\.\d]+)', content)
        latitude = latitude[0] if len(latitude) > 0 else -1
        #  get longitude
        longitude = re.findall(
            r'booking.env.b_map_center_longitude = ([-\.\d]+)', content)
        longitude = longitude[0] if len(longitude) > 0 else -1
        #  get the rate
        tmp_rate = css_select(
            dom, 'span.average, span.js--hp-scorecard-scoreval, \
                  [itemprop="ratingValue"]')
        rate = tmp_rate[0].text if len(tmp_rate) > 0 else -1
        #  get the address
        address = css_select(dom, 'span.hp_address_subtitle')
        #  get images link
        #pictures = css_select(dom, 'div#photos_distinct a')
        #pictures = [result.get('href').replace("max400", "max1024x768") for result in pictures]
        pictures = re.findall(r'https:\/\/.+\.bstatic\.com\/images\/hotel\/max1024x768\/.+\.jpg', content)
        pictures = list(set(pictures))
        # get price
        price = re.findall(r'start at U*S*(.+?) .+', content)
        if len(price) >= 1:
            price = "From " + price[0]
        else:
            price = "N/A"
        #  get description
        description = css_select(dom, "div.hotel_description_wrapper_exp")
        # encoding='unicode' so the re.sub below receives str, not bytes
        description = htmlstring(description[0], encoding='unicode')
        description = re.sub("<[^>]*>", "", description)
        #  get reviews url
        reviews_url = css_select(dom, 'a.show_all_reviews_btn')
        reviews_url = [result.get('href') for result in reviews_url]
        reviews_url = BOOKING_URL_PREFIX + reviews_url[0]
        #  get reviews
        reviews = get_grades(reviews_url)
        if len(address) >= 1:
            return (url, float(latitude), float(longitude), float(rate),
                    address[0].text, pictures, description, reviews_url,
                    reviews, price)
        else:
            return (url, float(latitude), float(longitude), float(rate),
                    '', pictures, description, reviews_url, reviews, price)
    except Exception:
        return ("#", float(0), float(0), float(0), '', [], '', '',
                dict(hotel_clean=float(-4)), 'N/A')
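
# A minimal sketch of the css_select helper Example #2 assumes (document
# first, then the selector); the other module-level names it relies on
# (get_grades, log_influxdb, BOOKING_URL_PREFIX) are left unreconstructed:
from lxml.cssselect import CSSSelector

def css_select(dom, selector):
    # Return every element of `dom` matching the CSS selector.
    return CSSSelector(selector)(dom)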
Example #3
    # Odoo ir.ui.view override. Assumes module-level imports not shown in
    # this fragment: copy, re, htmlmin, request (odoo.http), fromstring /
    # Element / tostring-as-htmlstring (lxml.html), and a hex2rgb helper
    # (sketched after this example).
    def render_template(self, template, values=None, engine='ir.qweb'):
        res = super(IrUiView, self).render_template(template, values, engine)
        res_copy = res

        try:
            website = request.website
        except Exception:
            website = False

        optimiser = website and self.env['optimiser.optimiser'].sudo().search(
            [('website_id', '=', website.id)])

        try:
            res = res.decode("utf-8", "ignore").encode("ascii",
                                                       "xmlcharrefreplace")
        except Exception:
            pass

        if values and values.get('request', False) and optimiser:
            res = fromstring(res)
            head = res.find('.//head')
            body = res.find('.//body')
            no_head_body = False

            # lxml elements are falsy when they have no children, so test
            # for missing nodes with `is None` instead of truthiness
            if body is None or head is None:
                no_head_body = True

            if not no_head_body:
                if not request.httprequest.is_xhr:

                    if optimiser.load_css_async or optimiser.css_bottom:
                        styles = res.cssselect('link[rel="stylesheet"]')
                        ie_styles = ""

                        for style in styles:
                            ie_styles += htmlstring(
                                style, method="html").decode("utf-8").strip(
                                ).strip("\n").rstrip('>') + "/>"

                            if optimiser.css_bottom:
                                body.insert(len(body), style)

                            if optimiser.load_css_async:
                                noscript_tag = Element('noscript')
                                tmp_style = copy.copy(style)
                                noscript_tag.insert(0, tmp_style)

                                parent = style.getparent()
                                parent.insert(
                                    parent.index(style) + 1, noscript_tag)

                                style.attrib['rel'] = 'preload'
                                style.attrib['as'] = 'style'
                                style.attrib[
                                    'onload'] = "this.onload=null;this.rel='stylesheet'"

                        script_tag_for_converting_styles = Element("script")
                        script_tag_for_converting_styles.attrib[
                            'data-not-touchable'] = 'true'
                        script_tag_for_converting_styles.text = "function supportsToken(token){return function(relList){if(relList && relList.supports && token){return relList.supports(token)} return false}}; window.onload = function(){if(!supportsToken('preload')(document.createElement('link').relList)){var links=document.querySelectorAll('link[as=\"style\"][rel=\"preload\"]'); if(links.length){for(var i in links){links[i].rel='stylesheet'}}}}"
                        body.insert(len(body),
                                    script_tag_for_converting_styles)

                        script_tag_for_checking_ie = Element('script')
                        script_tag_for_checking_ie.attrib[
                            'data-not-touchable'] = 'true'
                        script_tag_for_checking_ie.text = "function isIE(){var myNav=navigator.userAgent.toLowerCase(); return (myNav.indexOf('msie') != -1 || myNav.indexOf('trident') != -1) ? true : false;}; if(isIE()){var div=document.createElement('div');div.innerHTML='%s';document.head.appendChild(div);}" % ie_styles
                        head.insert(len(head), script_tag_for_checking_ie)

                    if optimiser.js_bottom:
                        scripts = res.cssselect(
                            'script:not([data-not-touchable])')

                        for script in scripts:
                            body.insert(len(body), script)

                    if optimiser.load_js_async:
                        scripts = res.cssselect('script[src]')
                        lazy_scripts = res.cssselect('script[data-src]')
                        optimiser_js_async_setting = optimiser.load_js_async

                        if "shop/payment" in request.httprequest.path:
                            optimiser_js_async_setting = 'async'

                        for script in scripts:
                            if optimiser_js_async_setting == 'async':
                                script.attrib['defer'] = 'defer'
                            else:
                                script.attrib[
                                    'data-optimiser-src'] = script.attrib[
                                        'src']
                                script.attrib.pop("src", None)

                        for script in lazy_scripts:
                            if optimiser_js_async_setting == 'async':
                                script.attrib['defer'] = 'defer'
                                script.attrib['src'] = script.attrib[
                                    'data-src']
                            else:
                                script.attrib[
                                    'data-optimiser-src'] = script.attrib[
                                        'data-src']

                            script.attrib.pop("data-src", None)

                        if optimiser_js_async_setting == 'sync_lazy':
                            load_lazy_scripts = Element("script")
                            load_lazy_scripts.text = """function loadScripts() {
							    var scripts = Array.from(document.querySelectorAll("script[data-optimiser-src]"));
								sessionStorage.setItem('secondTimeLoad', '1');
							    function loadScript(scripts) {
							        if(scripts.length){
							            var attr = scripts[0].getAttribute("data-optimiser-src");
							            scripts[0].setAttribute("src", attr);
							            scripts[0].removeAttribute("data-optimiser-src");
							            scripts[0].onload = function () {
							                scripts.shift();
							                loadScript(scripts);
							            }
							        }
							    }

							    loadScript(scripts)
							}

							window.addEventListener("scroll", function scrollEventFunction() {
								setTimeout(function(){loadScripts()},500)
							},{once: true})

							window.addEventListener("load", function () {
								var timer = sessionStorage.getItem('secondTimeLoad') ? 0 : 1500;
							    setTimeout(function () {
							        loadScripts()
							    }, timer);
							})"""
                            body.insert(len(body), load_lazy_scripts)

                    if optimiser.page_loading:
                        page_loader_script_tag = Element("script")
                        page_loader_script_tag.text = "window.addEventListener('" + optimiser.show_page_loading_until + \
                                                      "', function(){document.querySelector('div.optimiser-page-loader').remove();});"
                        page_loader_image_width = optimiser.page_loading_image_width if optimiser.page_loading_image_width else "100px"
                        page_loader_image_height = optimiser.page_loading_image_height if optimiser.page_loading_image_height else "100px"
                        page_loader_image_position_top = optimiser.page_loading_image_pos_top if optimiser.page_loading_image_pos_top else "50%"
                        page_loader_image_position_left = optimiser.page_loading_image_pos_left if optimiser.page_loading_image_pos_left else "50%"
                        page_loader_bg = optimiser.page_loading_bg_color if optimiser.page_loading_bg_color else "#FFFFFF"
                        page_loader_bg_transparency = optimiser.page_loading_bg_transparency if optimiser.page_loading_bg_transparency else 1
                        page_loader_bg_image = (optimiser.show_default_page_loading_image or optimiser.page_loading_image) \
                                               and "background-image: url(/optimiser-page-loader-image);" or ""

                        page_loader_div = Element(
                            "div", **{
                                'class':
                                "optimiser-page-loader",
                                'style':
                                "position: fixed;"
                                "left: 0;"
                                "top: 0;"
                                "width: 100%%;"
                                "height: 100%%;"
                                "z-index: 9999999999;"
                                "%s"
                                "background-repeat: no-repeat;"
                                "background-size: %s %s;"
                                "background-color: rgba%s;"
                                "background-position: %s %s;"
                                "background-attachment: fixed;" %
                                (str(page_loader_bg_image),
                                 str(page_loader_image_width),
                                 str(page_loader_image_height),
                                 str(
                                     hex2rgb(page_loader_bg,
                                             page_loader_bg_transparency)),
                                 str(page_loader_image_position_top),
                                 str(page_loader_image_position_left))
                            })

                        body.insert(len(body), page_loader_script_tag)
                        body.insert(0, page_loader_div)

                    if len(optimiser.custom_content_ids) > 0:

                        contents = optimiser.custom_content_ids

                        for content in contents:
                            if content.content:
                                try:
                                    tmp = fromstring(content.content)
                                except Exception:
                                    continue

                                if content.position.startswith('head'):
                                    html = head
                                else:
                                    html = body

                                position = len(
                                    html) if content.position.endswith(
                                        "end") else 0
                                head_content = tmp.find('.//head')

                                if head_content is not None:
                                    for tmp_content in head_content:
                                        html.insert(position, tmp_content)
                                        position += 1
                                else:
                                    html.insert(position, tmp)

                if optimiser.preload_fonts and len(
                        optimiser.preload_fonts_ids) > 0:
                    for font in optimiser.preload_fonts_ids:
                        preload_font_elem = Element('link')
                        preload_font_elem.attrib['rel'] = "preload"
                        preload_font_elem.attrib['href'] = font.path
                        preload_font_elem.attrib['as'] = "font"
                        preload_font_elem.attrib['crossorigin'] = ""
                        head.insert(1, preload_font_elem)

                if optimiser.enable_lazy_load_front:
                    images = res.cssselect('img:not(.og_not_lazy)')
                    bg_images = res.cssselect(
                        '[style*="background-image"]:not(.optimiser-page-loader):not(.og_not_lazy)'
                    )
                    loading_image = ((optimiser.show_default_image_loading_image or optimiser.loading_image)
                                     and "/optimiser-loading") \
                                    or "/optimiser/static/src/img/empty.png"
                    check_class_regex = re.compile(
                        r"^.*\s*optimiser_lazy(\s+|$)")

                    if not request.httprequest.is_xhr:
                        lazy_loader_style = Element('style')
                        lazy_loader_style.text = 'img[src="/optimiser-loading"]{width:40px!important;height:40px!important;text-align:center;margin:auto;-o-object-fit:contain!important;object-fit:contain!important}'
                        head.insert(len(head), lazy_loader_style)

                    for bg_img in bg_images:
                        bg_style = bg_img.attrib['style']
                        find_bg_image = "background-image:"

                        try:
                            bg_image_index = bg_style.index(find_bg_image)
                        except ValueError:
                            continue

                        index_of_bg_image_start = bg_image_index + len(
                            find_bg_image)

                        try:
                            bg_style.index('url', index_of_bg_image_start)
                        except ValueError:
                            continue

                        try:
                            index_of_bg_image_end = bg_style.index(
                                ';', index_of_bg_image_start)
                        except ValueError:
                            try:
                                bg_style += ';'
                                index_of_bg_image_end = bg_style.index(
                                    ';', index_of_bg_image_start)
                            except ValueError:
                                continue

                        important_exists = ''

                        try:
                            important_exists = bg_style[
                                index_of_bg_image_start:
                                index_of_bg_image_end].index('!important')
                        except ValueError:
                            pass

                        start_of_string = bg_style[:bg_style.
                                                   index(find_bg_image)]
                        end_of_string = bg_style[index_of_bg_image_end + 1:]

                        if important_exists != '':
                            # str.lstrip()/rstrip() strip character *sets*,
                            # not substrings, so slice the markers off instead
                            url_value = bg_style[index_of_bg_image_start:
                                                 index_of_bg_image_end].strip()
                            url_value = url_value[:url_value.index(
                                '!important')].strip()
                            important_exists = '!important'
                            main_image_url = url_value[
                                url_value.index('(') + 1:].rstrip(')').strip(
                                    "'").strip('"')
                        else:
                            main_image_url = bg_style[
                                bg_style.index('url', index_of_bg_image_start)
                                + 3:index_of_bg_image_end].strip().lstrip(
                                    '(').rstrip(')').strip("'").strip('"')

                        if 'data-src' not in bg_img.attrib:
                            bg_img.attrib['data-src'] = main_image_url
                        if not check_class_regex.match(
                                bg_img.attrib.get('class', '')):
                            bg_img.attrib['class'] = bg_img.attrib.get(
                                'class', '') + ' optimiser_lazy'
                        bg_img.attrib['style'] = "background-image: url('" + \
                                                 loading_image + "')" + important_exists + ";" + \
                                                 start_of_string + \
                                                 end_of_string

                    for img in images:
                        if 'data-src' not in img.attrib:
                            img.attrib['data-src'] = img.attrib['src']
                        img.attrib['src'] = loading_image
                        if not check_class_regex.match(
                                img.attrib.get('class', '')):
                            img.attrib['class'] = img.attrib.get(
                                'class', '') + ' optimiser_lazy'

                if optimiser.enable_recaptcha:
                    if optimiser.captcha_selectors:
                        selectors = res.cssselect(','.join(
                            optimiser.captcha_selectors.mapped('name')))

                        if selectors:
                            captcha_element_parent = Element('div')
                            captcha_element_parent.attrib[
                                'class'] = 'form-group field-recaptcha'

                            captcha_element = Element('div')
                            captcha_element.attrib['class'] = 'g-recaptcha'
                            captcha_element.attrib[
                                'data-sitekey'] = optimiser.captcha_site_key

                            captcha_element_parent.insert(0, captcha_element)

                            for element in selectors:
                                insert_element = None

                                for i in reversed(element.getchildren()):
                                    if i.tag == 'div':
                                        insert_element = i
                                        break

                                # skip forms without a <div> child instead of
                                # crashing on element.index(None)
                                if insert_element is not None:
                                    element.insert(
                                        element.index(insert_element),
                                        captcha_element_parent)

                            if not request.httprequest.is_xhr:
                                script_tag_for_recaptcha = Element("script")
                                script_tag_for_recaptcha.attrib[
                                    'src'] = 'https://www.google.com/recaptcha/api.js'
                                script_tag_for_recaptcha.attrib[
                                    'async'] = 'async'
                                script_tag_for_recaptcha.attrib[
                                    'defer'] = 'defer'
                                body.insert(len(body),
                                            script_tag_for_recaptcha)

                doctype = None if '/slides/embed' in request.httprequest.url else '<!DOCTYPE html>'

                res = htmlstring(res, method="html", doctype=doctype)
            else:
                res = res_copy

            if optimiser.compress_html:
                res = htmlmin.minify(res.decode("utf-8"),
                                     remove_empty_space=True,
                                     remove_comments=True)

            try:
                res = res.decode("utf-8")
            except Exception:
                pass

        return res
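
# Hypothetical sketch of the hex2rgb helper the override above calls; the
# only constraint visible here is that str(hex2rgb(color, alpha)) must read
# as an rgba tuple, e.g. "(255, 255, 255, 1)":
def hex2rgb(hex_color, alpha):
    value = hex_color.lstrip('#')
    red, green, blue = (int(value[i:i + 2], 16) for i in (0, 2, 4))
    return (red, green, blue, alpha)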
Example #4
    def getSongTag(songId, albumId):
        # Assumes module-level imports not shown in this fragment:
        # urllib.parse, logger, and `from lxml import html`.
        # LogicNormal.get_html fetches a page (see the sketch after this
        # example). The tostring import lives up here so the lyrics block
        # below can always see it, even when the track block fails early.
        from lxml.etree import tostring as htmlstring

        logger.debug("songId : %s", songId)
        logger.debug("albumId : %s", albumId)
        allTag = {}

        url = 'https://m.app.melon.com/song/detail.htm?songId='
        url = '%s%s' % (url, urllib.parse.quote(songId))

        data = LogicNormal.get_html(url)
        tree = html.fromstring(data)

        # title
        try:
            h1 = tree.xpath('/html/body/div[1]/article/div[2]/div/h1')[0]
            title = h1.text.strip()
            allTag['title'] = title
        except Exception as e:
            allTag['title'] = ""
        # logger.debug("title : " + title)

        # artist
        try:
            artist = ""
            p = tree.xpath('/html/body/div[1]/article/div[2]/div/p')[0]
            artist = p.text.strip()
            allTag['artist'] = artist
        except Exception as e:
            allTag['artist'] = ""
        # logger.debug("artist : " + artist)

        # genre
        try:
            span = tree.xpath('/html/body/div[1]/article/div[2]/ul/li[1]/span[2]')[0]
            genre = span.text.strip()
            allTag['genre'] = genre
        except Exception as e:
            allTag['genre'] = ""
        # logger.debug("genre : " + genre)


        
        url = 'https://m.app.melon.com/album/music.htm?albumId='
        url = '%s%s' % (url, urllib.parse.quote(albumId))
        
        data = LogicNormal.get_html(url)
        tree = html.fromstring(data)

        p = tree.xpath('/html/body/section/div[2]/div[1]/div/div[2]/p[2]')
        # release year
        try:
            year = p[0].text[:4]
            allTag['year'] = year
        except Exception as e:
            allTag['year'] = ""
        # logger.debug("year : " + year)

        # track
        try:
            track = "00"
            lis = tree.xpath('/html/body/div[1]/article/div[2]/ul/li')
            
            logger.debug("lis : %d", len(lis))
            
            if len(lis) == 1:
                p = tree.xpath('/html/body/div[1]/article/div[2]/ul/li/div[2]/div/a/p')[0]
                pHtml = p.text_content().strip()
                pHtml = pHtml.replace('타이틀', "")  # drop the "title track" badge text
                p = pHtml.strip()
                if p == allTag['title']:
                    div = tree.xpath('/html/body/div[1]/article/div[2]/ul/li/div[1]')[0]
                    track = div.text_content().strip()
                
            else:
                for i in range(0, len(lis)):
                    cnt = i + 1
                    logger.debug("i : %d", i)
                    p = tree.xpath('/html/body/div[1]/article/div[2]/ul/li[%s]/div[2]/div/a/p' % cnt)[0]
                    span = tree.xpath('/html/body/div[1]/article/div[2]/ul/li[%s]/div[2]/div/a/p/span' % cnt)
                    
                    if len(span) == 1:
                        pHtml = p.text_content().strip()
                        pHtml = pHtml.replace('타이틀', "")  # drop the "title track" badge text
                        p = pHtml.strip()
                    else:
                        p = p.text.strip() 
                    
                    if p == allTag['title']:
                        div = tree.xpath('/html/body/div[1]/article/div[2]/ul/li[%s]/div[1]' % cnt)[0]
                        track = div.text_content().strip()
                
            allTag['track'] = track
        except Exception as e:
            allTag['track'] = ""
        # logger.debug("track : " + track)

        # album image
        try:
            albumImage = ""
            meta = tree.xpath('/html/head/meta[6]')[0]
            albumImage = meta.attrib.get("content")
            allTag['albumImage'] = albumImage
        except Exception as e:
            allTag['albumImage'] = ""
        # logger.debug("albumImage : " + albumImage)

        # album
        try:
            album = ""
            p = tree.xpath('/html/body/section/div[2]/div[1]/div/div[2]/p[1]')[0]
            album = p.text.strip()
            allTag['album'] = album
        except Exception as e:
            allTag['album'] = ""
        # logger.debug("album : " + album)

        # lyrics
        try:
            url = 'https://m.app.melon.com/song/lyrics.htm?songId='
            url = '%s%s' % (url, urllib.parse.quote(songId))
            
            data = LogicNormal.get_html(url)
            tree = html.fromstring(data)
            
            div = tree.xpath('/html/body/div[1]/article/div[2]/div[2]')[0]
            # encoding='unicode' makes tostring return str so the
            # str.replace calls below work (encoding='utf8' returns bytes)
            lyrics = htmlstring(div, encoding='unicode')
            lyrics = lyrics.replace('<div class="lyrics">', "")
            lyrics = lyrics.replace("&#13;", "")
            lyrics = lyrics.replace("</div>", "")
            lyrics = lyrics.replace("<br/>", "\n").strip()
            allTag['lyrics'] = lyrics
        except Exception as e:
            allTag['lyrics'] = ""
        # logger.debug("lyrics : " + lyrics)

        return allTag
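
# A minimal stand-in for the LogicNormal.get_html helper assumed above (an
# illustrative guess, not the original implementation; it only needs to fetch
# the URL and hand back the page body as text):
import requests

class LogicNormal(object):
    @staticmethod
    def get_html(url):
        return requests.get(url).text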
Example #5
import re

import requests
import lxml.html as LH


def movie_details(url):
    '''
    Extracts movie info from the given AlloCine movie page URL.

    css_select (selector first here), to_ascii, stripslashes, htmlstring,
    log_influxdb and ALLOCINE_URL_PREFIX come from the surrounding module
    (see the sketch after this example).
    '''
    try:
        content = requests.get(url, headers={
            'Accept-Encoding': 'identity'
        }).text
        dom = LH.fromstring(content)
        status = "RELEASED"
        rank = -1
        try:
            rank = float(re.findall(r'\d+', url)[0])
        except Exception:
            log_influxdb("COULDNT_RANK")
            rank = -1.0
        try:
            length = re.findall(r'\(([0-9]+)h ([0-9]+)min\)', content)
            hours = float(length[0][0])
            minutes = float(length[0][1])
        except Exception:
            status = "UNRELEASED"
            hours = minutes = float(-1)
            log_influxdb("UNRELEASED_MOVIES")
        try:
            score_press = float(
                css_select(
                    ".rating-holder .rating-item:nth-child(1) .stareval-note",
                    dom)[0].text.replace(",", "."))
            reviews_press = float(
                css_select(
                    ".rating-holder .rating-item:nth-child(1) .stareval-review",
                    dom)[0].text)
        except Exception:
            score_press = reviews_press = float(-1)
        try:
            score_viewers = float(
                css_select(
                    ".rating-holder .rating-item:nth-child(2) .stareval-note",
                    dom)[0].text.replace(",", "."))
            reviews_viewers = float(
                css_select(
                    '.rating-holder .rating-item:nth-child(2) .stareval-review [itemprop="ratingCount"]',
                    dom)[0].text)
        except Exception:
            score_viewers = reviews_viewers = float(-1)
        try:
            date = to_ascii(css_select(".date.blue-link", dom)[0].text)
        except Exception:
            status = "NO RELEASE DATE"
            date = ""
            log_influxdb("NO RELEASE DATE")
        try:
            synopsis = to_ascii(css_select(".synopsis-txt", dom)[0].text)
        except Exception:
            status = "NO SYNOPSIS"
            synopsis = ""
            log_influxdb("NO SYNOPSIS")

        title = to_ascii(
            css_select(".titlebar-title.titlebar-title-lg", dom)[0].text)
        cover = to_ascii(
            css_select(".card-movie-overview .thumbnail-img",
                       dom)[0].get("src"))
        director = to_ascii(
            css_select('[itemprop="director"] [itemprop="name"]', dom)[0].text)
        genre = [
            to_ascii(result.text)
            for result in css_select('[itemprop="genre"]', dom)
        ]
        nationalities = [
            to_ascii(result.text)
            for result in css_select(".blue-link.nationality", dom)
        ]
        pictures = [
            to_ascii(result.get("data-src"))
            for result in css_select(".shot-img", dom)
        ]
        actors = [
            to_ascii(result.text) for result in css_select(
                ".card-movie-overview .meta-body .meta-body-item:nth-child(3) span.blue-link:not(.more)",
                dom)
        ]
        # encoding='unicode' so to_ascii receives str, not bytes
        misc = to_ascii(
            htmlstring(css_select(".ovw-synopsis-info", dom)[0],
                       encoding='unicode'))

        try:
            trailer = ALLOCINE_URL_PREFIX + css_select(
                ".trailer", dom)[0].get("href").replace("&amp;", "&")
            trailer = requests.get(trailer,
                                   headers={
                                       'Accept-Encoding': 'identity'
                                   }).text
            trailer_hd = re.findall(
                r"([\.\\\/0-9a-zA-Z_]+hd[\\/0-9a-zA-Z_]+\.mp4)", trailer)
            if len(trailer_hd):
                trailer = to_ascii(u'http:' + stripslashes(trailer_hd[0]))
            else:
                trailer = to_ascii(u'http:' + stripslashes(
                    re.findall(r"([\.\\\/0-9a-zA-Z_]+[^k]\.mp4)", trailer)[0]))
        except Exception:
            trailer = ""
            status = "MISSING TRAILER"
            log_influxdb("FAILED_TRAILERS")

        log_influxdb("MOVIES")
        return [
            status, hours, minutes, title, date, cover, director, genre,
            nationalities, score_press, reviews_press, score_viewers,
            reviews_viewers, pictures, actors, synopsis, misc, trailer, rank
        ]
    except Exception as e:
        log_influxdb("FAILED_MOVIES")
        return []
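
# Hypothetical sketches of the helpers Example #5 assumes (its css_select
# takes the selector first, unlike Example #2); bodies are illustrative
# guesses, not the original implementations:
import unicodedata
from lxml.cssselect import CSSSelector

def css_select(selector, dom):
    # Return every element of `dom` matching the CSS selector.
    return CSSSelector(selector)(dom)

def to_ascii(text):
    # Flatten accented text to plain ASCII (AlloCine pages are French).
    return unicodedata.normalize('NFKD', text or u'').encode(
        'ascii', 'ignore').decode('ascii').strip()

def stripslashes(s):
    # Undo the backslash-escaped slashes in JSON-embedded video URLs.
    return s.replace('\\/', '/')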