Exemplo n.º 1
0
def getFormData(comPattern, content, posini):
    """Locate a form in ``content`` and collect its attributes and fields.

    Args:
        comPattern: Compiled ``CustomRegEx.ExtRegexObject`` used to find the
            form markup.
        content: Text searched for the form. It is also scanned for
            script-injected ``<input/>`` elements that are prepended to the
            form by its ``id``.
        posini: Offset in ``content`` at which the search starts.

    Returns:
        ``None`` if ``comPattern`` does not match. Otherwise the tuple
        ``(posfin, formAttr, formFields)``: the end offset of the match, the
        attribute dict reported by ``getAttrDict`` for the matched markup,
        and an ``OrderedDict`` mapping each field name to its attribute
        dict. Field dicts get extra keys: repeated names accumulate their
        values in a list under ``'value'``; fields with a ``list`` attribute
        get the datalist values; ``select`` fields get ``'value'``,
        ``'lvalue'`` and ``'default'``; ``textarea`` fields get their text
        under ``'value'``.

    Raises:
        TypeError: If ``comPattern`` is not a ``CustomRegEx.ExtRegexObject``.
    """
    if not isinstance(comPattern, CustomRegEx.ExtRegexObject):
        raise TypeError('Expecting an CustomRegEx.ExtRegexObject')
    match = comPattern.search(content, posini)
    if not match:
        return None
    posfin = match.end()
    formHtml = match.group()
    formAttr = getAttrDict(formHtml, 0, noTag=False)[1]
    formAttr.pop('*ParamPos*')
    formFields = collections.OrderedDict()
    if formAttr and 'id' in formAttr:
        # Inputs added from script: $('<input/>').attr({...}).prependTo('#<id>')
        formId = formAttr['id']
        pattern = r'''\$\(['"]<input/>['"]\)\.attr\(\{(?P<input_attr>[^}]+)\}\)\.prependTo\(['"]#%s['"]\)''' % formId
        for avar in re.findall(pattern, content):
            # Turn the "key: value, ..." object literal into tag attributes.
            avar = avar.replace(': ', ':').replace(',', '').replace(':', '=')
            avar = '<input ' + avar + ' prepend="">'
            attr = getAttrDict(avar, 0, noTag=True)
            formFields[attr['name']] = attr
    pattern = r'(?#<form<__TAG__="input|select|textarea"=tag name=name>*>)'
    for m in CustomRegEx.finditer(pattern, formHtml):
        tag, name = m.groups()
        p1, p2 = m.span()
        attr = getAttrDict(m.group(), 0, noTag=True)
        attr.pop('*ParamPos*')
        if formFields.get(name, None):
            # Repeated field name: collect every value in a list.
            if 'value' in attr and 'value' in formFields[name]:
                value = formFields[name]['value']
                if isinstance(value, basestring):
                    value = [value]
                value.append(attr['value'])
                formFields[name]['value'] = value
            continue
        formFields[name] = attr
        if 'list' in attr:
            pattern = r'(?#<datalist id="%s"<value=value>*>)' % attr['list']
            attr['value'] = CustomRegEx.findall(pattern, formHtml)
        elif tag == 'select':
            fieldHtml = formHtml[p1:p2]
            pattern = r'(?#<option value=value *=&lvalue&>)'
            options = CustomRegEx.findall(pattern, fieldHtml)
            # findall returns a list of (value, lvalue) tuples; the previous
            # code called .groups() on that list and raised AttributeError.
            attr['lvalue'] = [option[1] for option in options]
            pattern = r'(?#<option value=value>)'
            attr['value'] = CustomRegEx.findall(pattern, fieldHtml)
            pattern = r'(?#<option value=value selected>)'
            selected = CustomRegEx.findall(pattern, fieldHtml)
            attr['default'] = selected[0] if selected else ''
        elif tag == 'textarea':
            attr['value'] = attr.get('*', '')

    return posfin, formAttr, formFields
Exemplo n.º 2
0
def getMenuHeaderFooterOLD(param, args, data, menus):
    """Build the header/footer entries described by ``menus``.

    Each item of ``menus`` is a ``(label, regexp)`` pair. ``regexp`` may be
    written as ``default|values``: the ``values`` part is run against
    ``data`` to collect the options, and the ``default`` part (if any)
    provides the text appended to the entry label. When ``default`` starts
    with ``(?#<SPAN>)`` its match span limits the region of ``data`` that is
    searched for options.

    Returns:
        A list of ``[paramDict, itemParam, None]`` entries, one per menu
        whose options regexp matched.
    """
    unescape = HTMLParser.HTMLParser().unescape
    menuId = args.get('menu', ['rootmenu'])[0]
    url = args.get("url")[0]
    entries = []
    for pos, (opLabel, opregexp) in enumerate(menus):
        head, sep, tail = opregexp.partition('|')
        valuesRegex = tail or head
        defaultRegex = head if sep else ''
        start, stop = 0, -1
        if defaultRegex.startswith('(?#<SPAN>)'):
            start = -1
            spanMatch = CustomRegEx.search(defaultRegex, data)
            if spanMatch:
                start, stop = spanMatch.span(0)
        options = CustomRegEx.findall(valuesRegex, data[start:stop])
        if not options:
            continue
        tags = CustomRegEx.compile(valuesRegex).groupindex.keys()
        if len(tags) > 1:
            urlIdx = tags.index('url')
            menuUrl = [option[urlIdx] for option in options]
        else:
            menuUrl = options
        if 'label' in tags:
            labelIdx = tags.index('label')
            menuLabel = map(unescape, [option[labelIdx] for option in options])
        else:
            menuLabel = len(menuUrl) * ['Label placeholder']
        defaultText = defaultRegex
        if defaultRegex:
            found = CustomRegEx.search(defaultRegex, data)
            defaultText = unescape(found.group(1) if found else '')
        # Carry over the first value of every indexable argument except the
        # header/footer ones.
        paramDict = {}
        for key, value in args.items():
            if hasattr(value, "__getitem__") and key not in ["header", "footer"]:
                paramDict[key] = value[0]
        paramDict.update({'section':param, 'url':url, param:pos, 'menu':menuId, 'menulabel': str(menuLabel), 'menuurl':str(menuUrl)})
        itemParam = {'isFolder':True, 'label':opLabel + defaultText}
        entries.append([paramDict, itemParam, None])
    return entries
Exemplo n.º 3
0
 def parseClassDefinition(self):
     """Write the module header or the class statement to ``self._log``.

     While no class signature is stored, the module header template is
     stored as the signature and printed with ``self.classUrl`` filled in.
     Otherwise the signature and the first paragraph are read from the
     section text, and a ``class`` statement with a formatted docstring is
     printed; ``self._classSignature`` and ``self._classname`` are updated.
     """
     if not self._classSignature:
         header = '# -*- coding: utf-8 -*-\n"""%s"""\n' + 'from Android import overload\n\n'
         self._classSignature = header
         print(header % self.classUrl, file=self._log)
         return

     texto = self._getSectionText('')
     sigMatch = crgx.search(r'(?#<code class="api-signature" *=label>)', texto)
     rawSignature = sigMatch.group('label')
     # Collapse line breaks and runs of spaces into single spaces.
     words = [word for word in rawSignature.replace('\n', '').split(' ') if word]
     classSignature = ' '.join(words)
     self._classSignature = classSignature

     # The class description is the first <p> after the signature.
     afterSignature = texto[sigMatch.end('label'):]
     paragraph = crgx.search(r'(?#<p>)', afterSignature).group()
     classDoc = ' '.join(re.findall('>(.*?)<', paragraph, re.DOTALL))
     classDoc = re.sub(r' +', ' ', classDoc.replace('\n', ''))

     classType, className = classSignature.split(' class ')
     indent = '\n    '
     classDoc = self.formatDocString(classDoc, indent, 80)
     isAbstract = 'abstract' in classType
     template = (
         '\nclass I{0}(object):{1}{2}__metaclass__ = abc.ABCMeta'
         if isAbstract else
         '\nclass {0}(object):{1}'
     )
     self._classname = className
     print(template.format(className, classDoc, indent), file=self._log)
Exemplo n.º 4
0
def thevideo(videoId, encHeaders=''):
    """Return the media URL that thevideo.me serves for ``videoId``.

    The video page is fetched, its form variables (script-injected and
    regular inputs) and ``$.cookie`` values are collected, the form is
    posted back, and the ``file`` of the last ``label/file`` source found
    in the response is returned. ``encHeaders`` is not used.
    """
    headers = {
        'User-Agent': DESKTOP_BROWSER,
        'Referer': 'http://thevideo.me/%s' % videoId
    }
    pageUrl = 'http://thevideo.me/%s<headers>%s' % (videoId, urllib.urlencode(headers))
    content = basicFunc.openUrl(pageUrl)[1]

    # Variables added by script plus the regular form inputs.
    scriptVars = r'''name: '(?P<var1>[^']+)', value: '(?P<var2>[^']+)' \}\).prependTo\(\"#veriform\"\)'''
    formVars = CustomRegEx.findall(scriptVars, content)
    inputVars = r"(?#<form .input<name=var1 value=var2>*>)"
    formVars.extend(CustomRegEx.findall(inputVars, content))
    cookieVars = r"\$\.cookie\(\'(?P<var1>[^']+)\', \'(?P<var2>[^']+)\'"
    cookieval = CustomRegEx.findall(cookieVars, content)

    quote = urllib.quote
    fields = []
    for name, value in formVars:
        fields.append('='.join((name, quote(value) if value else '')))
    postdata = '&'.join(fields)
    headers['Cookie'] = '; '.join(['='.join(pair) for pair in cookieval])

    postUrl = 'http://thevideo.me/%s<post>%s<headers>%s' % (
        videoId, postdata, urllib.urlencode(headers))
    content = basicFunc.openUrl(postUrl)[1]
    sourcePattern = r"label: '(?P<res>[^']+)', file: '(?P<url>[^']+)'"
    sources = CustomRegEx.findall(sourcePattern, content)
    res, href = sources.pop()
    return href
Exemplo n.º 5
0
def getMenuHeaderFooterOLD(param, args, data, menus):
    """Build the header/footer entries described by ``menus``.

    Each item of ``menus`` is a ``(label, regexp)`` pair. ``regexp`` may be
    written as ``default|values``: the ``values`` part is run against
    ``data`` to collect the options, and the ``default`` part (if any)
    provides the text appended to the entry label. When ``default`` starts
    with ``(?#<SPAN>)`` its match span limits the region of ``data`` that is
    searched for options.

    Returns:
        A list of ``[paramDict, itemParam, None]`` entries, one per menu
        whose options regexp matched.
    """
    unescape = HTMLParser.HTMLParser().unescape
    menuId = args.get('menu', ['rootmenu'])[0]
    url = args.get("url")[0]
    entries = []
    for pos, (opLabel, opregexp) in enumerate(menus):
        head, sep, tail = opregexp.partition('|')
        valuesRegex = tail or head
        defaultRegex = head if sep else ''
        start, stop = 0, -1
        if defaultRegex.startswith('(?#<SPAN>)'):
            start = -1
            spanMatch = CustomRegEx.search(defaultRegex, data)
            if spanMatch:
                start, stop = spanMatch.span(0)
        options = CustomRegEx.findall(valuesRegex, data[start:stop])
        if not options:
            continue
        tags = CustomRegEx.compile(valuesRegex).groupindex.keys()
        if len(tags) > 1:
            urlIdx = tags.index('url')
            menuUrl = [option[urlIdx] for option in options]
        else:
            menuUrl = options
        if 'label' in tags:
            labelIdx = tags.index('label')
            menuLabel = map(unescape, [option[labelIdx] for option in options])
        else:
            menuLabel = len(menuUrl) * ['Label placeholder']
        defaultText = defaultRegex
        if defaultRegex:
            found = CustomRegEx.search(defaultRegex, data)
            defaultText = unescape(found.group(1) if found else '')
        # Carry over the first value of every indexable argument except the
        # header/footer ones.
        paramDict = {}
        for key, value in args.items():
            if hasattr(value, "__getitem__") and key not in ["header", "footer"]:
                paramDict[key] = value[0]
        paramDict.update({'section':param, 'url':url, param:pos, 'menu':menuId, 'menulabel': str(menuLabel), 'menuurl':str(menuUrl)})
        itemParam = {'isFolder':True, 'label':opLabel + defaultText}
        entries.append([paramDict, itemParam, None])
    return entries
 def test_equivNotation(self):
     """
     Equivalent notation using associativity, which is expressed with {}.
     """
     bracedPattern = '(?#<a href span{src=icon *=label} div.id>)'
     dottedPattern = '(?#<a href span.src=icon span.*=label div.id>)'
     braced = CustomRegEx.compile(bracedPattern, 0)
     dotted = CustomRegEx.compile(dottedPattern, 0)
     assert ExtCompObjEquality(braced, dotted)
 def test_cleanvars(self):
     """
     <a href="http://uno.html">texto</a>
     HTML tag with the implicit variable href and a label variable that
     receives the text once leading and trailing whitespace is removed:
     the &label& notation stores the stripped text in label.
     """
     plain = CustomRegEx.compile('(?#<a (href) *=label>)', 0)
     cleaned = CustomRegEx.compile('(?#<a (href) *=&label&>)', 0)
     plainText = plain.tags['tagpholder']['*']
     cleanedText = cleaned.tags['tagpholder']['*']
     assert plainText != cleanedText
 def test_tripleAsignation(self):
     """
     Equivalent notation using double assignment to declare both the
     variable and the required parameter pattern.
     """
     separatePattern = '(?#<ese a.*="http//.+?/prueba" a.*=icon href=url>)'
     chainedPattern = '(?#<ese a.*="http//.+?/prueba"=icon href=url>)'
     separate = CustomRegEx.compile(separatePattern, 0)
     chained = CustomRegEx.compile(chainedPattern, 0)
     assert ExtCompObjEquality(separate, chained)
 def test_equivNotationII(self):
     """
     Equivalent notation using associativity when the same tag appears at
     several levels.
     """
     flatPattern = '(?#<table id td.*=grp1 td[2].b.*=grp2 td[2].a.href=grp2a td[2].a.src=grp2b td[3].*=grp3 td[4].*=grp4>)'
     nestedPattern = '(?#<table id td{1.*=grp1 2{b.*=grp2 a{href=grp2a src=grp2b}} 3.*=grp3 4.*=grp4}>)'
     flat = CustomRegEx.compile(flatPattern, 0)
     nested = CustomRegEx.compile(nestedPattern, 0)
     assert ExtCompObjEquality(flat, nested)
    def test_tag(self):
        """Check tag patterns: alternation, tag-name capture and __TAG__."""
        labels = ['span0', 'bloque1', 'bloque2', 'span3']
        tagVarNames = ['__TAG__', 'label']
        tagAndLabel = [('span', 'span0'), ('span', 'bloque1'),
                       ('span', 'bloque2'), ('span', 'span3')]
        customVarNames = ['mi_nametag_var', 'label']

        # Plain alternation of tag names.
        found = CustomRegEx.findall('(?#<span|a *=label>)', self.htmlStr)
        assert found == labels, 'Obtener texto de tags span o a'

        # Parenthesised tag pattern stores the tag name in __TAG__.
        compiled = CustomRegEx.compile('(?#<(span|a) *=label>)')
        names = compiled.groupindex.keys()
        assert names == tagVarNames, 'Al encerrar el tagpattern entre paréntesis el nametag se almacena en la variable __TAG__ '

        found = compiled.findall(self.htmlStr)
        assert found == tagAndLabel, 'El primer componente de los tuples que conforman answer corresponde al nametag'

        # __TAG__ used as an attribute to name the tag variable.
        compiled = CustomRegEx.compile(
            '(?#<span|a __TAG__=mi_nametag_var *=label>)')
        names = compiled.groupindex.keys()
        assert names == customVarNames, 'Al utilizar el atributo __TAG__ se puede asignar una variable que contendra el nametag de los tags que cumplen con el pattern buscado'

        found = compiled.findall(self.htmlStr)
        assert found == tagAndLabel, 'El resultado es el mismo, cambia solo el nombre de la variable asociada al nametag'

        # __TAG__ used as the tag pattern itself.
        compiled = CustomRegEx.compile('(?#<__TAG__ *="[sb].+?"=label>)')
        found = compiled.findall(self.htmlStr)
        assert found == labels, 'Al utilizar __TAG__ como tag attribute se hace el tagpattern = "[a-zA-Z][^\s>]*", para con el primer resultado se asigna "[sb].+?" al *'

        compiled = CustomRegEx.compile('(?#<(__TAG__) *=".+?"=label>)')
        names = compiled.groupindex.keys()
        assert names == tagVarNames, 'Se puede utiliza (__TAG__) para guardar el nametag en la variable __TAG__'

        compiled = CustomRegEx.compile(
            '(?#<__TAG__ __TAG__=mi_nametag_var *=".+?"=label>)')
        names = compiled.groupindex.keys()
        assert names == customVarNames, 'Se puede utiliza __TAG__=nombrevar para guardar el nametag en una variable con nmbre propio'

        compiled = CustomRegEx.compile(
            '(?#<__TAG__ __TAG__=mi_nametag_var *=label>)')
        found = compiled.findall(self.htmlStr)
        anyTag = [('span', 'span0'), ('script', ''), ('bloque', ''),
                  ('span', 'span3')]
        assert found == anyTag, 'Utilizando __TAG__ como tagpattern'

        compiled = CustomRegEx.compile(
            '(?#<__TAG__ __TAG__="span|a"=mi_nametag_var *=label>)')
        found = compiled.findall(self.htmlStr)
        assert found == tagAndLabel, 'Utilizando __TAG__="span|a"=mi_nametag_var se redefine el tagpattern a "span|a" y se asigna a la variable mi_nametag_var'

        # Naming the tag twice, via (__TAG__) and __TAG__=var, must fail.
        with pytest.raises(re.error):
            'Entrega error porque se utiliza (__TAG__) como tagpattern y con __TAG__=mi_nametag_var se intenta asignarle a otra variable'
            CustomRegEx.compile(
                '(?#<(__TAG__) __TAG__=mi_nametag_var *=label>)')
Exemplo n.º 11
0
def parseUrlContent(url, data, regexp, compFlags = None, posIni = 0, posFin = 0):
    """Return one dict of named groups per match of ``regexp`` in ``data``.

    Searching starts at ``posIni`` and, when ``posFin`` is non-zero, stops
    at the first match that starts after ``posFin``. The ``NXTPOSINI``
    directive selects the group whose end is the next search position and
    the ``SPAN`` directive adds a ``'span'`` entry with the stringified
    span of the chosen group. URL-like groups are joined with ``url`` and
    every ``label*`` group except ``label2`` is merged into one unescaped
    ``'label'`` entry.
    """
    directives = getParseDirectives(regexp)
    nextGroup = directives.get('NXTPOSINI', 0)
    pattern = CustomRegEx.compile(regexp, flags = compFlags if compFlags else 0)

    matchs = []
    match = pattern.search(data, posIni)
    while match:
        if posFin != 0 and match.start(0) > posFin:
            break
        fields = match.groupdict()
        if 'SPAN' in directives:
            spanGroup = directives['SPAN']
            fields['span'] = str((match.start(spanGroup), match.end(spanGroup)))
        matchs.append(fields)
        match = pattern.search(data, match.end(nextGroup))

    patternVars = pattern.groupindex.keys()
    urlKeys = set(['url', 'videoUrl', 'iconImage', 'thumbnailImage'])
    for key in urlKeys.intersection(patternVars):
        for fields in matchs:
            fields[key] = urlparse.urljoin(url, fields[key])

    if matchs and 'label' in patternVars:
        labelKeys = sorted(
            [key for key in patternVars
             if key.startswith('label') and key != 'label2'])
        unescape = HTMLParser.HTMLParser().unescape
        for fields in matchs:
            pieces = [fields.pop(key) for key in labelKeys]
            joined = ' '.join([piece for piece in pieces if piece])
            fields['label'] = unescape(joined)
    return matchs
Exemplo n.º 12
0
def parseUrlContent(url, data, regexp, compFlags = None, posIni = 0, posFin = 0):
    """Return one dict of named groups per match of ``regexp`` in ``data``.

    Args:
        url: Base URL used to resolve the URL-like groups.
        data: Text to search.
        regexp: Pattern handed to ``CustomRegEx.compile``; it may carry the
            ``SPAN`` and ``NXTPOSINI`` directives read by
            ``getParseDirectives``.
        compFlags: Compile flags; a falsy value means ``0``.
        posIni: Offset at which the first search starts.
        posFin: When non-zero, matching stops at the first match that
            starts after this offset.

    Returns:
        A list of group dicts. Groups named ``url``, ``videoUrl``,
        ``iconImage`` or ``thumbnailImage`` have ``https:`` replaced by
        ``http:`` and are joined with ``url``. All ``label*`` groups except
        ``label2`` are merged into one unescaped ``'label'`` entry. With the
        ``SPAN`` directive each dict also has a ``'span'`` string.
    """
    parseDirect = getParseDirectives(regexp)
    nxtposini = parseDirect.get('NXTPOSINI', 0)
    compFlags = compFlags if compFlags else 0
    pattern = CustomRegEx.compile(regexp, flags = compFlags)
    matchs = []
    # NOTE(review): if the chosen group can end at the current posIni (for
    # example an empty match) the search position does not advance - confirm
    # that patterns used here always consume text.
    while True:
        match = pattern.search(data, posIni)
        if not match:
            break
        if posFin != 0 and match.start(0) > posFin:
            break
        matchDict = match.groupdict()
        if 'SPAN' in parseDirect:
            idGroup = parseDirect['SPAN']
            matchDict['span'] = str((match.start(idGroup), match.end(idGroup)))
        posIni = match.end(nxtposini)
        matchs.append(matchDict)

    patternVars = pattern.groupindex.keys()
    url_vars = ['url', 'videoUrl', 'iconImage', 'thumbnailImage']
    for key in set(url_vars).intersection(patternVars):
        for elem in matchs:
            # A group that did not take part in the match is None; treat it
            # as empty so urljoin yields the base url instead of raising
            # AttributeError on .replace.
            rawUrl = elem[key] or ''
            elem[key] = urlparse.urljoin(url, rawUrl.replace('https:', 'http:'))
    if matchs and 'label' in patternVars:
        srchKeys = sorted(
            key for key in patternVars
            if key.startswith('label') and key != 'label2')
        htmlUnescape = HTMLParser.HTMLParser().unescape
        for elem in matchs:
            lista = [elem.pop(key) for key in srchKeys]
            labelValue = ' '.join([label for label in lista if label])
            elem['label'] = htmlUnescape(labelValue)
    return matchs
    def test_general(self):
        """Check attribute filters, variables, required attributes and comments."""
        html = self.htmlStr

        # Fixed attribute value plus one variable.
        found = CustomRegEx.findall('(?#<hijo id="hijo1" *=label>)', html)
        expected = ['primer hijo']
        assert found == expected, 'Comentario y variable independiente'

        # Attribute captured in a variable.
        found = CustomRegEx.findall('(?#<hijo id=varid *=label>)', html)
        expected = [('hijo1', 'primer hijo'), ('hijo2', ''),
                    ('hijo3', 'tercer hijo')]
        assert found == expected, 'Utilizando variables para distinguir casos'

        # Attribute pattern and variable together.
        found = CustomRegEx.findall('(?#<hijo id="hijo[13]"=varid *=label>)',
                                    html)
        expected = [('hijo1', 'primer hijo'), ('hijo3', 'tercer hijo')]
        assert found == expected, 'Utilizando variables para distinguir casos'

        # Required attribute without a value.
        found = CustomRegEx.findall('(?#<hijo exp *=label>)', html)
        expected = ['']
        assert found == expected, 'Utilizando atributos requeridos (exp) para distinguir un caso'

        # Comments contained in the tag.
        found = CustomRegEx.findall('(?#<hijo exp .*>)', html)
        expected = [('El primer comentario', 'El segundo comentario',
                     'El tercer comentario')]
        assert found == expected, 'Comentarios incluidos en tag'

        with pytest.raises(re.error):
            'Error porque no se pueden utilizar variables cuando se tiene ".*" como variable requerida'
            CustomRegEx.compile('(?#<span class=var1 .*>)')
Exemplo n.º 14
0
def getParseDirectives(regexp):
    """Collect the ``SPAN`` and ``NXTPOSINI`` directives found in ``regexp``.

    A directive is written ``?#<NAME>`` or ``?#<NAMEn>`` where ``n`` is a
    run of trailing digits. The name is matched case-insensitively.

    Returns:
        A dict mapping the upper-case directive name to ``n`` as an int, or
        to ``0`` when no digits follow the name.
    """
    fltrDir = {}
    for rawkey in CustomRegEx.findall(r'\?#<([^>]+)>', regexp):
        # Only trailing digits are the argument. Stripping both ends would
        # let a name with leading digits through and make the int() below
        # fail on non-numeric text.
        key = rawkey.upper().rstrip('0123456789')
        if key in ('SPAN', 'NXTPOSINI'):
            suffix = rawkey[len(key):]
            fltrDir[key] = int(suffix) if suffix else 0
    return fltrDir
Exemplo n.º 15
0
def getParseDirectives(regexp):
    """Collect the ``SPAN`` and ``NXTPOSINI`` directives found in ``regexp``.

    A directive is written ``?#<NAME>`` or ``?#<NAMEn>`` where ``n`` is a
    run of trailing digits. The name is matched case-insensitively.

    Returns:
        A dict mapping the upper-case directive name to ``n`` as an int, or
        to ``0`` when no digits follow the name.
    """
    fltrDir = {}
    for rawkey in CustomRegEx.findall(r'\?#<([^>]+)>', regexp):
        # Only trailing digits are the argument. Stripping both ends would
        # let a name with leading digits through and make the int() below
        # fail on non-numeric text.
        key = rawkey.upper().rstrip('0123456789')
        if key in ('SPAN', 'NXTPOSINI'):
            suffix = rawkey[len(key):]
            fltrDir[key] = int(suffix) if suffix else 0
    return fltrDir
Exemplo n.º 16
0
def vidto(videoId, headers = None):
    """Return the playable URL that vidto.me serves for ``videoId``.

    The video page is fetched, its hidden POST form inputs are sent back,
    and the last ``player-url`` link of the response is returned.

    Args:
        videoId: Identifier used to build ``http://vidto.me/<videoId>.html``.
        headers: Optional dict of request headers. It is copied, so the
            caller's dict is not modified.

    Returns:
        ``'<href>|<urlencoded User-Agent>'``.

    Raises:
        IndexError: If no ``player-url`` link is found (``pop`` on an empty
            result list).
    """
    # Copy first: adding User-Agent must not leak into the caller's dict.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    url = 'http://vidto.me/%s.html<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(url)[1]
    pattern = r'(?#<Form method="POST".input<type="hidden" name=name value=value>*>)'
    formVars = CustomRegEx.findall(pattern, content)
    qte = urllib.quote
    postdata = '&'.join(
        ['='.join((var1, qte(var2) if var2 else '')) for var1, var2 in formVars])
    urlStr = 'http://vidto.me/%s.html<post>%s<headers>%s' % (videoId, postdata, encodeHeaders)
    content = basicFunc.openUrl(urlStr)[1]
    pattern = r'(?#<a class="player-url" href=url>)'
    sources = CustomRegEx.findall(pattern, content, re.DOTALL)
    href = sources.pop()
    return '%s|%s' % (href, urllib.urlencode({'User-Agent': MOBILE_BROWSER}))
    def test_nzone(self):
        """Check the exclusion zones selected with the __EZONE__ attribute."""
        html = self.htmlStr
        everySpan = [
            ('independiente', 'span0'),
            ('bloque1', 'span1'),
            ('bloque1', 'span2'),  # inside script
            ('independiente', 'bloque1'),
            ('independiente', 'bloque2'),  # inside bloque
            ('bloque2', 'span1'),
            ('bloque2', 'span2'),  # inside <!--
            ('independiente', 'span3')
        ]

        # Default exclusion zones.
        byDefault = CustomRegEx.findall('(?#<span class=test *=label>)', html)
        expected = [pair for pair in everySpan if pair[0] == 'independiente']
        assert byDefault == expected, 'Por default se excluyen Los tags buscados en self.htmlStr contenidos en zonas <!--xxx--> y script'
        explicit = CustomRegEx.findall(
            '(?#<span class=test *=label __EZONE__="[!--|script]">)', html)
        assert byDefault == explicit, 'El resultado por default se obtiene haciendo __NZONE__="[!--|script]" '

        # No exclusion zones at all.
        found = CustomRegEx.findall(
            '(?#<span class=test *=label __EZONE__="">)', html)
        assert found == everySpan, 'Para no tener zonas de exclusi.n se hace __EZONE__=""'

        # Custom exclusion zone.
        found = CustomRegEx.findall(
            '(?#<span class=test *=label __EZONE__="[bloque]">)', html)
        expected = [
            pair for pair in everySpan if not pair[1].startswith('bloque')
        ]
        assert found == expected, 'Se personaliza la zona de exclusi.n asignando a __NZONE__="xxx|zzz" donde xxx y zzz son tags'

        # Negated zone list: only tags inside the listed zones.
        found = CustomRegEx.findall(
            '(?#<span class=test *=label __EZONE__="^[!--|script]">)', html)
        expected = [
            pair for pair in everySpan if pair[0].startswith('bloque')
        ]
        assert found == expected, 'Para incluir solo tags buscados en las zonas xxx y zzz se debe hacer __NZONE__="^[xxx|zzz]'

        found = CustomRegEx.findall('(?#<a href=url *=labe>)', html)
        expected = []
        assert found == expected

        found = CustomRegEx.findall(
            '(?#<a href=url *=label __EZONE__="^[script]">)', html)
        expected = [('http://www.eltiempo.com.co', 'El Tiempo')]
        assert found == expected

        found = CustomRegEx.findall(
            '(?#<a href=url *=label __EZONE__="^[!--]">)', html)
        expected = [('http://www.elheraldo.com.co', 'El Heraldo')]
        assert found == expected
 def test_namedvarswithpattern(self):
     """
     <a href="http://uno/dos/tres.html">texto</a>
     HTML tag with the variables url and label, which receive the value of
     the href attribute and the text respectively, because href satisfies
     the pattern "http://uno/.+?/tres.html".
     """
     pattern = '(?#<a href="http://uno/.+?/tres.html" href=url *=label>)'
     compiled = CustomRegEx.compile(pattern, 0)
     expectedVars = [['tagpholder.href', 'url'],
                     ['tagpholder.*', 'label']]
     assert compiled.varList == expectedVars
Exemplo n.º 19
0
 def _getStringContent(self, pattern, texto):
     """Return the joined, stripped groups of each successive match.

     ``pattern`` is searched repeatedly in ``texto``, each search starting
     where the previous match ended. For every match its groups are
     concatenated and stripped of surrounding newlines and spaces.
     """
     found = []
     offset = 0
     answ = crgx.search(pattern, texto[offset:])
     while answ:
         found.append(''.join(answ.groups()).strip('\n '))
         # answ.end() is relative to the slice, so accumulate it.
         offset += answ.end()
         answ = crgx.search(pattern, texto[offset:])
     return found
Exemplo n.º 20
0
 def _getSectionDelimiters(self, section):
     """Return the heading that contains ``section`` and the one after it.

     The heading list is built on first use from the ``h1``/``h2`` tags
     whose class starts with ``api`` (dropping those that mention
     ``Protected``), followed by the end-of-content marker, and is cached
     in ``self._sections``.

     Raises:
         StopIteration: If no heading contains ``section``, or the matching
             entry is the last one in the list.
     """
     sections = self._sections
     if sections is None:
         content = self.getUrlContent()
         pattern = r'(?#<h[12] class="api.+?">)'
         # Build a real list (not a lazy filter object) so it can be
         # appended to on both Python 2 and Python 3.
         sections = [head for head in crgx.findall(pattern, content)
                     if 'Protected' not in head]
         sections.append(u'<!-- end jd-content -->')
         self._sections = sections
     it = itertools.dropwhile(lambda x: section not in x, sections)
     # next(it) replaces the Python 2-only it.next().
     return (next(it), next(it))
Exemplo n.º 21
0
def vidzi(videoId, headers = None):
    """Return the media URL that vidzi.tv serves for ``videoId``.

    The video page is fetched, the packed ``eval`` script is located, each
    short tag in the packed text is replaced by its word from the packer
    dictionary, and the last ``file:"..."`` entry of the unpacked text is
    returned.

    Args:
        videoId: Identifier used to build ``http://vidzi.tv/<videoId>.html``.
        headers: Optional dict of request headers. It is copied, so the
            caller's dict is not modified.

    Raises:
        AttributeError: If the packed script or its argument tuple is not
            found (the searches return ``None``).
        IndexError: If the unpacked script has no ``file`` entry.
    """
    strVal = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Copy first: adding User-Agent must not leak into the caller's dict.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    url = 'http://vidzi.tv/%s.html<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(url)[1]
    pattern = r"(?#<script *='eval.+?'=pack>)"
    packed = CustomRegEx.search(pattern, content).group('pack')
    pattern = r"}\((?P<tupla>'.+?\))(?:,0,{})*\)"
    m = re.search(pattern, packed)
    mgrp = m.group(1).rsplit(',', 3)
    # SECURITY(review): eval() runs text taken from the downloaded page.
    # Kept as-is to preserve behavior; consider parsing the word list
    # without eval.
    patron, base, nTags, lista = mgrp[0], int(mgrp[1]), int(mgrp[2]), eval(mgrp[3])
    while nTags:
        nTags -= 1
        # Floor division keeps the index an int under true division too.
        if nTags < base:
            tag = strVal[nTags]
        else:
            tag = strVal[nTags // base] + strVal[nTags % base]
        patron = re.sub(r'\b' + tag + r'\b', lista[nTags] or tag, patron)
    pattern = 'file:"([^"]+(?:mp4|ed=))"'
    sources = CustomRegEx.findall(pattern, patron)
    return sources.pop()
 def test_implicitvars(self):
     """
     <a href="http://uno.html">texto</a>
     HTML tag with the implicit variable href and a label variable that
     receives the text.
     """
     compiled = CustomRegEx.compile('(?#<a (href) *=label>)', 0)
     observed = (compiled.tagPattern, compiled.tags, compiled.varList)
     expectedTags = {'tagpholder': {'*': '', 'href': ''}}
     expectedVars = [['tagpholder.href', 'group1'], ['tagpholder.*', 'label']]
     assert observed == ('a', expectedTags, expectedVars)
Exemplo n.º 23
0
def vidto(videoId, headers=None):
    """Return the playable URL that vidto.me serves for ``videoId``.

    The video page is fetched, its hidden POST form inputs are sent back,
    and the last ``player-url`` link of the response is returned.

    Args:
        videoId: Identifier used to build ``http://vidto.me/<videoId>.html``.
        headers: Optional dict of request headers. It is copied, so the
            caller's dict is not modified.

    Returns:
        ``'<href>|<urlencoded User-Agent>'``.

    Raises:
        IndexError: If no ``player-url`` link is found (``pop`` on an empty
            result list).
    """
    # Copy first: adding User-Agent must not leak into the caller's dict.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    url = 'http://vidto.me/%s.html<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(url)[1]
    pattern = r'(?#<Form method="POST".input<type="hidden" name=name value=value>*>)'
    formVars = CustomRegEx.findall(pattern, content)
    qte = urllib.quote
    postdata = '&'.join(
        ['='.join((var1, qte(var2) if var2 else '')) for var1, var2 in formVars])
    urlStr = 'http://vidto.me/%s.html<post>%s<headers>%s' % (videoId, postdata,
                                                             encodeHeaders)
    content = basicFunc.openUrl(urlStr)[1]
    pattern = r'(?#<a class="player-url" href=url>)'
    sources = CustomRegEx.findall(pattern, content, re.DOTALL)
    href = sources.pop()
    return '%s|%s' % (href, urllib.urlencode({'User-Agent': MOBILE_BROWSER}))
 def test_namedvars(self):
     """
     <a href="http://uno.html">texto</a>
     HTML tag with the variables url and label, which receive the value of
     the href attribute and the text respectively.
     """
     compiled = CustomRegEx.compile('(?#<a href=url *=label>)', 0)
     observed = (compiled.tagPattern, compiled.tags, compiled.varList)
     expectedTags = {'tagpholder': {'*': '', 'href': ''}}
     expectedVars = [['tagpholder.href', 'url'], ['tagpholder.*', 'label']]
     assert observed == ('a', expectedTags, expectedVars)
Exemplo n.º 25
0
 def getMediaCode(self):
     """Return the code produced by the parser for the ``media`` node.

     The regexps of the ``media`` children of type ``thread`` are compiled
     to find which of ``url``, ``videoUrl`` and ``videoId`` they define.
     If none is defined a warning is appended to ``self.ERRORS``. The
     result is whatever ``self.parser.handle`` returns for the media
     regexp and compile flags.
     """
     adg = self.addonADG
     keyValues = set(['url', 'videoUrl', 'videoId'])

     # Collect (node, regexp) for every thread child before compiling.
     threads = []
     for node in adg.getChildren('media'):
         if adg.getThreadAttr(node, 'type') == 'thread':
             threads.append((node, adg.getThreadParam(node, 'regexp')))

     keySet = set()
     for node, threadRegexp in threads:
         groupNames = CustomRegEx.compile(threadRegexp).groupindex.keys()
         keySet.update(keyValues.intersection(groupNames))
     if not keySet:
         self.ERRORS += 'WARNING: Sources not send any of ' + str(keyValues) + ' to media'  + '\n'

     regexp = adg.getThreadParam('media', 'regexp')
     compflags = adg.getThreadParam('media', 'compflags')
     return self.parser.handle(ntype.MEDIA, keySet, regexp, compflags)
 def test_getAttrDict2(self):
     """Attribute values that contain quote characters are parsed whole."""
     htmlStr = """<a href0="el 'tiempo com" href1="el 'tiempo" com' href2='el 'tiempo' com' href3=''el tiempo' com' href4='el 'tiempo com''>"""
     attrD = CustomRegEx.ExtRegexParser().getAttrDict(htmlStr)
     # (attribute, expected value, failure message), checked in order.
     cases = [
         ('href0', "el \'tiempo com", "Error comilla interior simple"),
         ('href1', "el \'tiempo\" com", "Error comillas interiores mixtas"),
         ('href2', "el \'tiempo\' com", "Error comillas interiores"),
         ('href3', "\'el tiempo\' com",
          "Error comillas interiores ajustadas a la izquierda"),
         ('href4', "el \'tiempo com\'",
          "Error comillas interiores ajustadas a la derecha"),
     ]
     for attrName, expected, message in cases:
         assert attrD[attrName] == expected, message
Exemplo n.º 27
0
def vidzi(videoId, headers=None):
    """Return the media URL that vidzi.tv serves for ``videoId``.

    The video page is fetched, the packed ``eval`` script is located, each
    short tag in the packed text is replaced by its word from the packer
    dictionary, and the last ``file:"..."`` entry of the unpacked text is
    returned.

    Args:
        videoId: Identifier used to build ``http://vidzi.tv/<videoId>.html``.
        headers: Optional dict of request headers. It is copied, so the
            caller's dict is not modified.

    Raises:
        AttributeError: If the packed script or its argument tuple is not
            found (the searches return ``None``).
        IndexError: If the unpacked script has no ``file`` entry.
    """
    strVal = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Copy first: adding User-Agent must not leak into the caller's dict.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    url = 'http://vidzi.tv/%s.html<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(url)[1]
    pattern = r"(?#<script *='eval.+?'=pack>)"
    packed = CustomRegEx.search(pattern, content).group('pack')
    pattern = r"}\((?P<tupla>'.+?\))(?:,0,{})*\)"
    m = re.search(pattern, packed)
    mgrp = m.group(1).rsplit(',', 3)
    # SECURITY(review): eval() runs text taken from the downloaded page.
    # Kept as-is to preserve behavior; consider parsing the word list
    # without eval.
    patron, base, nTags, lista = mgrp[0], int(mgrp[1]), int(mgrp[2]), eval(
        mgrp[3])
    while nTags:
        nTags -= 1
        # Floor division keeps the index an int under true division too.
        if nTags < base:
            tag = strVal[nTags]
        else:
            tag = strVal[nTags // base] + strVal[nTags % base]
        patron = re.sub(r'\b' + tag + r'\b', lista[nTags] or tag, patron)
    pattern = 'file:"([^"]+(?:mp4|ed=))"'
    sources = CustomRegEx.findall(pattern, patron)
    return sources.pop()
Exemplo n.º 28
0
def getWebData(url, regexPattern, initConf=None, **kwargs):
    """Generator that downloads ``url`` and yields the regexp matches.

    Yields, in order: a ``PROCESS_MESSAGE`` item before the download, a
    ``PROCESS_MESSAGE`` item after it, and a ``PROCESS_DATA`` item that
    carries the ``findall`` result of ``regexPattern`` over the content.
    Each item is a list ``[kind, payload_tuple, kwargs]``. When
    ``initConf`` is falsy a default curl-style configuration is used.

    Raises:
        Exception: Whatever exception object ``openUrl`` returned as the
            content is re-raised.
    """
    yield [PROCESS_MESSAGE, ('Contactando sitio web bvc', ), kwargs]
    curlConf = initConf or r'curl  --user-agent "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36" --cookie-jar "cookies.lwp" --location'
    content, end_url = network.network(curlConf).openUrl(url)
    # openUrl hands back the exception object instead of raising it.
    if isinstance(content, Exception):
        raise content
    yield [PROCESS_MESSAGE, ('Pagina web entregada', ), kwargs]

    response = CustomRegEx.compile(regexPattern).findall(content)
    yield [PROCESS_DATA, (response, ), kwargs]
Exemplo n.º 29
0
def thevideo(videoId, encHeaders = ''):
    """Resolve the media URL of a thevideo.me video.

    Loads the video page, collects the fields prepended to ``#veriform`` by
    the page script plus the form's own inputs, posts them back together
    with the name/value pairs found in ``$.cookie(...)`` calls, and returns
    the ``file`` URL of the last ``label/file`` source in the response.

    ``encHeaders`` is accepted but not used.
    """
    joinPair = '='.join
    quote = urllib.quote
    headers = {'User-Agent':DESKTOP_BROWSER, 
               'Referer': 'http://thevideo.me/%s' % videoId}
    landingUrl = 'http://thevideo.me/%s<headers>%s' % (videoId, urllib.urlencode(headers))
    page = basicFunc.openUrl(landingUrl)[1]

    # Script-injected fields first, then the inputs declared in the form.
    scriptFieldPattern = r'''name: '(?P<var1>[^']+)', value: '(?P<var2>[^']+)' \}\).prependTo\(\"#veriform\"\)'''
    formFieldPattern = r"(?#<form .input<name=var1 value=var2>*>)"
    formVars = CustomRegEx.findall(scriptFieldPattern, page)
    formVars.extend(CustomRegEx.findall(formFieldPattern, page))

    cookiePattern = r"\$\.cookie\(\'(?P<var1>[^']+)\', \'(?P<var2>[^']+)\'"
    cookiePairs = CustomRegEx.findall(cookiePattern, page)

    # Empty values are sent as "name=" without quoting.
    postdata = '&'.join(
        joinPair((name, quote(value) if value else ''))
        for name, value in formVars)
    headers['Cookie'] = '; '.join(joinPair(pair) for pair in cookiePairs)

    postUrl = 'http://thevideo.me/%s<post>%s<headers>%s' % (
        videoId, postdata, urllib.urlencode(headers))
    page = basicFunc.openUrl(postUrl)[1]

    sourcePattern = r"label: '(?P<res>[^']+)', file: '(?P<url>[^']+)'"
    sources = CustomRegEx.findall(sourcePattern, page)
    _res, href = sources.pop()
    return href
Exemplo n.º 30
0
    def handle_media(self, keySet, regexp, compflags):
        """Return the source text of the generated ``media()`` function.

        A branch is emitted for each of ``url``, ``videoUrl`` and ``videoId``
        present in *keySet*.  When *keySet* holds more than one key every
        branch is wrapped in its own ``if args.get(<key>, None):`` guard.
        *regexp* and *compflags* are embedded in the ``url`` branch, whose
        tail depends on the named groups of *regexp*.
        """
        groupNames = CustomRegEx.compile(regexp).groupindex.keys()
        topIndent = '\n\t'
        pieces = ['def media():', topIndent + 'import urlresolver']

        def branchIndent(argName):
            # Emits the guard when needed and returns the indent to use for
            # the body of that branch.
            if len(keySet) > 1:
                pieces.append(topIndent + 'if args.get("%s", None):' % argName)
                return topIndent + '\t'
            return topIndent

        if 'url' in keySet:
            indent = branchIndent('url')
            quoted = "'" + regexp.replace("'", "\\'") + "'"
            pieces.append(indent + 'url = args.get("url")[0]')
            pieces.append(indent + 'regexp = ' + quoted)
            pieces.append(indent + 'url, data = openUrl(url)')
            pieces.append(indent + 'compflags =' + compflags)
            pieces.append(indent + 'subMenus = parseUrlContent(url, data, regexp, compflags )')
            if 'videourl' in groupNames:
                pieces.append(indent + 'videoUrl = subMenus[0]["videourl"]')
                pieces.append(indent + 'url = urlresolver.HostedMediaFile(url = videoUrl).resolve()')
            elif 'videoUrl' in groupNames:
                pieces.append(indent + 'url = subMenus[0]["videoUrl"]')

        if 'videoUrl' in keySet:
            indent = branchIndent('videoUrl')
            pieces.append(indent + 'videoUrl = args.get("videoUrl")[0]')
            pieces.append(indent + 'url = urlresolver.HostedMediaFile(url=videoUrl).resolve()')

        if 'videoId' in keySet:
            indent = branchIndent('videoId')
            pieces.append(indent + 'videoId = args.get("videoId")[0]')
            pieces.append(indent + "videoHost = args.get('videoHost')[0]")
            pieces.append(indent + 'url = urlresolver.HostedMediaFile(host=videoHost,media_id=videoId).resolve()')

        # Common tail: wrap the resolved url in a playable list item.
        for statement in (
                'li = xbmcgui.ListItem(path = url)',
                'if args.get("icondef", None): li.setThumbnailImage(args["icondef"][0])',
                'if args.get("labeldef", None): li.setLabel(args["labeldef"][0])',
                "li.setProperty('IsPlayable', 'true')",
                "li.setProperty('mimetype', 'video/x-msvideo')",
                "return xbmcplugin.setResolvedUrl(handle=addon_handle,succeeded=True,listitem=li)"):
            pieces.append(topIndent + statement)
        return ''.join(pieces)
Exemplo n.º 31
0
 def handle_media(self, keySet, regexp, compflags):
     """Generate the source code of ``media()`` as a string.

     *keySet* selects which of the ``url`` / ``videoUrl`` / ``videoId``
     branches are written; with more than one key each branch is guarded by
     ``if args.get(<key>, None):``.  *regexp* (single quotes escaped) and
     *compflags* are written into the ``url`` branch.
     """
     tags = CustomRegEx.compile(regexp).groupindex.keys()
     # Rows are (extra indent depth, statement); they are rendered once at
     # the end, each on its own tab-indented line.
     rows = [(0, 'import urlresolver')]

     if 'url' in keySet:
         depth = 0
         if len(keySet) > 1:
             rows.append((0, 'if args.get("url", None):'))
             depth = 1
         rows.append((depth, 'url = args.get("url")[0]'))
         rows.append((depth, 'regexp = ' + "'" + regexp.replace("'", "\\'") + "'"))
         rows.append((depth, 'url, data = openUrl(url)'))
         rows.append((depth, 'compflags =' + compflags))
         rows.append((depth, 'subMenus = parseUrlContent(url, data, regexp, compflags )'))
         if 'videourl' in tags:
             rows.append((depth, 'videoUrl = subMenus[0]["videourl"]'))
             rows.append((depth, 'url = urlresolver.HostedMediaFile(url = videoUrl).resolve()'))
         elif 'videoUrl' in tags:
             rows.append((depth, 'url = subMenus[0]["videoUrl"]'))

     simpleBranches = (
         ('videoUrl', ['videoUrl = args.get("videoUrl")[0]',
                       'url = urlresolver.HostedMediaFile(url=videoUrl).resolve()']),
         ('videoId', ['videoId = args.get("videoId")[0]',
                      "videoHost = args.get('videoHost')[0]",
                      'url = urlresolver.HostedMediaFile(host=videoHost,media_id=videoId).resolve()']),
     )
     for key, statements in simpleBranches:
         if key not in keySet:
             continue
         depth = 0
         if len(keySet) > 1:
             rows.append((0, 'if args.get("%s", None):' % key))
             depth = 1
         rows.extend([(depth, text) for text in statements])

     # Statements shared by every variant of the generated function.
     rows.extend([(0, text) for text in [
         'li = xbmcgui.ListItem(path = url)',
         'if args.get("icondef", None): li.setThumbnailImage(args["icondef"][0])',
         'if args.get("labeldef", None): li.setLabel(args["labeldef"][0])',
         "li.setProperty('IsPlayable', 'true')",
         "li.setProperty('mimetype', 'video/x-msvideo')",
         "return xbmcplugin.setResolvedUrl(handle=addon_handle,succeeded=True,listitem=li)",
     ]])
     rendered = ['\n\t' + depth * '\t' + text for depth, text in rows]
     return 'def media():' + ''.join(rendered)
Exemplo n.º 32
0
    def prepareEqLocals(self, startEq):
        """Build the namespace used to evaluate the equation *startEq*.

        For every process id referenced through ``start("id")`` or
        ``stop("id")`` the namespace receives ``_beg<id>_`` (whether the id is
        in ``self.activeList``) and ``_end<id>_`` (``False``).  The helper
        callables ``lt``, ``gt``, ``isact``, ``start`` and ``stop`` are added
        as well and the resulting dict is returned.
        """
        namespace = dict()
        compactEq = startEq.replace(' ', '')
        for procId in CustomRegEx.findall(r'[+*]*(?:start|stop)\("(.+?)"\)[*+]*', compactEq):
            namespace['_beg%s_' % procId] = procId in self.activeList
            namespace['_end%s_' % procId] = False

        # start/stop look the flags up in the returned dict at call time, so
        # later changes to the flags are visible to them.
        namespace.update(dict(
            lt=lambda x, n: self.actProcess(x) < n,
            gt=lambda x, n: self.actProcess(x) > n,
            isact=lambda x: x in self.activeList,
            start=lambda x: namespace['_beg%s_' % x],
            stop=lambda x: namespace['_beg%s_' % x] and namespace['_end%s_' % x]))
        return namespace
Exemplo n.º 33
0
    def getMediaCode(self):
        """Return the code produced by the parser for the ``media`` node.

        The regexps of all ``thread``-type children of ``media`` are compiled
        to find out which of ``url``, ``videoUrl`` and ``videoId`` they
        capture as named groups.  A warning is appended to ``self.ERRORS``
        when none is captured.  The collected keys, together with the
        ``regexp`` and ``compflags`` parameters of ``media``, are handed to
        ``self.parser.handle``.
        """
        adg = self.addonADG
        keyValues = set(['url', 'videoUrl', 'videoId'])

        sources = []
        for node in adg.getChildren('media'):
            if adg.getThreadAttr(node, 'type') == 'thread':
                sources.append((node, adg.getThreadParam(node, 'regexp')))

        keySet = set()
        for _node, pattern in sources:
            groupNames = CustomRegEx.compile(pattern).groupindex.keys()
            keySet |= keyValues.intersection(groupNames)

        if not keySet:
            warning = 'WARNING: Sources not send any of ' + str(keyValues)
            self.ERRORS += warning + ' to media' + '\n'

        mediaRegexp = adg.getThreadParam('media', 'regexp')
        mediaFlags = adg.getThreadParam('media', 'compflags')
        return self.parser.handle(ntype.MEDIA, keySet, mediaRegexp, mediaFlags)
 def test_getAttrDict1(self, cycle):
     htmlLst = [
         '<a', 'href0="eltiempo.com"', 'href1="eltiempo.com\'',
         'href2=\'eltiempo.com\'', 'href3=eltiempo.com'
     ]
     htmlLst.append(htmlLst.pop(cycle))
     htmlStr = ' '.join(htmlLst) + '>'
     print htmlStr
     parser = CustomRegEx.ExtRegexParser()
     tag, attrD = parser.getAttrDict(htmlStr, noTag=False)
     attrD.pop('*ParamPos*')
     assert sorted(attrD.keys()) == [
         'href%d' % k for k in range(4)
     ], "Los attributos reportados no corresponde a los reales"
     assert set(attrD.values()) == set(
         ["eltiempo.com"]
     ), "Por lo menos el valor de un attributo reportado no corresponde al real"
Exemplo n.º 35
0
def allmyvideos(videoId, headers = None):
    """Resolve the media URL of an allmyvideos.net video.

    Loads the video page, posts its form inputs back to the same address
    and returns ``<file url>|<urlencoded User-Agent>`` for the last
    ``"file"``/``"label"`` pair found in the response.

    ``headers`` may carry extra HTTP headers; ``User-Agent`` is always set
    to ``MOBILE_BROWSER`` (a dict passed in is updated in place).
    """
    headers = headers or {}
    headers['User-Agent'] = MOBILE_BROWSER
    encodedHeaders = urllib.urlencode(headers)

    landingUrl = 'http://allmyvideos.net/%s<headers>%s' % (videoId, encodedHeaders)
    page = basicFunc.openUrl(landingUrl)[1]
    fields = CustomRegEx.findall(r'(?#<form .input<name=name value=value>*>)', page)

    # Empty values are sent as "name=" without quoting.
    quote = urllib.quote
    postdata = '&'.join(
        '='.join((name, quote(value) if value else ''))
        for name, value in fields)

    postUrl = 'http://allmyvideos.net/%s<post>%s<headers>%s' % (videoId, postdata, encodedHeaders)
    page = basicFunc.openUrl(postUrl)[1]
    sourcePattern = r'"file" : "(?P<url>[^"]+)".+?"label" : "(?P<label>[^"]+)"'
    sources = re.findall(sourcePattern, page, re.DOTALL)
    href, _label = sources.pop()
    return '%s|%s' % (href,urllib.urlencode({'User-Agent':MOBILE_BROWSER}))
Exemplo n.º 36
0
 def HTMLstruct(self):
     """Show the HTML tag structure of the currently selected match.

     The text of the ``actMatch`` selection is parsed with
     ``ExtRegexParser.htmlStruct``.  When parsing fails a message box
     reports ``Not HTML conform``; otherwise the entries are listed in a
     selection dialog, indented by the number of dots in their path.
     """
     tagSpan = self.regexpEd.getSelRange('actMatch')
     content = self.regexpEd.getContent(*tagSpan)
     try:
         htmlParse = CustomRegEx.ExtRegexParser({},
                                                []).htmlStruct(content, 0)
     except Exception:
         # Only real errors mean "not parseable"; KeyboardInterrupt and
         # SystemExit are no longer swallowed as a bare except would do.
         tkMessageBox.showinfo('Actual match HTMLstruct', 'Not HTML conform')
         return
     fmt = '{:<20} {:<40}'.format
     # One row per entry: "*<last path component>" indented two spaces per
     # dot in the path, followed by the first 40 characters of the text.
     equis = [
         fmt(x[0].count('.') * '  ' + '*' + x[0].rpartition('.')[2],
             x[1][:40]) for x in htmlParse
     ]
     from xbmcgui import Dialog
     Dialog().select('Actual match HTMLstruct', equis)
Exemplo n.º 37
0
def parsingUrlData(url, regexPattern, initConf=None, **kwargs):
    """Generator that downloads *url* and reports each regex match in turn.

    Yields a ``PROCESS_MESSAGE`` before contacting the site, another once
    the page has been delivered, and then one ``PROCESS_DATA`` item per
    successive match of *regexPattern*, carrying
    ``(ordinal, match, baseIndex)``.  Every yielded item is
    ``[code, payload_tuple, kwargs]``.

    Args:
        url: address to download.
        regexPattern: pattern compiled with ``CustomRegEx.compile``.
        initConf: optional curl-style configuration string for the network
            layer; a default browser-like configuration is used when empty.
        **kwargs: passed through unchanged in every yielded item.

    Raises:
        Whatever exception object ``openUrl`` returned in place of the page.
    """
    yield [PROCESS_MESSAGE, ('Contactando sitio web bvc', ), kwargs]
    if not initConf:
        initConf = r'curl  --user-agent "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36" --cookie-jar "cookies.lwp" --location'
    net = network.network(initConf)
    content, end_url = net.openUrl(url)
    # openUrl hands back the exception object instead of raising it; fail
    # here with the real cause rather than later inside the regex search.
    if isinstance(content, Exception):
        raise content
    yield [PROCESS_MESSAGE, ('Pagina weg entregada', ), kwargs]

    reg = CustomRegEx.compile(regexPattern)

    k = 0
    pos = baseIndex = 0
    limit = len(content)
    while pos <= limit:
        match = reg.search(content, pos)
        if not match:
            break
        k += 1
        start, end = match.start(0), match.end(0)
        # A zero-length match ends where it starts; step one character past
        # it so the same position is not matched forever.
        pos = end if end > start else end + 1
        yield [PROCESS_DATA, (k, match, baseIndex), kwargs]
Exemplo n.º 38
0
def allmyvideos(videoId, headers=None):
    """Return the playable URL for an allmyvideos.net video id.

    The video page is fetched, the inputs of its form are posted back, and
    the last ``"file"``/``"label"`` pair of the answer is used: the result
    is the file URL followed by ``|`` and the urlencoded ``User-Agent``.

    ``headers`` is an optional dict of extra HTTP headers; its
    ``User-Agent`` entry is overwritten with ``MOBILE_BROWSER``.
    """
    siteUrl = 'http://allmyvideos.net/%s'
    headers = headers or {}
    headers['User-Agent'] = MOBILE_BROWSER
    headerQuery = urllib.urlencode(headers)

    firstPage = basicFunc.openUrl(
        (siteUrl + '<headers>%s') % (videoId, headerQuery))[1]

    inputPattern = r'(?#<form .input<name=name value=value>*>)'
    encodedPairs = []
    for name, value in CustomRegEx.findall(inputPattern, firstPage):
        # Empty values are sent as "name=" without quoting.
        if value:
            value = urllib.quote(value)
        else:
            value = ''
        encodedPairs.append((name, value))
    postdata = '&'.join(['='.join(pair) for pair in encodedPairs])

    secondPage = basicFunc.openUrl(
        (siteUrl + '<post>%s<headers>%s') % (videoId, postdata, headerQuery))[1]

    filePattern = r'"file" : "(?P<url>[^"]+)".+?"label" : "(?P<label>[^"]+)"'
    found = re.findall(filePattern, secondPage, re.DOTALL)
    href, _label = found.pop()
    agentQuery = urllib.urlencode({'User-Agent': MOBILE_BROWSER})
    return '%s|%s' % (href, agentQuery)
 def test_getAttrDict0(self, cycle):
     htmlLst = [
         '<a', 'href2=eltiempo.com', 'href1', 'href0=\"eltiempo.com\"'
     ]
     htmlLst.append(htmlLst.pop(cycle))
     htmlStr = ' '.join(htmlLst) + '>'
     print htmlStr
     parser = CustomRegEx.ExtRegexParser()
     htmlStr = 18 * ' ' + htmlStr
     tag, attrD = parser.getAttrDict(htmlStr, offset=18, noTag=False)
     attrP = attrD.pop('*ParamPos*')
     assert tag == 'a', "El tag reportado no corresponde al tag real"
     assert len(attrD) == 3
     assert attrD[
         'href0'] == "eltiempo.com", "Error valor de attributo normal"
     assert attrD['href1'] == "", "Error atributo sin valor"
     assert attrD[
         'href2'] == "eltiempo.com", "Error valor de attributo sin comillas"
     getSlice = htmlStr.__getslice__
     print '***' + htmlStr + '***'
     for k in attrD:
         print k, attrD[k], getSlice(*attrP[k]), attrP[k]
     assert all([attrD[k] == getSlice(*attrP[k]) for k in attrD])
Exemplo n.º 40
0
def getFormXmlStr(content):
    """Render every ``<form>`` found in *content* as a settings XML document.

    Each form becomes a ``<category>``: its attributes are listed first,
    then its fields, with a separator announcing prepended variables and
    regular form variables.  The field dictionaries returned by
    ``getFormData`` are updated in place with XML-escaped values.

    Args:
        content: HTML text to scan for forms.

    Returns:
        The XML document (``<settings>`` root) as a string.
    """
    form_xml = '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<settings>\n'
    comPattern = CustomRegEx.compile(r'(?#<form>)')
    k = 0
    posIni = 0
    while True:
        formData = getFormData(comPattern, content, posIni)
        if not formData: break
        posIni, formAttr, formFields = formData
        k += 1
        formAttr = dict([(key, escapeXml(value)) for key, value in formAttr.items()])
        form_xml += '\t<category label="Form %s">\n' % k
        if formAttr:
            form_xml += '\t\t<setting type="lsep" label ="Form attributes"/>\n'
            for name, value in sorted(formAttr.items()):
                form_xml += '\t\t<setting id="fa_{0}" type="text" label="{0}" default="{1}" enable="false"/>\n'.format(name, value)
        # bFlag: 0 = no separator written yet, 1 = "Prepend Vars" written,
        # 2 = "Form Vars" written.
        bFlag = 0
        for key in formFields:
            field = formFields[key]
            if 'prepend' in field:
                if bFlag == 0:
                    bFlag = 1
                    form_xml += '\t\t<setting type="lsep" label ="Form Prepend Vars"/>\n'
            elif bFlag < 2:
                bFlag = 2
                form_xml += '\t\t<setting type="lsep" label ="Form Vars"/>\n'
            if isinstance(field.get('value', ''), basestring):
                # Single-valued field.
                for fkey in ('name', 'value', 'checked'):
                    if fkey in field:
                        field[fkey] = escapeXml(field[fkey])
                atype = field.get('type', '')
                if atype in ('radio', 'checkbox'):
                    field['checked'] = 'true' if 'checked' in field else 'false'
                    felem = '<setting id="{name}" type="bool" label ="{name}" default="{checked}"/>\n'
                elif atype == 'submit':
                    # A submit button without a value attribute used to
                    # raise KeyError in format(); default it to ''.
                    field.setdefault('value', '')
                    felem = '<setting type="lsep" label ="{value}" noline="true"/>\n'
                elif atype == 'file':
                    field.setdefault('defaultValue', '')
                    # Trailing newline added so this entry ends its line
                    # like every other setting.
                    felem = '<setting id="if_{name}" type="file" label="{name}" default="{defaultValue}"/>\n'
                elif atype == 'hidden':
                    # Same KeyError guard as for submit.
                    field.setdefault('value', '')
                    felem = '<setting id="{name}" type="text" label="{name}" default="{value}" enable="false"/>\n'
                else:
                    # 'text' and any other type are plain text settings.
                    field.setdefault('value', '')
                    felem = '<setting id="{name}" type="text" label="{name}" default="{value}"/>\n'
            else:
                # Multi-valued field: values (and labels, if any) are joined
                # with '|' before escaping.
                toEscape = ['name', 'value', 'default']
                field['value'] = '|'.join(field['value'])
                if 'lvalue' in field:
                    field['lvalue'] = '|'.join(field['lvalue'])
                    toEscape.append('lvalue')
                field['default'] = field.get('default', '')
                for fkey in toEscape:
                    field[fkey] = escapeXml(field[fkey])
                if 'lvalue' in field:
                    felem = '<setting id="{name}" type="drpdwnlst" label="{name}" lvalues="{lvalue}" values="{value}" default="{default}"/>\n'
                else:
                    felem = '<setting id="{name}" type="labelenum" label="{name}" lvalues="{value}" default="{default}"/>\n'
            form_xml += '\t\t' + felem.format(**field)
        form_xml += '\t</category>\n'
    form_xml += '</settings>\n'
    return form_xml
Exemplo n.º 41
0
    def openloadORIG(videoId, encHeaders = ''):
        """Resolve the media URL of an openload.co embed page.

        The obfuscated script found inside the page's ``<video>`` element
        (the "puzzle") is rewritten step by step with regular expressions
        and its last statement is then reduced with ``eval`` until a plain
        string containing the URL remains.

        ``encHeaders`` is accepted but not used.

        Returns ``<url>|<urlencoded User-Agent header>``.

        NOTE(review): ``eval`` is applied to text derived from the
        downloaded page -- treat the page as untrusted input.
        """
        headers = {'User-Agent':MOBILE_BROWSER}
        encodeHeaders = urllib.urlencode(headers)
        urlStr = 'https://openload.co/embed/%s/<headers>%s' % (videoId, encodeHeaders)
        content = basicFunc.openUrl(urlStr)[1]    
        varTags = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        pattern = r'(?#<video script.*=puzzle>)'
        puzzle = CustomRegEx.findall(pattern, content)[0]
        # Names assigned as "(name) =" and names used as dictionary keys.
        vars = sorted(set(re.findall(r'\(([^=)(]+)\) *=', puzzle)))
        keys1 = re.findall(r', *(?P<key>[^: ]+) *:', puzzle)
        keys2 = re.findall(r"\(゚Д゚\) *\[[^']+\] *=", puzzle)
        keys = sorted(set(keys1 + keys2))
        # NOTE(review): totVars is never read afterwards.
        totVars = vars + keys
        # Variables are renamed to letters taken from the start of varTags,
        # keys to letters taken from its end.
        for k in range(len(vars)):
            puzzle = puzzle.replace(vars[k], varTags[k])
        for k in range(len(keys)):
            puzzle = puzzle.replace(keys[k], varTags[-k - 1])
    #     puzzle = puzzle.replace('\xef\xbe\x89'.decode('utf-8'), '').replace(' ','')
        # Drop spaces and every character in the \x80-\xff range.
        puzzle = re.sub(r'[ \x80-\xff]','',puzzle)
        # Letter that is assigned the dictionary literal: "(X)={".
        pat_dicId = r'\(([A-Z])\)={'
        m = re.search(pat_dicId, puzzle)
        assert m, 'No se encontro Id del diccionario'
        dicId = m.group(1)
    #     pat_obj = r"\(\(%s\)\+\\'_\\'\)" % dicId
        dic_pat1 = r"\(\(%s\)\+\'_\'\)" % dicId
        dic_pat2 = r"\(%s\+([^+)]+)\)" % dicId
        dic_pat3 = r"\(%s\)\.(.+?)\b" % dicId
        dic_pat4 = r"(?<=[{,])([^: ]+)(?=:)"
        
        # Concatenations with the dictionary become '[object object]'
        # strings, attribute access becomes item access and bare dictionary
        # keys are quoted.
        puzzle = re.sub(dic_pat1, "'[object object]_'", puzzle)
        puzzle = re.sub(dic_pat2, lambda x: "('[object object]'+str(%s))" % x.group(1), puzzle)
        puzzle = re.sub(dic_pat3, lambda x: "(%s)['%s']" % (dicId, x.group(1)), puzzle)
        puzzle = re.sub(dic_pat4, lambda x: "'%s'" % x.group(1), puzzle)
    
        pat_str1 = r"\((\(.+?\)|[A-Z])\+\'_\'\)"
        pat_str2 = r"\([^()]+\)\[[A-Z]\]\[[A-Z]\]"
        # "(expr + '_')" gets an explicit str(); "(..)[X][Y]" lookups are
        # replaced by the literal 'function'.
        puzzle = re.sub(pat_str1, lambda x: "(str(%s)+'_')" % x.group(1), puzzle)
        puzzle = re.sub(pat_str2, "'function'", puzzle)
    
        codeGlb = {}    
        # Split into statements and discard whatever follows the last ';'.
        code = puzzle.split(';')
        code.pop()
        code[0] = code[0][:2] + "'undefined'"
        # NOTE(review): the loop that executed the statements is commented
        # out, so codeGlb starts empty when the eval calls below run --
        # confirm this is intended.
#         for k, linea in enumerate(code[:-1]):
#             try:
#                 exec(linea, codeGlb)
#             except:
#                 print 'Linea %s con errores ' % k, linea
#                 code[k] = linea.split('=')[0] + '=' + "'\\\\'"
#                 print 'Se corrige como ', code[k]
#                 exec(code[k], codeGlb)
        
        # Only the last statement is reduced.
        linea = code[-1]
        # "(ABC)" -> "ABC"
        linea = re.sub(r"\(([A-Z]+)\)", lambda x: x.group(1), linea)
        linea = re.sub(r"\([oc]\^_\^o\)", lambda x: "%s" % eval(x.group(), codeGlb), linea)
        # Evaluate innermost parenthesised groups until none are left.
        while re.search(r"\([^)\]'\[(]+\)", linea):        
            linea = re.sub(r"\([^)\]'\[(]+\)", lambda x: "%s" % eval(x.group(), codeGlb), linea)
        linea = re.sub(r"[A-Z](?=[^\]\[])", lambda x: "%s" % eval(x.group(), codeGlb), linea)
        linea = re.sub(r"E\[[\'_A-Z]+\]", lambda x: "%s" % eval(x.group(), codeGlb), linea)
        linea = linea.replace('+', '')
        linea = linea.decode('unicode-escape')
        # NOTE(review): m is None when no "http...true" text is present, in
        # which case m.group() below raises AttributeError.
        m = re.search(r'http.+?true', linea)
        urlStr = '%s|%s' % (m.group(),encodeHeaders)
        return urlStr
Exemplo n.º 42
0
    def handle_apimenu(self, nodeId, menuId, paramDict, menuIcons, searchFlag, spanFlag):
        """Generate the source text of the menu function for *nodeId*.

        The returned string defines ``def <nodeId>():``.  The generated code
        downloads ``args["url"]``, parses it with the node's regexp and
        builds a ``menuContent`` list of ``[paramDict, itemParam,
        otherParam]`` entries.

        Args:
            nodeId: name given to the generated function.
            menuId: when truthy and *spanFlag* is set, the generated code
                reads a ``span`` argument and passes it to
                ``parseUrlContent`` as ``posIni``/``posFin``.
            paramDict: node parameters.  This dict is modified in place:
                ``op_*``, ``regexp``, ``url``, ``compflags``,
                ``contextmenus`` and ``onlycontext`` entries are popped.
            menuIcons: icon file names; when non-empty the generated code
                assigns ``iconImage`` to every sub menu.
            searchFlag: when truthy the generated ``return`` falls back to
                ``EMPTYCONTENT``.
            spanFlag: see *menuId*; it is reassigned inside this method.

        Returns:
            The generated Python source as a string.
        """
        from basicFunc import INFOLABELS_KEYS
        # Move every "op_<name>" entry of paramDict into otherParam under
        # "<name>".  Popping while looping relies on keys() returning a list.
        otherParam = {}
        for key in paramDict.keys():
            if not key.startswith('op_'): continue
            modKey = key[3:]
            otherParam[modKey] = paramDict.pop(key)
        addonInfoKeys = [key for key in otherParam if key.startswith('addonInfo')]
        if len(addonInfoKeys) > 1:
            # Each value is "<key><><value>"; an entry without "<>" supplies
            # the default value.
            addonInfo = {}
            for key in addonInfoKeys:
                value = otherParam.pop(key)
                key, value =  value.rpartition('<>')[0:3:2]
                if key: addonInfo[key] = value
                else: addonInfoDef = value
        # NOTE(review): regexp is only bound when paramDict has a 'regexp'
        # entry, yet it is used unconditionally below.
        addonInfoFlag = paramDict.has_key('regexp') 
        if addonInfoFlag: regexp = paramDict.pop('regexp')
        INDENT = '\n\t'        
        sourceCode = 'def ' + nodeId + '():'
        if regexp.find('?#<PASS>') != -1: sourceCode += '\n\t'+ 'global args'
        if paramDict.get('url', None):paramDict.pop('url')
        sourceCode += '\n\t'+ 'url = args.get("url")[0]'
        # suffix closes the parseUrlContent(...) call emitted further down.
        suffix = ')'
        if menuId:
            if spanFlag:
                sourceCode += '\n\t'+ 'limInf, limSup = eval(args.get("span", ["(0,0)"])[0])'
                suffix = ', posIni = limInf, posFin = limSup)'
        # From here on spanFlag tells whether the regexp contains '?#<SPAN'.
        spanFlag = False
        if addonInfoFlag:
            spanFlag = regexp.find('?#<SPAN') != -1 
            regexp = regexp.replace("'", "\\'")    
            sep = "'"
            sourceCode += '\n\t'+ 'regexp = r' + sep + regexp + sep
            sourceCode += '\n\t'+ 'url, data = openUrl(url)'
            if paramDict.get('compflags', None):
                sourceCode += '\n\t'+ 'compflags = ' + paramDict.pop('compflags')
                sourceCode += '\n\t'+ 'subMenus = parseUrlContent(url, data, regexp, compflags' + suffix
            else:
                sourceCode += '\n\t'+ 'subMenus = parseUrlContent(url, data, regexp' + suffix
            # addonInfoFlag now means: the regexp captures at least one
            # INFOLABELS_KEYS group.
            tags = CustomRegEx.compile(regexp).groupindex.keys()
            addonInfoFlag = any(map(lambda x: x in INFOLABELS_KEYS, tags))
            
        if regexp.find('?#<PASS>') != -1:
            # PASS nodes hand the first match over to the target menu
            # function and return its result.
            sourceCode += '\n\t'+ "args = dict((key, [value]) for key, value in subMenus[0].items())"
#             if spanFlag: sourceCode += '\n\t'+ "args['span'] = [str(subMenus[0]['span'])]"
            sourceCode += '\n\t'+ "return " +  str(paramDict['menu']) + "()"
            return sourceCode

        if menuIcons:
            # Sub menus beyond the end of the icon list reuse the last icon.
            iconList = '["' + '", "'.join(menuIcons) + '"]'
            sourceCode += '\n\t' + 'iconList = ' + iconList
            sourceCode += '\n\t' + 'for k in range(len(subMenus)):'
            sourceCode += '\n\t\t' + 'kmod = min(k, len(iconList) - 1)' 
            sourceCode += '\n\t\t' + 'subMenus[k]["iconImage"] = os.path.join(_media, iconList[kmod])'
        
        contextMenuFlag = paramDict.has_key('contextmenus')
        if contextMenuFlag:
            # 'contextmenus' is a '|'-separated list of ','-separated items.
            contextMenu = [tuple(elem.split(',')) for elem in paramDict.pop('contextmenus').split('|')]
            onlyContext = paramDict.pop('onlycontext') if paramDict.has_key('onlycontext') else False
            sourceCode += '\n\t'+ 'contextMenu = {"lista":' + str(contextMenu) + ', "replaceItems":' + str(onlyContext) + '}' 
        
        if len(addonInfoKeys) > 1:
            sourceCode += '\n\t'+ 'addonInfo=' + str(addonInfo)
            
        sourceCode += '\n\t'+ 'menuContent = []'
        sourceCode += '\n\t'+ 'for elem in subMenus:'
        sourceCode += '\n\t\t'+ 'itemParam = dict([(key,elem.pop(key)) for key  in elem.keys() if key in LISTITEM_KEYS])'
        isFolder = str(paramDict['menu'] != 'media') if paramDict.has_key('menu') else 'True'
        sourceCode += '\n\t\t'+ 'itemParam["isFolder"] = ' + isFolder
        sourceCode += '\n\t\t'+ 'otherParam = ' + str(otherParam)
        if len(addonInfoKeys) > 1:
            # NOTE(review): addonInfoDef is only bound when one of the
            # addonInfo entries had no "<>" key; otherwise this line raises
            # NameError.
            sourceCode += '\n\t\t'+ 'otherParam["addonInfo"] = addonInfo.get(menu, "%s")' % addonInfoDef 
        if contextMenuFlag:
            sourceCode += '\n\t\t'+ 'otherParam["contextMenu"] = dict(contextMenu)'
        if addonInfoFlag:
            sourceCode += '\n\t\t'+ 'otherParam["addonInfo"] = dict([(key,elem.pop(key)) for key  in elem.keys() if key in INFOLABELS_KEYS])'
        # When the regexp mentions 'videoUrl' the incoming "url" argument is
        # not copied into the generated paramDict.
        if regexp.find('videoUrl') == -1:
            sourceCode += '\n\t\t'+ 'paramDict = dict([(key, value[0]) for key, value in args.items() if hasattr(value, "__getitem__") and key not in ["header", "footer"]])'
        else:
            sourceCode += '\n\t\t'+ 'paramDict = dict([(key, value[0]) for key, value in args.items() if hasattr(value, "__getitem__") and key not in ["url", "header", "footer"]])'
        # Whatever is still left in paramDict (minus the listed keys) is
        # written into the generated code as a literal dict.
        sourceCode += '\n\t\t'+ 'paramDict.update(' + str({ key:value for key, value in paramDict.items() if key not in ['header','headregexp','nextregexp', 'iconflag', 'iconimage']}) + ')'
#         sourceCode += '\n\t\t'+ 'paramDict = ' + str({ key:value for key, value in paramDict.items() if key not in ['nextregexp', 'iconflag', 'iconimage']})
        sourceCode += '\n\t\t'+ 'paramDict.update(elem)'
        if spanFlag: sourceCode += '\n\t\t'+ 'paramDict["url"] = url'
        sourceCode += '\n\t\t'+ 'menuContent.append([paramDict, itemParam, otherParam])'
        sourceCode += '\n\t'+ 'return menuContent'
        if searchFlag: sourceCode += ' or EMPTYCONTENT' 
        return sourceCode
Exemplo n.º 43
0
def getMenuHeaderFooter(param, args, data, menus):
    """Build the header/footer menu entries for the section *param*.

    Args:
        param: section name; ``'footer'`` selects the ``'Next >>>'``
            placeholder label, any other value ``'Header >>>'``.
        args: request arguments; values are indexed with ``[0]``.
        data: page text the regular expressions are applied to.
        menus: iterable of ``(label, regexp)`` pairs, where ``regexp`` is
            either ``<values regexp>`` or ``<default regexp>|<values regexp>``.

    Returns:
        A list of ``[paramDict, itemParam, None]`` entries, one per menu
        whose values regexp matched.
    """
    htmlUnescape = HTMLParser.HTMLParser().unescape
    menuId = args.get('menu', ['rootmenu'])[0]
    url = args.get("url")[0]
    headerFooter = []
    for k, elem in enumerate(menus):
        opLabel, opregexp = elem
        # Split on the first '|': without a separator the whole text is the
        # values regexp and there is no default regexp.
        opdefault, sep, opvalues = opregexp.partition('|')
        opvalues = opvalues or opdefault
        opdefault = opdefault if sep else ''
        # NOTE(review): with these defaults data[pIni:pFin] leaves out the
        # last character of data -- confirm that is intended.
        pIni, pFin = 0, -1
        if opdefault.startswith('(?#<SPAN>)'):
            # Limit the search to the span matched by the default regexp;
            # when it does not match, the slice below is empty.
            pIni, match = -1, CustomRegEx.search(opdefault, data)
            if match: pIni, pFin = match.span(0)
        opmenu = CustomRegEx.findall(opvalues, data[pIni:pFin])
        if not opmenu: continue
        # tags.index(...) below requires keys() to return a list.
        tags = CustomRegEx.compile(opvalues).groupindex.keys()
        # NOTE(review): menuUrl, menuLabel and varValue are only assigned in
        # some branches; they may be unbound, or still hold the value from a
        # previous menu, when used further down.
        if 'url' in tags:
            menuUrl = [elem[tags.index('url')] for elem in opmenu] if len(tags) > 1 else opmenu[0]
        if 'label' in tags:
            menuLabel = map(htmlUnescape, [elem[tags.index('label')] for elem in opmenu])
        else:
            placeHolder = 'Next >>>' if param == 'footer' else 'Header >>>'
            menuLabel = len(menuUrl)*[placeHolder]
        # A single option replaces the configured menu label.
        if len(opmenu) == 1: opLabel = menuLabel[0]
        if 'varvalue' in tags: 
            varValue = [elem[tags.index('varvalue')] for elem in opmenu] if len(tags) > 1 else opmenu

        if opdefault:
            # The default regexp yields the text appended to the label.
            cmpregex = CustomRegEx.compile(opdefault)
            tags = cmpregex.groupindex.keys()
            match = cmpregex.search(data)
            if tags:
                if 'label' in tags:
                    opdefault = htmlUnescape(match.group(1) if match else '')
                elif 'defvalue' in tags:
                    # NOTE(review): match is not checked for None here or in
                    # the 'varname' branch.
                    opdefault = htmlUnescape(match.group('defvalue'))
                elif 'varname' in tags:
                    # The current value is read from the query string of url;
                    # each option becomes a "?<query>" url with that variable
                    # replaced.
                    varName = match.group('varname')
                    urlquery = urlparse.urlsplit(url).query
                    queryDict = dict(urlparse.parse_qsl(urlquery))
                    opdefault = queryDict.get(varName, '')
                    try:
                        indx = varValue.index(opdefault)
                    except:
                        opdefault = ''
                    else:
                        opdefault = menuLabel[indx]
                    menuUrl = []
                    for elem in varValue:
                        queryDict[varName] = elem
                        menuUrl.append('?' + urllib.urlencode(queryDict))
                
        paramDict = dict([(key, value[0]) for key, value in args.items() if hasattr(value, "__getitem__") and key not in ["header", "footer"]])
        paramDict.update({'section':param, 'url':url, param:k, 'menu':menuId})
        # Labels and urls are passed on as base64 text of their str() form.
        paramDict['menulabel'] = base64.urlsafe_b64encode(str(menuLabel))
        paramDict['menuurl'] = base64.urlsafe_b64encode(str(menuUrl))
        label = '[COLOR yellow]' + opLabel + opdefault + '[/COLOR]'
        itemParam = {'isFolder':True, 'label':label}
        headerFooter.append([paramDict, itemParam, None])
    return headerFooter
Exemplo n.º 44
0
def getFormXmlStr(content):
    """Render every ``<form>`` found in *content* as a settings XML document.

    Each form becomes a ``<category>``: its attributes are listed first,
    then its fields, with a separator announcing prepended variables and
    regular form variables.  The field dictionaries returned by
    ``getFormData`` are updated in place with XML-escaped values.

    Args:
        content: HTML text to scan for forms.

    Returns:
        The XML document (``<settings>`` root) as a string.
    """
    form_xml = '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<settings>\n'
    comPattern = CustomRegEx.compile(r'(?#<form>)')
    k = 0
    posIni = 0
    while True:
        formData = getFormData(comPattern, content, posIni)
        if not formData: break
        posIni, formAttr, formFields = formData
        k += 1
        formAttr = dict([(key, escapeXml(value))
                         for key, value in formAttr.items()])
        form_xml += '\t<category label="Form %s">\n' % k
        if formAttr:
            form_xml += '\t\t<setting type="lsep" label ="Form attributes"/>\n'
            for name, value in sorted(formAttr.items()):
                form_xml += '\t\t<setting id="fa_{0}" type="text" label="{0}" default="{1}" enable="false"/>\n'.format(
                    name, value)
        # bFlag: 0 = no separator written yet, 1 = "Prepend Vars" written,
        # 2 = "Form Vars" written.
        bFlag = 0
        for key in formFields:
            field = formFields[key]
            if 'prepend' in field:
                if bFlag == 0:
                    bFlag = 1
                    form_xml += '\t\t<setting type="lsep" label ="Form Prepend Vars"/>\n'
            elif bFlag < 2:
                bFlag = 2
                form_xml += '\t\t<setting type="lsep" label ="Form Vars"/>\n'
            if isinstance(field.get('value', ''), basestring):
                # Single-valued field.
                for fkey in ('name', 'value', 'checked'):
                    if fkey in field:
                        field[fkey] = escapeXml(field[fkey])
                atype = field.get('type', '')
                if atype in ('radio', 'checkbox'):
                    field['checked'] = 'true' if 'checked' in field else 'false'
                    felem = '<setting id="{name}" type="bool" label ="{name}" default="{checked}"/>\n'
                elif atype == 'submit':
                    # A submit button without a value attribute used to
                    # raise KeyError in format(); default it to ''.
                    field.setdefault('value', '')
                    felem = '<setting type="lsep" label ="{value}" noline="true"/>\n'
                elif atype == 'file':
                    field.setdefault('defaultValue', '')
                    # Trailing newline added so this entry ends its line
                    # like every other setting.
                    felem = '<setting id="if_{name}" type="file" label="{name}" default="{defaultValue}"/>\n'
                elif atype == 'hidden':
                    # Same KeyError guard as for submit.
                    field.setdefault('value', '')
                    felem = '<setting id="{name}" type="text" label="{name}" default="{value}" enable="false"/>\n'
                else:
                    # 'text' and any other type are plain text settings.
                    field.setdefault('value', '')
                    felem = '<setting id="{name}" type="text" label="{name}" default="{value}"/>\n'
            else:
                # Multi-valued field: values (and labels, if any) are joined
                # with '|' before escaping.
                toEscape = ['name', 'value', 'default']
                field['value'] = '|'.join(field['value'])
                if 'lvalue' in field:
                    field['lvalue'] = '|'.join(field['lvalue'])
                    toEscape.append('lvalue')
                field['default'] = field.get('default', '')
                for fkey in toEscape:
                    field[fkey] = escapeXml(field[fkey])
                if 'lvalue' in field:
                    felem = '<setting id="{name}" type="drpdwnlst" label="{name}" lvalues="{lvalue}" values="{value}" default="{default}"/>\n'
                else:
                    felem = '<setting id="{name}" type="labelenum" label="{name}" lvalues="{value}" default="{default}"/>\n'
            form_xml += '\t\t' + felem.format(**field)
        form_xml += '\t</category>\n'
    form_xml += '</settings>\n'
    return form_xml
Exemplo n.º 45
0
def openload(videoId, headers = None):
    """Resolve the stream URL hidden in an openload.co embed page.

    The embed page is downloaded, the obfuscated script captured by the
    ``puzzle`` pattern is rewritten into Python-evaluable statements, those
    statements are evaluated one by one, and the last statement is decoded
    into text from which the final URL is extracted.
    NOTE(review): the tokens handled below look like an aaencode-style
    obfuscated JavaScript payload -- confirm against a live page.

    Parameters:
        videoId: identifier interpolated into the embed URL.
        headers: optional dict of extra request headers.  It is copied, so
            the caller's dict is left untouched; 'User-Agent' is always
            forced to MOBILE_BROWSER.

    Returns:
        A string of the form '<url>|<urlencoded User-Agent header>'.

    Raises:
        AssertionError: when the dictionary identifier cannot be located in
            the rewritten puzzle.
        AttributeError: when no 'http...true' URL is found in the decoded
            last statement (``m`` is None at that point).
    """
    # Copy instead of mutating the dict handed in by the caller.
    headers = headers.copy() if headers else {}
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    urlStr = 'https://openload.co/embed/%s/<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(urlStr)[1]
    varTags = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    pattern = r'(?#<video script.*=puzzle>)'
    # presumably CustomRegEx.findall returns a list; [0] fails when the page
    # has no matching script -- verify against CustomRegEx.
    puzzle = CustomRegEx.findall(pattern, content)[0]

    # Collect the obfuscated identifiers: parenthesised assignment targets
    # ("(name) =") and dictionary keys (", key :" and "(...)[...] =").
    varNames = sorted(set(re.findall(r'\(([^=)(]+)\) *=', puzzle)))
    keys1 = re.findall(r', *(?P<key>[^: ]+) *:', puzzle)
    keys2 = re.findall(r"\(゚Д゚\) *\[[^']+\] *=", puzzle)
    keys = sorted(set(keys1 + keys2))

    # Variables take letters from the start of the alphabet, keys from the
    # end, so every identifier becomes a single uppercase ASCII letter.
    for k, name in enumerate(varNames):
        puzzle = puzzle.replace(name, varTags[k])
    for k, key in enumerate(keys):
        puzzle = puzzle.replace(key, varTags[-k - 1])
    # Drop spaces and every character in the \x80-\xff range.
    puzzle = re.sub(r'[ \x80-\xff]','',puzzle)

    pat_dicId = r'\(([A-Z])\)={'
    m = re.search(pat_dicId, puzzle)
    # Explicit raise (same exception type and message as the former assert)
    # so the check is not stripped when running under -O.
    if not m:
        raise AssertionError('No se encontro Id del diccionario')
    dicId = m.group(1)

    # Rewrite expressions that involve the dictionary object so they can be
    # evaluated as Python: string concatenation with the object yields the
    # literal '[object object]', attribute access becomes item access and
    # bare keys inside the literal are quoted.
    dic_pat1 = r"\(\(%s\)\+\'_\'\)" % dicId
    dic_pat2 = r"\(%s\+([^+)]+)\)" % dicId
    dic_pat3 = r"\(%s\)\.(.+?)\b" % dicId
    dic_pat4 = r"(?<=[{,])([^: ]+)(?=:)"

    puzzle = re.sub(dic_pat1, "'[object object]_'", puzzle)
    puzzle = re.sub(dic_pat2, lambda x: "('[object object]'+str((%s)))" % x.group(1), puzzle)
    puzzle = re.sub(dic_pat3, lambda x: "(%s)['%s']" % (dicId, x.group(1)), puzzle)
    puzzle = re.sub(dic_pat4, lambda x: "'%s'" % x.group(1), puzzle)

    # Remaining string-coercion idioms: "(expr+'_')" gets an explicit str(),
    # "(...)[X][Y]" is replaced by the literal 'function', and "a+=b" is
    # expanded to "a=a+b" so the assignment splitter below can handle it.
    pat_str1 = r"\((\(.+?\)|[A-Z])\+\'_\'\)"
    pat_str2 = r"\([^()]+\)\[[A-Z]\]\[[A-Z]\]"
    pat_str3 = r"(?<=;)([^+]+)\+=([^;]+)"
    puzzle = re.sub(pat_str1, lambda x: "(str((%s))+'_')" % x.group(1), puzzle)
    puzzle = re.sub(pat_str2, "'function'", puzzle)
    puzzle = re.sub(pat_str3, lambda x: "%s=%s+%s" % (x.group(1), x.group(1), x.group(2)), puzzle)

    # SECURITY NOTE(review): everything evaluated below is text derived from
    # the downloaded page.  eval() on remote content can execute arbitrary
    # expressions; the regex rewriting above is not a sandbox.
    codeGlb = {}
    code = puzzle.split(';')
    # Discard the piece that follows the last ';'.
    code.pop()
    # Keep only the first two characters of the first statement and make its
    # value the literal string 'undefined'.
    code[0] = code[0][:2] + "'undefined'"
    for linea in code[:-1]:
        # "(A)" -> "A"
        linea = re.sub(r"\(([A-Z]+)\)", lambda x: x.group(1), linea)
        # Split chained assignments on '=' (but not '=='); the last piece is
        # the value expression, the preceding ones are the targets.
        varss = re.split(r"(?<=[_a-zA-Z\]])=(?=[^=])",linea)
        value = eval(varss.pop(), codeGlb)
        for var in varss:
            m = re.match(r"([^\[]+)\[([^\]]+)\]", var)
            if m:
                # Target of the form X[key]: store into the existing object.
                var, key = m.groups()
                key = eval(key, codeGlb)
                codeGlb[var][key] = value
            else:
                codeGlb[var] = value

    # The last statement carries the payload: substitute every evaluable
    # fragment by its value until only escaped text joined by '+' is left.
    linea = code[-1]
    linea = re.sub(r"\(([A-Z]+)\)", lambda x: x.group(1), linea)
    linea = re.sub(r"\([oc]\^_\^o\)", lambda x: "%s" % eval(x.group(), codeGlb), linea)
    while re.search(r"\([^)\]'\[(]+\)", linea):
        linea = re.sub(r"\([^)\]'\[(]+\)", lambda x: "%s" % eval(x.group(), codeGlb), linea)
    linea = re.sub(r"[A-Z](?=[^\]\[])", lambda x: "%s" % eval(x.group(), codeGlb), linea)
    linea = re.sub(r"E\[[\'_A-Z]+\]", lambda x: "%s" % eval(x.group(), codeGlb), linea)
    linea = linea.replace('+', '')
    linea = linea.decode('unicode-escape')
    m = re.search(r'http.+?true', linea)
    # NOTE(review): the response of this request is not used; the call is
    # kept because it may be needed for its side effect -- confirm.
    basicFunc.openUrl(m.group(), True)
    urlStr = '%s|%s' % (m.group(),urllib.urlencode({'User-Agent':MOBILE_BROWSER}))
    return urlStr