Пример #1
0
def jinni_search(query):
    """Run a normal Jinni discovery search and return the parsed results.

    Requests ``discovery.html`` with ``query`` as the ``query`` URL
    parameter, finds the inline ``<script>`` whose text contains
    ``obj_collageEntry`` and feeds it through ``parse_js`` and ``convert``.

    :param query: search text; it is encoded to UTF-8 before URL-encoding.
    :returns: ``convert(js_tree).values()`` for the matching script, or an
        empty list when no inline script contains ``obj_collageEntry``
        (or an ``IndexError`` is raised while parsing it).
    """
    logging.info(u'Doing a normal search for "{0}"'.format(query))

    # urllib.urlencode() calls str() on every value, which fails for
    # non-ASCII unicode input, so the query is encoded to UTF-8 first:
    #
    # File "/usr/lib/python2.6/urllib.py", line 1269, in urlencode
    #  v = quote_plus(str(v))
    # UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 1: ordinal not in range(128)
    #
    # See: http://mail.python.org/pipermail/baypiggies/2007-April/002102.html
    url = "http://www.jinni.com/discovery.html?{0}".format(urllib.urlencode({
        "query": query.encode("utf-8")
    }))

    request = urllib2.Request(url)
    response = open_url(request)
    content = response.read()
    document = lxml.html.soupparser.fromstring(content)

    # Find the script tag that contains the search results and parse it
    try:
        # An empty inline <script> has text None; test for that before the
        # substring check so it is skipped instead of raising TypeError.
        script_text = [script.text for script in document.xpath('//script[not(@src)]')
            if script.text and "obj_collageEntry" in script.text][0]
        # PyNarcissus doesn't handle unicode properly:
        #
        # File "jsparser.py", line 197, in __init__
        #   self.source = str(s)
        # UnicodeEncodeError: 'ascii' codec can't encode characters in position 31704-31706: ordinal not in range(128)
        #
        # So encoding to UTF-8 first
        js_tree = parse_js(script_text.encode("utf-8"))
        results = convert(js_tree).values()
    except IndexError:
        # No search results available
        results = []

    return results
Пример #2
0
def jinni_findSuggestionsWithFilters(query):
    """Query Jinni's DWR ``findSuggestionsWithFilters`` endpoint.

    POSTs a DWR plain-call request for ``query``, parses the JavaScript
    response with ``parse_js``/``convert``, runs ``evaluate`` over it and
    returns the ``"s1"`` entry of the resulting tree.

    :param query: suggestion text; it is encoded to UTF-8 before sending.
    :returns: ``tree["s1"]`` from the converted and evaluated response.
    :raises KeyError: if the evaluated tree has no ``"s1"`` entry.
    """
    logging.info(u'Doing a suggestion search for "{0}"...'.format(query))

    url = "http://www.jinni.com/dwr/call/plaincall/AjaxController.findSuggestionsWithFilters.dwr"

    # Use the first JSESSIONID cookie if one exists. The field only has to be
    # present (see below), so fall back to an empty value rather than failing
    # with an IndexError when the cookie jar has no session cookie yet.
    http_session_id = next(
        (cookie.value for cookie in cj if cookie.name == "JSESSIONID"), "")

    values = {
        # Both the httpSessionId and scriptSessionId need to be submitted
        # or the server will respond with a "HTTP Error 501: Not Implemented".
        # However, they are not validated.
        # FIXME: when logged in for some reason you do need to send along a valid httpSessionId
        "httpSessionId": http_session_id,
        "scriptSessionId": "", # i.e. 3C675DDBB02222BE8CB51E2415259E99878
        "callCount": "1",
        "page": "/discovery.html",
        "c0-scriptName": "AjaxController",
        "c0-methodName": "findSuggestionsWithFilters",
        "c0-id": "0",
        "c0-param0": "string:{0}".format(query.encode("utf-8")),
        "c0-e1": "null:null",
        "c0-e2": "boolean:false",
        "c0-e3": "boolean:false",
        "c0-e4": "boolean:false",
        "c0-e5": "Array:[]",
        "c0-param1": "Object_Object:{contentTypeFilter:reference:c0-e1, onlineContentFilter:reference:c0-e2, dvdContentFilter:reference:c0-e3, theaterContentFilter:reference:c0-e4, contentAffiliates:reference:c0-e5}",
        "batchId": "2"
    }

    # Passing a data payload makes urllib2 issue a POST request.
    data = urllib.urlencode(values)
    request = urllib2.Request(url, data)
    response = open_url(request)
    content = response.read()

    js_tree = parse_js(content)
    tree = convert(js_tree)
    # evaluate() is called before reading "s1"; presumably it fills in the
    # tree from the parsed script - verify against its definition.
    evaluate(js_tree, tree)

    return tree["s1"]