Example #1
def googleSearch(title):
    '''
    Queries the Google SOAP web service for the given title and
    returns the first URL of the response.

    @param title: the title of the article to search for
    @return: the first url
    '''
    
    _query = title

    # create SOAP proxy object
    google = SOAPProxy(_url, _namespace)

    # call search method over SOAP proxy
    results = google.doGoogleSearch( _license_key, _query, 
                                     _start, _maxResults, 
                                     _filter, _restrict,
                                     _safeSearch, _lang_restrict, '', '' )
               
    # display results
#    print 'google search for  " ' + _query + ' "\n'
#    print 'estimated result count: ' + str(results.estimatedTotalResultsCount)
#    print '           search time: ' + str(results.searchTime) + '\n'
#    print 'results ' + str(_start + 1) + ' - ' + str(_start + _maxResults) +':\n'
                                                           
    numresults = len(results.resultElements)
    if numresults:
        url = results.resultElements[0].URL
    else:
        url= "#"
    return url
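
Both examples rely on module-level settings that are not shown in the snippets. A minimal sketch of what that setup might look like is given below; the endpoint and namespace correspond to the old Google SOAP Search API, while the license key and search options are placeholder assumptions:

# Hypothetical module-level setup assumed by both examples (not part of the original snippets).
from SOAPpy import SOAPProxy   # SOAPpy provides the SOAPProxy class
import re                      # used by Example #2 for URL classification

_url = 'http://api.google.com/search/beta2'   # historical Google SOAP Search endpoint
_namespace = 'urn:GoogleSearch'
_license_key = 'YOUR-GOOGLE-LICENSE-KEY'      # placeholder, not a real key
_start = 0              # index of the first result to request
_maxResults = 10        # number of results per request
_filter = False         # do not filter near-duplicate results
_restrict = ''          # no country/topic restriction
_safeSearch = False     # SafeSearch disabled
_lang_restrict = ''     # no language restriction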
Example #2
def googleSearch(title):
    '''
    Queries the Google SOAP web service for the given title, using a spelling
    suggestion when one is available, and classifies the first matching result
    by its URL.

    @param title: the title of the article to search for
    @return: a (url, type, query) tuple
    '''
    
     
    # create SOAP proxy object
    google = SOAPProxy(_url, _namespace)

    # ask Google for a spelling suggestion; fall back to the original title
    _query = google.doSpellingSuggestion(_license_key, title)
    if _query is None:
        _query = title
    
    # call search method over SOAP proxy
    results = google.doGoogleSearch( _license_key, _query, 
                                     _start, _maxResults, 
                                     _filter, _restrict,
                                     _safeSearch, _lang_restrict, '', '' )
               
    # display results
#    print 'google search for  " ' + _query + ' "\n'
#    print 'estimated result count: ' + str(results.estimatedTotalResultsCount)
#    print '           search time: ' + str(results.searchTime) + '\n'
#    print 'results ' + str(_start + 1) + ' - ' + str(_start + _maxResults) +':\n'
                                                           
    numresults = len(results.resultElements)
    if numresults:
        # classify each result by its URL and return the first recognised one
        for i in range(numresults):
            url = results.resultElements[i].URL
            pdf_match = re.match(r"http://(.*)\.pdf", url, re.IGNORECASE)
            if pdf_match:
                url = pdf_match.group()
                type = "pdf"
                return (url, type, _query)
            elif re.match("http://citeseer.ist.psu.edu/", url):
                type = "citeseer"
                return (url, type, _query)
            elif re.match("http://portal.acm.org/", url):
                type = "acm"
                return (url, type, _query)
            elif re.match("http://doi.ieeecomputersociety/", url):
                type = "doi"
                return (url, type, _query)
        # no known source matched: fall back to the last URL seen
        type = "default"
        return (url, type, _query)
    else:
        url = "#"
        type = None
        return (url, type, _query)
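
A minimal usage sketch for the second variant, assuming the module-level setup shown earlier and a reachable Google SOAP service (the article title here is only an illustration):

# Hypothetical caller code; assumes the setup sketched after Example #1.
url, kind, query = googleSearch("A Relational Model of Data for Large Shared Data Banks")
if kind is None:
    print 'no results for "' + query + '"'
else:
    print kind + ' result: ' + url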