def search(self, query, start=0, count=10, filter=True):
    """search(query, start=0, count=10, filter=True) -> results

    Search the web with Google (mobile/XHTML results page).

    Returns a list of (url, title, excerpt) tuples scraped from the
    result page.  `filter` toggles Google's duplicate filtering.
    """
    # Google expects 'filter' as 0/1, not a Python boolean.
    _filter = 1 if filter else 0
    base = 'http://www.google.com/xhtml?'
    _query = http.encode_url({
        'q': query,
        'start': start,
        'num': count,
        'filter': _filter,
    })
    content = self.open_url(base + _query).read()
    results = []
    # Raw string so '\d' is a regex escape, not a string escape.
    for href, title, excerpt in re.findall(
            r'<a href="(.*?)" accesskey="\d+">(.*?)</a>(.*?)<span class="url">',
            content):
        # The real target is packed into the ';u=' parameter of
        # Google's redirect link.
        url = href[href.index(';u=') + 3:]
        # BUG FIX: also recognise an existing 'http://' prefix so the
        # scheme is never doubled into 'http://http://...'.
        if not url.startswith(('http://', 'https://', 'ftp://')):
            url = 'http://' + url
        results.append((url, title, excerpt))
    return results
def search(self, query, start=0, count=10, filter=True):
    """search(query, start=0, count=10, filter=True) -> results

    Search the web with Google (mobile/XHTML results page).

    Returns a list of (url, title, excerpt) tuples; `filter` toggles
    Google's duplicate filtering.
    """
    # Google expects 'filter' as 0/1, not a Python boolean.
    if filter:
        _filter = 1
    else:
        _filter = 0
    base = 'http://www.google.com/xhtml?'
    _query = http.encode_url({'q': query, 'start': start,
                              'num': count, 'filter': _filter})
    content = self.open_url(base + _query).read()
    results = []
    # Raw string so '\d' is a regex escape, not a string escape.
    pattern = r'<a href="(.*?)" accesskey="\d+">(.*?)</a>(.*?)<span class="url">'
    for href, title, excerpt in re.findall(pattern, content):
        # The real target sits after the ';u=' marker in Google's
        # redirect link.
        url = href[href.index(';u=') + 3:]
        # BUG FIX: include 'http://' in the prefix check so an already
        # schemed URL does not become 'http://http://...'.
        if not url.startswith(('http://', 'https://', 'ftp://')):
            url = 'http://' + url
        results.append((url, title, excerpt))
    return results
def translate_url(self, url, from_lang='en', to_lang='de'):
    """translate_url(url, from_lang='en', to_lang='de') -> string

    Translate the page at *url* from from_lang to to_lang through the
    Google translation proxy and return the resulting HTML.
    """
    base = 'http://66.249.93.104/translate_c?'
    params = http.encode_url({'langpair': from_lang + '|' + to_lang,
                              'u': url})
    return self.open_url(base + params).read()
def translate_text(self, text, from_lang='en', to_lang='de'):
    """translate_text(text, from_lang='en', to_lang='de') -> string

    Translate *text* from from_lang to to_lang with Google Translate
    and return the translated string.
    """
    base = 'http://translate.google.com/translate_t?'
    params = http.encode_url({'langpair': from_lang + '|' + to_lang,
                              'text': text})
    page = self.open_url(base + params).read()
    # The translation is echoed back inside the first <textarea>.
    matches = re.findall('<textarea .*?>(.*?)</textarea>', page)
    return matches[0].strip()
def search(self, *terms):
    """search(term1, term2, term3, ...) -> []

    Ask Google Sets to extend the given example terms; returns the
    list of suggested set members scraped from the result page.
    """
    base = 'http://labs.google.com/sets?'
    # Google Sets expects the terms as q1=..., q2=..., q3=...
    _query = http.encode_url([('q' + str(index + 1), value)
                              for index, value in enumerate(terms)])
    content = self.open_url(base + _query).read()
    # Raw string so '\s' is a regex escape, not an invalid string escape.
    return re.findall(r'<center>(.*?)\s*</center></a>', content)
def translate_url(self, url, from_lang='en', to_lang='de'):
    """translate_url(url, from_lang='en', to_lang='de') -> string

    Fetch *url* through the Google translation proxy, translating it
    from from_lang to to_lang, and return the translated HTML.
    """
    endpoint = 'http://66.249.93.104/translate_c?'
    pair = from_lang + '|' + to_lang
    query = http.encode_url({'langpair': pair, 'u': url})
    response = self.open_url(endpoint + query)
    return response.read()
def search(self, query):
    """search(query) -> []

    Suggest queries based on *query* via Google Suggest.
    """
    base = 'http://www.google.com/complete/search?'
    _query = http.encode_url({'qu': query})
    content = self.open_url(base + _query).read()
    # The response is a JS array of quoted strings: three header
    # entries, then the completions, then one count per completion
    # plus a trailing element — slice out just the completions.
    terms = re.findall('"(.*?)",?', content)
    # BUG FIX: removed stray debug 'print' left in library code; use
    # explicit floor division so the slice index stays an int even
    # under true division (Python 3 / 'from __future__ import division').
    return terms[3:-((len(terms) - 3) // 2 + 1)]
def search(self, *terms):
    """search(term1, term2, term3, ...) -> []

    Extract a set from the example *terms* using Google Sets.

    Returns the suggested set members scraped from the result page.
    """
    base = 'http://labs.google.com/sets?'
    # Terms are numbered q1, q2, q3, ... in the query string.
    pairs = [('q' + str(index + 1), value)
             for index, value in enumerate(terms)]
    _query = http.encode_url(pairs)
    content = self.open_url(base + _query).read()
    # Raw string so '\s' is a regex escape, not an invalid string escape.
    return re.findall(r'<center>(.*?)\s*</center></a>', content)
def search(self, query):
    """search(query) -> []

    Suggest completions for *query* using Google Suggest.
    """
    base = 'http://www.google.com/complete/search?'
    _query = http.encode_url({'qu': query})
    content = self.open_url(base + _query).read()
    # Response is a JS array of quoted strings: three header entries,
    # the completions, then one count per completion plus a trailer.
    terms = re.findall('"(.*?)",?', content)
    # BUG FIX: dropped leftover debug 'print'; '//' keeps the slice
    # index an integer under true division.
    return terms[3:-((len(terms) - 3) // 2 + 1)]
def query_dns(self, query, last='', page=0):
    """query_dns(query, last='', page=0) -> results

    Query the DNS database of Netcraft.

    *query* may carry a 'contains:', 'starts:', 'ends:' or
    'subdomain:' prefix selecting the match mode (default: substring
    match).  Returns a list of 5-tuples per result row, the first
    element being the site's hostname.
    """
    # Renamed from 'type' to avoid shadowing the builtin.
    restriction = 'site contains'
    tokens = query.split(':')
    if len(tokens) != 1:
        if tokens[0] == 'contains':
            restriction = 'site contains'
        elif tokens[0] == 'starts':
            restriction = 'site starts with'
        elif tokens[0] == 'ends':
            restriction = 'site ends with'
        elif tokens[0] == 'subdomain':
            restriction = 'subdomain matches'
        host = tokens[1]
    else:
        host = tokens[0]
    base = 'http://searchdns.netcraft.com/?'
    _query = http.encode_url({
        'host': host,
        'last': last,
        'from': (page * 20) + 1,  # Netcraft pages hold 20 rows, 1-based
        'restriction': restriction,
    })
    content = self.open_url(base + _query).read()
    results = []
    # Adjacent string literals concatenate; no '+' needed.
    for item in re.findall(
            '<td align="left">\n'
            '<a href="(.*?)">.*?</a></td>\n'
            '<td align="center">.*?</td>\n'
            '<td>(.*?)</td>\n'
            '<td><a href=".*?q=(.*?)">(.*?)</a></td>\n'
            '<td><a href=".*?">(.*?)</a></td>', content):
        # Strip the scheme from the site link for the first field.
        results.append((re.match('.*?://(.*?)/', item[0]).group(1),
                        item[1], item[3], item[2], item[4]))
    return results
def translate_text(self, text, from_lang='en', to_lang='de'):
    """translate_text(text, from_lang='en', to_lang='de') -> string

    Send *text* to Google Translate and return the from_lang→to_lang
    translation.
    """
    endpoint = 'http://translate.google.com/translate_t?'
    pair = from_lang + '|' + to_lang
    query = http.encode_url({'langpair': pair, 'text': text})
    body = self.open_url(endpoint + query).read()
    # The translated text comes back inside the first <textarea>.
    areas = re.findall('<textarea .*?>(.*?)</textarea>', body)
    return areas[0].strip()
def query_dns(self, query, last='', page=0):
    """query_dns(query, last='', page=0) -> results

    Query the DNS database of Netcraft.

    *query* may carry a 'contains:', 'starts:', 'ends:' or
    'subdomain:' prefix selecting the match mode (default: substring
    match).  Returns a list of 5-tuples per result row, the first
    element being the site's hostname.
    """
    # Renamed from 'type' so the builtin is not shadowed.
    restriction = 'site contains'
    tokens = query.split(':')
    if len(tokens) != 1:
        if tokens[0] == 'contains':
            restriction = 'site contains'
        elif tokens[0] == 'starts':
            restriction = 'site starts with'
        elif tokens[0] == 'ends':
            restriction = 'site ends with'
        elif tokens[0] == 'subdomain':
            restriction = 'subdomain matches'
        host = tokens[1]
    else:
        host = tokens[0]
    base = 'http://searchdns.netcraft.com/?'
    _query = http.encode_url({
        'host': host,
        'last': last,
        'from': (page * 20) + 1,  # 20 rows per page, 1-based offset
        'restriction': restriction,
    })
    content = self.open_url(base + _query).read()
    results = []
    # Adjacent string literals concatenate implicitly; '+' dropped.
    for item in re.findall(
            '<td align="left">\n'
            '<a href="(.*?)">.*?</a></td>\n'
            '<td align="center">.*?</td>\n'
            '<td>(.*?)</td>\n'
            '<td><a href=".*?q=(.*?)">(.*?)</a></td>\n'
            '<td><a href=".*?">(.*?)</a></td>', content):
        # First field is the hostname stripped of its scheme.
        results.append((re.match('.*?://(.*?)/', item[0]).group(1),
                        item[1], item[3], item[2], item[4]))
    return results