def get_queryset(self):
    """Build a Collection of URL objects for the site's own views and
    for the views of every registered router."""
    all_views = list(self.site.views)
    for router in self.site.values():
        all_views.extend(router.views)
    return Collection([URL.factory(view) for view in all_views])
def detail_list(self):
    """Return a Collection of domain dicts from ``self.summary``,
    fetching extended details for any domain not provided by cdmon."""
    detailed = []
    for domain in self.summary:
        # cdmon-provided domains are kept as-is; everything else gets
        # enriched via get_details() before being collected.
        if domain['provider'] != 'cdmon':
            domain = self.get_details(domain)
        detailed.append(domain)
    return Collection(detailed)
def parse_all(self):
    """Scrape the cdmon services listing and return a Collection of
    domain dicts.

    Posts the ``loadServices`` action, parses every ``<tr>`` of the
    returned table and builds one dict per 3-cell row with keys:
    id, name, canonical_name, provider, slug, date.  Rows whose second
    cell mentions 'alta' are tagged provider 'other' instead of 'cdmon'.

    Returns:
        Collection: one entry per service row found.
    """
    services_payload = {
        'action': 'loadServices',
        'dades[alpha]': 'tots',
        'dades[hash]': self.ourhash,
        'dades[nom]': '',
        'dades[order][column]': 1,
        'dades[order][type]': 'asc',
        'dades[page]': 1,
        'dades[type]': 0,
        'dades[view]': 1000,
    }
    services_list_r = self.s.post(self.services_url, services_payload)
    soup = self.get_soup(hydrate(services_list_r.text))
    domains = []
    for row in soup.find_all('tr'):
        _id = None
        try:
            # Row ids look like "...-<numeric id>"; header/filler rows
            # without an id attribute simply leave _id as None.
            _id = clean_string(row['id'].split("-")[-1])
        except Exception:
            # Was a bare except; narrowed so Ctrl-C/SystemExit escape.
            pass
        cells = row.find_all('td')
        if len(cells) != 3:
            continue
        dtend_span = cells[1].find('span', attrs={"class": "dtend"})
        date = u''
        if dtend_span:
            # Expiry date is the last whitespace-separated token.
            date = remove_shit(
                clean_string(dtend_span.text.split(" ")[-1]))
        name = clean_string(cells[1].div.a.get('href')).split('=')[-1]
        slug = slugify(unicode(name.replace('.', '_')))
        _dict = {
            'id': _id,
            'name': name,
            # This listing exposes no separate canonical form.
            'canonical_name': name,
            'provider': 'cdmon',
            'slug': slug,
            'date': date,
        }
        logging.debug('Found in CDMON %s' % name)
        if 'alta' in cells[1].text:
            _dict['provider'] = 'other'
        domains.append(_dict)
    return Collection(domains)
def get_details(self, domain):
    """Fetch management-panel details for *domain* and return it.

    Mutates the given domain dict in place, adding: status, locked,
    notifications, private_whois, automatic_renovation flags, the
    creation/expiry dates, the auth code and a ``records`` Collection.

    NOTE(review): the panel markup nesting was reconstructed from
    collapsed source — the text-based checks ('Fecha de', 'Auth Code',
    'hasta:') are assumed to apply to every list item; confirm against
    a live response.
    """
    logging.info("Getting details %s" % domain['slug'])
    name = domain['name']
    # Restore the literal n-tilde escape before querying the panel.
    if '\u00f1' in name:
        name = name.replace('\u00f1', u'ñ')
    mng_url = self.index_url + u'/es/dominios/principal'
    response = self.s.get(mng_url,
                          params={'dades[dom]': name},
                          allow_redirects=True)
    soup = self.get_soup(response.text)
    items = soup.find_all('li', role="listitem")
    domain['records'] = Collection([])
    if len(items) > 0:
        # Toggle flags default to "on"; a 'soff' css class on the
        # item's anchor means the feature is switched off.
        domain['status'] = False
        domain['locked'] = True
        domain['notifications'] = True
        domain['private_whois'] = True
        domain['automatic_renovation'] = True
        # id prefix of each toggle <li> -> flag it controls.
        toggles = (
            ('block', 'locked'),
            ('aviso', 'notifications'),
            ('whoIS', 'private_whois'),
            ('autoRenew', 'automatic_renovation'),
        )
        for index, item in enumerate(items):
            _id = item.get('id')
            if _id:
                for prefix, key in toggles:
                    # Class lookup only when the prefix matches, so
                    # non-toggle items with ids never touch span.a.
                    if (_id.startswith(prefix)
                            and 'soff' in item.span.a.get('class')):
                        domain[key] = False
            if 'Fecha de' in item.strong.text:
                domain['date_created'] = remove_shit(
                    item.span.text.strip())
            if 'Auth Code' in item.strong.text:
                domain['auth_code'] = item.span.text
            if 'hasta:' in item.strong.text:
                domain['date_valid'] = remove_shit(
                    item.span.text.strip())
            # The second list item carries the activation status.
            if index == 1 and item.span.text == 'Activo':
                domain['status'] = True
        domain['records'] = self.get_records(domain)
    return domain
def get_records(self, domain):
    """Return a Collection of DNS record dicts for *domain*.

    Parses the 'listado-registros' page: the record type comes from
    each tbody's id suffix, TXT/SPF values are re-quoted, MX rows take
    their priority from the fourth cell.  TTL is not exposed on this
    page, so it is always False.
    """
    records_url = self.index_url + '/es/dns/listado-registros'
    response = self.s.get(
        records_url, params={'dades[dom]': domain['canonical_name']})
    soup = BeautifulSoup(response.text, "lxml")
    main = soup.find('div', class_="panel-list-body")
    records = []
    for tbody in main.table.find_all('tbody'):
        record_type = tbody.get('id').split('-')[-1]
        for tr in tbody.find_all('tr'):
            # Record rows have an id like "<x>-<record id>-..."; rows
            # without an id, or without a dash, are header/filler rows
            # (replaces the bare try/except flag dance).
            row_id = tr.get('id')
            id_parts = row_id.split('-') if row_id else []
            if len(id_parts) < 2:
                continue
            record_id = id_parts[1]
            cells = tr.find_all('td')
            # The original used hasattr(cells[0].label, 'input'), which
            # is always True on a bs4 Tag; check for real presence so a
            # label without an input no longer raises.
            label = cells[0].label
            if label is not None and label.input is not None:
                record_id = label.input['value']
            host = cells[1].strong.text
            value = cells[2].strong.text
            priority = ''
            if record_type in ('TXT', 'SPF'):
                value = '"%s"' % value
            elif record_type == 'MX':
                priority = cells[3].strong.text
            records.append({
                'id': record_id,
                'record_type': record_type,
                'host': host,
                'record': value,
                'priority': priority,
                'ttl': False,
            })
    return Collection(records)
import json from pprint import pprint import operator from functools import reduce import sys sys.path.append(os.path.abspath('../')) from lookupy import Collection, Q f = open('www.youtube.com.har') data = json.load(f) f.close() c = Collection(data['log']['entries']) print("==== All javascript assets fetched ====") js_assets = c.filter(response__content__mimeType='text/javascript') \ .select('request__url') pprint(list(js_assets)) print() print("==== URLs that were blocked ====") blocked_urls = c.filter(timings__blocked__gt=0) \ .select('request__url') pprint(list(blocked_urls)) print() print("==== GET requests that responded with 200 OK ====") get_200 = c.filter(request__method__exact='GET',
def get_queryset(self):
    """Wrap each router registered on the site in a Controller and
    return them as a Collection."""
    controllers = []
    for router in self.site.values():
        controllers.append(Controller.factory(router))
    return Collection(controllers)
'type': 'micro' }, { 'framework': 'Zend', 'language': 'PHP', 'type': 'full-stack' }, { 'framework': 'Slim', 'language': 'PHP', 'type': 'micro' }] print('Data is a list of dict') print(data) print() c = Collection(data) print('Collection wraps data') print(c) print() print('Collection provides QuerySet as it\'s ``items`` attribute') print(c) print() print('filter returns a lazy QuerySet') print(c.filter(framework__startswith='S')) print() print('items in which the framework field startswith \'S\'') print(list(c.filter(framework__startswith='S')))