# Scrape model for Erowid experience reports.
# Phase 1 reads the report index table; phase 2 stores the text and body
# weight found on each report page.
erowid = ScrapeModel(
    name='erowid',
    domain='https://www.erowid.org/experiences/',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(
                    url="https://www.erowid.org/experiences/exp.cgi?ShowViews=1&Cellar=0&Start=0&Max=24777"
                )
            ],
            templates=(
                Template(
                    name='report_url',
                    selector='.exp-list-table tr',
                    # NOTE(review): presumably turns every row into a source
                    # for the next phase and forwards the row attributes --
                    # confirm against the Template implementation.
                    source={'active': False, 'copy_attrs': True},
                    attrs=(
                        Attr(name='url', selector='td:nth-of-type(2) a',
                             func='sel_url'),
                        Attr(name='title', selector='td:nth-of-type(2) a',
                             func='sel_text'),
                        Attr(name='rating', selector='td:nth-of-type(1) img',
                             func='sel_attr', kws={'attr': 'alt'}),
                        Attr(name='author', selector='td:nth-of-type(3)',
                             func='sel_text'),
                        Attr(name='substances', selector='td:nth-of-type(4)',
                             func='sel_text',
                             kws={
                                 'replacers': '&',
                                 'substitute': ',',
                                 # Raw string: '\-' and '\s' are invalid escape
                                 # sequences in a normal string literal.
                                 # NOTE(review): 'A-z' also matches the ASCII
                                 # characters between 'Z' and 'a'
                                 # ([ \ ] ^ _ `); 'A-Za-z' may be intended.
                                 'regex': r'([A-z0-9\-]+\s*[A-z0-9\-*\s]*)',
                             }),
                        Attr(name='date', selector='td:nth-of-type(5)',
                             func='sel_text'),
                        Attr(name='views', selector='td:nth-of-type(6)',
                             func='sel_text'),
                    ),
                ),
            ),
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=(
                Template(
                    name='drug_report',
                    selector='',
                    db_type='mongo_db',
                    db='erowid',
                    table='drug_report',
                    attrs=(
                        Attr(name='text', selector='.report-text-surround',
                             func='sel_text'),
                        Attr(name='weight', selector='td.bodyweight-amount',
                             func='sel_text'),
                    ),
                ),
            ),
        ),
    ],
)
# Scrape model for the ROM/EEPROM/BIOS/EFI/UEFI board on ghostlyhaks.com.
efi_dumps = ScrapeModel(
    name='efi_dumps',
    domain='https://ghostlyhaks.com/',
    num_getters=2,
    phases=[
        # Phase 1: one 'forum_post' record per topic row, plus pagination.
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(
                    url="https://ghostlyhaks.com/forum/rom-eeprom-bios-efi-uefi"
                ),
            ],
            templates=(
                Template(
                    name='forum_post',
                    selector='.kbody tr',
                    db_type='mongo_db',
                    db='efi_dumps',
                    table='forum_post',
                    attrs=(
                        # source is for next run
                        Attr(
                            name='url',
                            selector='a.ktopic-title',
                            func='sel_url',
                            source=Source(active=False),
                        ),
                        Attr(name='user', selector='.kwho-user',
                             func='sel_text'),
                        Attr(name='user_url', selector='.kwho-user',
                             func='sel_url'),
                    ),
                ),
                Template(
                    name='next_page',
                    selector='.kpagination',
                    attrs=[
                        # source is for next run
                        Attr(
                            name='url',
                            selector='a',
                            func='sel_url',
                            source=Source(),
                        ),
                    ],
                ),
            ),
        ),
        # Phase 2: store every link whose href contains ".zip" or ".tar".
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=(
                Template(
                    name='forum_post',
                    selector='a[href*=".zip"], a[href*=".tar"]',
                    db_type='mongo_db',
                    db='efi_dumps',
                    table='efi_dumps',
                    attrs=[
                        Attr(name='url', selector='', func='sel_url'),
                    ],
                ),
            ),
        ),
    ],
)
# Scrape model for nytimes.com: phase 1 reads the site-index menu, phase 2
# extracts article fields.
#
# The original statement did not parse: a comma was missing between two Attr
# entries, one Attr used the misspelled func 'sel_tex', and the closing
# brackets of the templates tuple, Phase, phases list and ScrapeModel call
# were absent. Those three defects are repaired here; nothing else changed.
nytimes = ScrapeModel(
    name='nytimes',
    domain='https://www.nytimes.com/',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[Source(url="https://www.nytimes.com/")],
            templates=(
                Template(
                    name='menu',
                    selector='#site-index-navigation li',
                    db_type='MongoDB',
                    db='nytimes',
                    table='menu',
                    attrs=(
                        # source is for next run
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                    ),
                ),
            ),
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[Source(url="https://www.nytimes.com/")],
            templates=(
                Template(
                    name='articlelist',
                    selector='',
                    db_type='MongoDB',
                    db='nytimes',
                    table='articles',
                    # NOTE(review): three attrs share the name 'text'; how
                    # duplicates are resolved depends on Template -- confirm
                    # and remove the redundant ones.
                    attrs=(
                        Attr(name='title', selector='h1', func='sel_text'),
                        Attr(name='text',
                             selector='.story-body-supplemental p',
                             func='sel_text'),
                        Attr(name='writer', selector='span.byline-author',
                             func='sel_text'),
                        Attr(name='text', selector='.story-body-supplemental',
                             func='sel_text'),
                        # NOTE(review): 'time.datine' looks like a typo for
                        # 'time.dateline' -- verify against the page markup.
                        Attr(name='date', selector='time.datine',
                             func='sel_attr', kws={'attr': 'datetime'}),
                        Attr(name='related', func='sel_url',
                             selector='#related-combined-coverage a.story-link'),
                        Attr(name='text',
                             selector='.story-body-supplemental p',
                             func='sel_text'),
                    ),
                ),
            ),
        ),
    ],
)
# Scrape model for Belgian governments listed on the French Wikipedia.
belgian_parlement_roles = ScrapeModel(
    name='belgian_parlement_roles',
    domain='https://fr.wikipedia.org/',
    num_getters=2,
    phases=[
        # Phase 1: collect the link in the second cell of every wikitable row.
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(
                    url="https://fr.wikipedia.org/wiki/Liste_des_gouvernements_de_la_Belgique"
                ),
            ],
            templates=(
                Template(
                    name='government',
                    selector='.wikitable tr td:nth-of-type(2)',
                    attrs=(
                        # source is for next run
                        Attr(
                            name='url',
                            selector='a',
                            func='sel_url',
                            source=Source(active=False),
                        ),
                    ),
                ),
            ),
        ),
        # Phase 2: store a url/title pair per row of the first table.
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=(
                Template(
                    name='government',
                    selector='table:nth-of-type(1) tr',
                    db_type='mongo_db',
                    db='belgian_politics',
                    table='politicians',
                    attrs=(
                        Attr(name='url', selector='td:nth-of-type(2) a',
                             func='sel_url'),
                        Attr(name='title', selector='td:nth-of-type(1)',
                             func='sel_text'),
                    ),
                ),
            ),
        ),
    ],
)
# Scrape model for kinky.nl advert listings.
# Each source URL is one listing category; the category label is attached to
# the source as a fixed 'sex' attribute. `kinkycookies` is defined outside
# this block.
kinky = ScrapeModel(
    name='kinky',
    domain='http://www.kinky.nl/',
    cookies=kinkycookies,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(
                    url='http://www.kinky.nl/sex-afspraken/mannen/default.aspx?pagesize=5000',
                    attrs=[Attr(name='sex', value='man')],
                ),
                Source(
                    url='http://www.kinky.nl/sex-afspraken/vrouwen/default.aspx?pagesize=5000',
                    attrs=[Attr(name='sex', value='vrouw')],
                ),
                Source(
                    url='http://www.kinky.nl/sex-afspraken/transsexuelen/default.aspx?pagesize=5000',
                    attrs=[Attr(name='sex', value='trans')],
                ),
                Source(
                    url='http://www.kinky.nl/sex-afspraken/stellen/default.aspx?pagesize=5000',
                    attrs=[Attr(name='sex', value='stellen')],
                ),
                Source(
                    url='http://www.kinky.nl/sex-afspraken/gay/default.aspx?pagesize=5000',
                    attrs=[Attr(name='sex', value='gay')],
                ),
            ],
            templates=[
                Template(
                    name='advert',
                    selector='#advertenties > div',
                    db_type='mongo_db',
                    db='kinky',
                    table='adds',
                    attrs=[
                        Attr(
                            name='phone',
                            selector='.quickinfo > span',
                            func='sel_text',
                            kws={
                                'children': True,
                                'debug': True,
                                'regex': 'Mijn telefoonnummer: (.*)',
                            },
                        ),
                        Attr(name='city',
                             selector='.quickinfo span.country',
                             func='sel_text'),
                        Attr(name='url', selector='.advertentie_kop a',
                             func='sel_attr', kws={'attr': 'href'}),
                    ],
                ),
            ],
        ),
    ],
)
# Scrape model for southpark.cc.com.
# Both phases carry the same source and the same 'video' template.
southpark = ScrapeModel(
    name='southpark',
    domain='http://southpark.cc.com/',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url="http://southpark.cc.com/"),
            ],
            templates=(
                Template(
                    name='video',
                    selector='',
                    db_type='MongoDB',
                    db='southpark',
                    table='video',
                    attrs=(
                        # source is for next run
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                        Attr(name='title', selector='h1', func='sel_text'),
                        Attr(name='text', selector='p', func='sel_text'),
                    ),
                ),
            ),
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url="http://southpark.cc.com/"),
            ],
            templates=(
                Template(
                    name='video',
                    selector='',
                    db_type='MongoDB',
                    db='southpark',
                    table='video',
                    attrs=(
                        # source is for next run
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                        Attr(name='title', selector='h1', func='sel_text'),
                        Attr(name='text', selector='p', func='sel_text'),
                    ),
                ),
            ),
        ),
    ],
)
# Scrape model for the video listings of a fixed set of YouTube channels.
youtube_channel = ScrapeModel(
    name='youtube_channel',
    domain='https://youtube.com/',
    num_getters=2,
    awaiting=True,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url='https://www.youtube.com/user/ozzymanreviews/videos'),
                Source(url='https://www.youtube.com/user/Draadstal/videos'),
                Source(url='https://www.youtube.com/channel/UCQMs9pijXYAdqvkEMJyCM4g/videos'),
                Source(url='https://www.youtube.com/channel/UCi1LpRIlG1tDY5Z54VTel2w/videos'),
                Source(url='https://www.youtube.com/user/vpro/videos'),
                Source(url='https://www.youtube.com/user/nprmusic/videos'),
            ],
            templates=(
                # One record per video tile on the channel page.
                Template(
                    name='channel_videos',
                    selector='li.channels-content-item',
                    db_type='mongo_db',
                    db='youtube_channel',
                    table='channel_videos',
                    attrs=[
                        Attr(name='url', selector='h3.yt-lockup-title a',
                             func='sel_url'),
                        Attr(name='title', selector='h3', func='sel_text'),
                        Attr(
                            name='views',
                            selector='.yt-lockup-meta-info',
                            func='sel_text',
                            kws={'regex': '(.*) weergaven', 'numbers': True},
                        ),
                    ],
                ),
                # The "load more" button carries the href of the next batch;
                # it is formatted into src_template to build the next source.
                Template(
                    name='next_videos',
                    selector='.browse-items-load-more-button',
                    attrs=[
                        Attr(
                            name='url',
                            func='sel_attr',
                            kws={'attr': 'data-uix-load-more-href'},
                            source=Source(
                                src_template='http://youtube.com{}',
                                json_key=['content_html',
                                          'load_more_widget_html'],
                            ),
                        ),
                    ],
                ),
            ),
        ),
    ],
)
# Scrape model for headlines24.nl.
# Both phases carry the same source and the same 'category' template.
headlines = ScrapeModel(
    name='headlines',
    domain='http://www.headlines24.nl/',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url="http://www.headlines24.nl/"),
            ],
            templates=(
                Template(
                    name='category',
                    selector='',
                    db_type='mongo_db',
                    db='headlines',
                    table='category',
                    attrs=(
                        # source is for next run
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                        Attr(name='title', selector='h1', func='sel_text'),
                        Attr(name='text', selector='p', func='sel_text'),
                    ),
                ),
            ),
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url="http://www.headlines24.nl/"),
            ],
            templates=(
                Template(
                    name='category',
                    selector='',
                    db_type='mongo_db',
                    db='headlines',
                    table='category',
                    attrs=(
                        # source is for next run
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                        Attr(name='title', selector='h1', func='sel_text'),
                        Attr(name='text', selector='p', func='sel_text'),
                    ),
                ),
            ),
        ),
    ],
)
# Scrape model for the Paradiso agenda: phase 1 collects event links, phase 2
# stores name, date, time and price per event.
paradiso = ScrapeModel(
    name='paradiso',
    domain='https://paradiso.nl',
    phases=[
        Phase(
            source_worker=WebSource,
            sources=[Source(url='https://paradiso.nl/web/Agenda.htm')],
            parser=HTMLParser,
            templates=[
                Template(
                    name='event_link',
                    selector='a.event-link',
                    attrs=[
                        Attr(name='url', func='sel_attr',
                             kws={'attr': 'href'},
                             source={'active': False}),
                    ],
                ),
            ],
        ),
        # No source_worker/parser given here -- presumably Phase falls back to
        # defaults; confirm against the Phase implementation.
        Phase(
            templates=[
                Template(
                    name='event',
                    db_type='MongoDB',
                    db='paradiso',
                    table='events',
                    attrs=[
                        # NOTE(review): 'name' and 'date' read the same meta
                        # tag ('evenementts') -- verify this is intended.
                        Attr(name='name',
                             selector='meta[name=evenementts]',
                             func='sel_attr', kws={'attr': 'content'}),
                        Attr(name='date',
                             selector='meta[name=evenementts]',
                             func='parse_attr', kws={'attr': 'content'}),
                        Attr(name='time',
                             selector='meta[name=evenementtijd]',
                             func='parse_attr', kws={'attr': 'content'}),
                        # Raw string: '\d' is an invalid escape sequence in a
                        # normal string literal. The pattern value is unchanged.
                        Attr(name='price', selector='.info p',
                             func='parse_text',
                             kws={'regex': r'(\d+,\d*)'}),
                    ],
                ),
            ],
        ),
    ],
)
# Scrape model for the volkskrant.nl archive.
# Phase 1: per-year archive pages -> day URLs.
# Phase 2: day pages -> article URLs, plus pager links.
# Phase 3: article pages -> stored article records.
volkskrant = ScrapeModel(
    name='volkskrant',
    domain='http://www.volkskrant.nl/',
    num_getters=2,
    cookies={'nl_cookiewall_version': '1'},
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            # `today` is defined outside this block; range() requires an int,
            # so it is presumably the current year -- confirm.
            sources=[
                Source(url="http://www.volkskrant.nl/archief/{}".format(year))
                for year in range(1987, today)
            ],
            templates=(
                Template(
                    name='day_url',
                    selector='td',
                    attrs=(
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                    ),
                ),
            ),
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=(
                Template(
                    name='article_url',
                    selector='article',
                    attrs=(
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                    ),
                ),
                Template(
                    name='next_page_url',
                    selector='a.pager',
                    attrs=(
                        Attr(name='url', selector='', func='sel_url',
                             source=True),
                    ),
                ),
            ),
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=(
                Template(
                    name='article',
                    selector='',
                    db_type='MongoDB',
                    db='volkskrant',
                    table='articles',
                    attrs=(
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                        Attr(name='title', selector='h1', func='sel_text'),
                        Attr(name='subtitle', selector='h2', func='sel_text'),
                        Attr(name='author',
                             selector='span[itemprop="author"]',
                             func='sel_text'),
                        # Fixed: this attribute was also named 'author', which
                        # collided with the real author field even though it
                        # selects the publication date.
                        Attr(name='date',
                             selector='time[itemprop="datePublished"]',
                             func='sel_text'),
                        Attr(name='category',
                             selector='meta[property="article:section"]',
                             func='sel_attr', kws={'attr': 'content'}),
                        Attr(name='description',
                             selector='p[itemprop="description"]',
                             func='sel_text'),
                        Attr(name='text',
                             selector='.article__body__paragraph',
                             func='sel_text'),
                    ),
                ),
            ),
        ),
    ],
)
# Scrape model for the Meertens first-name database (model name 'namen').
# Phase 1: name listing rows -> name/men/women counts and a detail URL.
# Phase 2: detail page -> updated counts plus four per-category data URLs.
# Phase 3: history page -> year/value arrays read from inline JavaScript.
meertens = ScrapeModel(
    name='namen',
    domain='http://www.meertens.knaw.nl/',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            # Only the 'Aad' prefix is crawled; the original inline note names
            # string.ascii_lowercase as the alternative iterable.
            sources=(
                Source(url="http://www.meertens.knaw.nl/nvb/naam/begintmet/" + prefix)
                for prefix in ['Aad']
            ),
            templates=[
                Template(
                    name='name',
                    selector='tr.data',
                    db_type='mongo_db',
                    db='names',
                    table='name_count_test',
                    attrs=[
                        Attr(name='name', selector='td:nth-of-type(1)',
                             func='sel_text'),
                        Attr(name='men', selector='td:nth-of-type(2)',
                             func='sel_text', kws={'numbers': True}),
                        Attr(name='women', selector='td:nth-of-type(3)',
                             func='sel_text', kws={'numbers': True}),
                        Attr(name='url', selector='td:nth-of-type(1) a',
                             func='sel_attr', kws={'attr': 'href'},
                             source={'active': False},
                             source_condition={'women': '> 50',
                                               'men': '> 50'}),
                    ],
                ),
                Template(
                    name='next_url',
                    selector='.right',
                    attrs=[
                        # NOTE(review): selector 'abc' looks like a
                        # placeholder -- verify.
                        Attr(name='next', selector='abc', func='sel_attr',
                             kws={'attr': 'href'},
                             source={'active': True}),
                    ],
                ),
            ],
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=[
                Template(
                    name='name',
                    selector='table.nameinfo',
                    func='update',
                    kws={'key': 'name'},
                    db_type='mongo_db',
                    db='names',
                    table='name_count_test',
                    attrs=[
                        Attr(name='name', selector='div.name',
                             func='sel_text'),
                        Attr(name='men', func='sel_text',
                             kws={'numbers': True},
                             selector='tr:nth-of-type(2) td:nth-of-type(3)'),
                        Attr(name='men_second', func='sel_text',
                             kws={'numbers': True},
                             selector='tr:nth-of-type(3) td:nth-of-type(3)'),
                        Attr(name='women', func='sel_text',
                             kws={'numbers': True},
                             selector='tr:nth-of-type(6) td:nth-of-type(3)'),
                        Attr(name='women_second', func='sel_text',
                             kws={'numbers': True},
                             selector='tr:nth-of-type(7) td:nth-of-type(3)'),
                    ],
                ),
                # Four 'data_url' templates, one per category link; each tags
                # the follow-up source with a fixed 'sex_name' attribute.
                Template(
                    name='data_url',
                    selector='a[href*="absoluut/man/eerstenaam"]',
                    attrs=[
                        Attr(name='next', selector='a', func='sel_attr',
                             kws={'attr': 'href'},
                             source=Source(active=False, attrs=[
                                 Attr(name='sex_name', value='men'),
                             ])),
                    ],
                ),
                Template(
                    name='data_url',
                    selector='a[href*="absoluut/man/volgnaam"]',
                    attrs=[
                        Attr(name='next', selector='a', func='sel_attr',
                             kws={'attr': 'href'},
                             source=Source(active=False, attrs=[
                                 Attr(name='sex_name', value='men_second'),
                             ])),
                    ],
                ),
                Template(
                    name='data_url',
                    selector='a[href*="absoluut/vrouw/eerstenaam"]',
                    attrs=[
                        Attr(name='next', selector='a', func='sel_attr',
                             kws={'attr': 'href'},
                             source=Source(active=False, attrs=[
                                 Attr(name='sex_name', value='women'),
                             ])),
                    ],
                ),
                Template(
                    name='data_url',
                    selector='a[href*="absoluut/vrouw/volgnaam"]',
                    attrs=[
                        Attr(name='next', selector='a', func='sel_attr',
                             kws={'attr': 'href'},
                             source=Source(active=False, attrs=[
                                 Attr(name='sex_name', value='women_second'),
                             ])),
                    ],
                ),
            ],
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=[
                Template(
                    name='history',
                    selector='#content',
                    db_type='mongo_db',
                    db='names',
                    table='history2',
                    kws={'key': 'name'},
                    attrs=[
                        Attr(name='name', selector='div.name',
                             func='sel_text'),
                        Attr(name='years', selector='script',
                             func='sel_js_array',
                             kws={'var_name': 'year_list',
                                  'var_type': int}),
                        Attr(name='values', selector='script',
                             func='sel_js_array',
                             kws={'var_name': 'value_list',
                                  'var_type': float}),
                        # TODO: the original file had one more attribute here
                        # whose definition was cut off after "Attr(name='step"
                        # (unterminated string, which broke parsing). Restore
                        # it from version control.
                    ],
                ),
            ],
        ),
    ],
)
# Scrape model for the performer ranking index: phase 1 follows the
# alphabet filter links, phase 2 stores one record per list entry and
# follows pagination.
pornstars = ScrapeModel(
    name='pornhub_pornstars',
    domain='http://pornhub.com',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url='http://www.pornhub.com/pornstars?o=a'),
            ],
            templates=[
                Template(
                    name='alphabet',
                    selector='.alphabetFilter .dropdownWrapper li',
                    attrs=[
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source(active=False)),
                    ],
                ),
            ],
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=[
                Template(
                    name='pornstar',
                    selector='.pornstarIndex li',
                    db_type='MongoDB',
                    db='pornstars',
                    # NOTE(review): other templates in this file pass
                    # `table=`; confirm Template accepts `collection=`.
                    collection='ranking',
                    attrs=[
                        Attr(name='name', selector='.title',
                             func='sel_text'),
                        Attr(name='rank', selector='.rank_number',
                             func='sel_text', kws={'numbers': True}),
                        Attr(name='views', selector='.pstarViews',
                             func='sel_text', kws={'numbers': True}),
                        Attr(name='videos', selector='.videosNumber',
                             func='sel_text', kws={'numbers': True}),
                        Attr(name='url', selector='a.title',
                             func='sel_url'),
                        Attr(name='image_url', selector='img',
                             func='sel_attr', kws={'attr': 'src'}),
                    ],
                ),
                Template(
                    name='next_urls',
                    selector='.pagination3',
                    attrs=[
                        Attr(name='url', selector='a', func='sel_url',
                             source=Source()),
                    ],
                ),
            ],
        ),
    ],
)
import string

from pymongo import MongoClient

# Scrape model for the signature list of one petities.nl petition.
petitions = ScrapeModel(
    name='petitions',
    domain='https://petities.nl/',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=(
                Source(url="https://petities.nl/petitions/borstkankeronderzoek-vervroegen/signatures?locale=nl"),
            ),
            templates=[
                # Pagination links are fed back in as new sources.
                Template(
                    name='next_page',
                    selector='.navigation-bar .navigation-bar',
                    attrs=[
                        Attr(
                            name='url',
                            selector='a',
                            func='sel_url',
                            kws={'attr': 'href'},
                            source=True,
                        ),
                    ],
                ),
                Template(
                    name='signature',
                    selector='.petition-signature-list',
                    db_type='mongo_db',
                    db='petitions',
                    table='borstkanker',
                    attrs=[
                        Attr(name='name',
                             selector='.petition-signature-name',
                             func='sel_text'),
                        Attr(name='time', selector='.signature-time',
                             func='sel_text'),
                        Attr(name='location',
                             selector='.petition-signature-location',
                             func='sel_text'),
                        Attr(name='occupation',
                             selector='.petition-signature-occupation',
                             func='sel_text'),
                    ],
                ),
            ],
        ),
    ],
)

# Register the model with a dispatcher and run it.
ds = Dispatcher()
ds.add_scraper(petitions)
ds.run()
# Scrape model that visits one page per season/episode combination and hands
# the extracted link to a shell command (mkdir + youtube-dl).
# `filepath` is defined outside this block.
theoffice = ScrapeModel(
    name='theoffice',
    domain='http://watchtheofficeonline.com',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            # Seasons 1-9 x episodes 1-29; combinations that do not exist are
            # requested as well.
            sources=[
                Source(url="http://watchtheofficeonline.com/s{}e{}".format(
                    season, episode))
                for season in range(1, 10)
                for episode in range(1, 30)
            ],
            templates=(
                Template(
                    name='episode',
                    selector='.so-panel.widget.widget_siteorigin-panels-builder',
                    db_type='shell_command',
                    db='theoffice',
                    table='season',
                    kws={
                        # Fixed: the two commands were joined with a single
                        # '&', which backgrounds mkdir and starts youtube-dl
                        # without waiting for it. '&&' runs the download only
                        # after mkdir succeeded.
                        # NOTE(review): mkdir uses `filepath` but the download
                        # target is hard-coded to /mnt/Movies -- confirm they
                        # are meant to be the same directory.
                        # NOTE(review): scraped values are interpolated into a
                        # shell command; make sure the executor quotes them.
                        'command': 'sudo mkdir -p ' + filepath +
                                   '/{season}/ &&' +
                                   ' sudo youtube-dl -o /mnt/Movies/{season}/{episode} {url}'
                    },
                    attrs=(
                        Attr(name='url', selector='a',
                             func=['sel_url', 'sel_text'],
                             kws=[{}, {'needle': r'.*(s\d+e\d+)'}]),
                        Attr(name='episode', selector='.textwidget',
                             func='sel_text',
                             kws={
                                 'index': 3,
                                 'substitute': '_',
                                 'replacers': ' ',
                             }),
                        Attr(name='season', selector='.textwidget',
                             func='sel_text',
                             kws={
                                 'index': 1,
                                 'replacers': ' ',
                             }),
                    ),
                ),
            ),
        ),
    ],
)
# Module-level handles to the MongoDB database and collection that carry the
# same names as the `db`/`table` values of the template below.
cl = MongoClient()
db = cl.gsmhelpdesk_nummerreeksen
col = db.number_range

# Scrape model for the number-range table on gsmhelpdesk.nl: one record with
# numeric 'start' and 'end' values per table row.
gsmhelpdesk_nummerreeksen = ScrapeModel(
    name='gsmhelpdesk_nummerreeksen',
    domain='http://www.gsmhelpdesk.nl/',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url="http://www.gsmhelpdesk.nl/helpdesk/30/nummerreeksen"),
            ],
            templates=(
                Template(
                    name='number_range',
                    selector='tr',
                    db_type='mongo_db',
                    db='gsmhelpdesk_nummerreeksen',
                    table='number_range',
                    attrs=(
                        Attr(
                            name='start',
                            selector='td:nth-of-type(1)',
                            func='sel_text',
                            kws={'numbers': True},
                        ),
                        Attr(
                            name='end',
                            selector='td:nth-of-type(2)',
                            func='sel_text',
                            kws={'numbers': True},
                        ),
                    ),
                ),
            ),
        ),
    ],
)

# Register the model with a dispatcher and run it.
disp = Dispatcher()
disp.add_scraper(gsmhelpdesk_nummerreeksen)
disp.run()
# Scrape model for Amsterdam rental and sale listings on funda.nl.
# Phase 1 stores one 'house' record per search result; phase 2 (declared with
# active=False) describes the viewing-request form of a listing.
funda = ScrapeModel(
    name='funda.nl',
    domain='http://funda.nl',
    num_sources=1,
    phases=[
        Phase(
            parser=HTMLParser,
            source_worker=WebSource,
            sources=[
                Source(url='http://funda.nl/huur/amsterdam/woonhuis/'),
                Source(url='http://funda.nl/huur/amsterdam/appartement/'),
                Source(url='http://funda.nl/koop/amsterdam/woonhuis/'),
                Source(url='http://funda.nl/koop/amsterdam/appartement'),
            ],
            templates=[
                Template(
                    name='house',
                    selector='.search-result',
                    db_type='mongo_db',
                    db='funda',
                    table='for_hire',
                    # All regex values below are raw strings: '\d' and '\w'
                    # are invalid escape sequences in normal string literals.
                    # The pattern values themselves are unchanged.
                    attrs=[
                        Attr(name='price', selector='.search-result-price',
                             func='sel_text', kws={'numbers': True}),
                        Attr(name='street', selector='.search-result-title',
                             func='sel_text'),
                        Attr(name='realtor', selector='.realtor',
                             func='sel_text'),
                        Attr(name='rooms', selector='.search-result-info',
                             func='sel_text',
                             kws={
                                 'regex': r'(\d+) kamers',
                                 'numbers': True,
                             }),
                        Attr(name='zip', selector='.search-result-subtitle',
                             func='sel_text',
                             kws={'regex': r'(\d{4} \w{2})'}),
                        Attr(name='city', func='sel_text',
                             selector='.search-result-subtitle',
                             kws={'regex': r'\d{4} \w{2} (\w+)'}),
                        Attr(name='living_area', func='sel_text',
                             selector='.search-result-info span[title="Woonoppervlakte"]',
                             kws={
                                 'regex': r'(\d+)',
                                 'numbers': True,
                             }),
                        # The listing href is formatted into src_template to
                        # build the viewing-form URL for the next phase.
                        Attr(name='meeting_url',
                             selector='.search-result-header a',
                             func='sel_attr', kws={'attr': 'href'},
                             source={
                                 'src_template': '{}bezichtiging/',
                                 'active': False,
                             }),
                    ],
                ),
                Template(
                    selector='.pagination',
                    attrs=[
                        Attr(
                            name='url',
                            selector='a',
                            func='sel_attr',
                            kws={'attr': 'href'},
                            # source=Source()
                            # (left disabled as in the original, so no Source
                            # is attached to the pagination links)
                        ),
                    ],
                ),
            ],
        ),
        Phase(
            parser=HTMLParser,
            source_worker=WebSource,
            active=False,
            templates=[
                Template(
                    name='bezichtiging',
                    selector='.makelaars-contact-form',
                    attrs=[
                        Attr(name='__RequestVerificationToken',
                             selector='input[name="__RequestVerificationToken"]',
                             func='sel_attr', kws={'attr': 'value'}),
                        Attr(name='url', selector='form', func='sel_attr',
                             kws={'attr': 'action'}),
                    ],
                    # Form fields sent with the POST source.
                    source=Source(
                        method='post',
                        active=False,
                        duplicate=True,
                        data={
                            'Day': '',
                            'DayPart': '',
                            'Opmerking': '',
                            'Aanhef': 'Dhr',
                            'Naam': 'Henk de Vries',
                            'Email': '*****@*****.**',
                            'ConfirmEmail': '',
                            'Telefoon': '0205566206',
                        },
                    ),
                ),
            ],
        ),
    ],
)
# Scrape model for thuisbezorgd.nl: phase 1 follows the "eten-bestellen-"
# section links and stores restaurants; phase 2 stores records in the
# 'reviews' table.
thuisbezorgd = ScrapeModel(
    name='thuisbezorgd',
    domain='http://thuisbezorgd.nl',
    num_getters=2,
    phases=[
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            sources=[
                Source(url="https://www.thuisbezorgd.nl/"),
            ],
            templates=(
                Template(
                    name='sections',
                    selector='',
                    attrs=(
                        # source is for next run
                        Attr(
                            name='url',
                            selector='a[href*="eten-bestellen-"]',
                            func='sel_url',
                            source=Source(),
                        ),
                    ),
                ),
                Template(
                    name='restaurant',
                    selector='.restaurant',
                    db_type='MongoDB',
                    db='thuisbezorgd',
                    table='restaurants',
                    attrs=(
                        # source is for next run
                        Attr(
                            name='url',
                            selector='a.restaurantname',
                            func='sel_url',
                            source=Source(active=False, src_template='{}'),
                        ),
                        Attr(name='name', selector='a.restaurantname',
                             func='sel_text'),
                    ),
                ),
            ),
        ),
        Phase(
            source_worker=WebSource,
            parser=HTMLParser,
            templates=(
                Template(
                    name='reviews',
                    selector='',
                    db_type='MongoDB',
                    db='thuisbezorgd',
                    table='reviews',
                    attrs=(
                        # source is for next run
                        Attr(
                            name='url',
                            selector='a',
                            func='sel_url',
                            source=Source(active=False),
                        ),
                        Attr(name='title', selector='h1', func='sel_text'),
                        Attr(name='text', selector='p', func='sel_text'),
                    ),
                ),
            ),
        ),
    ],
)
# TODO Set the right classes for the websites. from dispatcher import Dispatcher from components import Attr, Source, HTMLObject, Phase, ScrapeModel, Follow kinky =ScrapeModel(name='kinky', domain='kinky.nl', phases=[ Phase(source=[Source(url='http://kinky.nl/sex-afspraken/vrouwen/default.aspx?pagesize=4500')], templates=[Template(name='advert', selector='.advertentie_kop > a', attrs=[Attr(name='url', source={'active': False})]) ]), Phase(templates=[ Template(name='advert', selector='.advertentie_kop > a', db='kinky.nl', table='adverts', Attr(name='add_text', func= 'sel_text', selector='description p'), Attr(name='possibilities', func= 'sel_text', selector= '.possibilities li'), Attr(name='update', func= 'sel_text', selector='update'), Attr(name='town', func= 'sel_text', selector= '.naw div', kws={'regex': '.*Plaats: ([A-z]*);'}), Attr(name='work_area', func= 'sel_text', selector='naw div', kws={'regex': '.*Werkgebied: '}), Attr(name='prices', func= 'sel_text', selector='.prizes td:nth-of-type(2)'), Attr(name='pictures', func= 'sel_attr', selector='.galleryRow img', kws={'attr': 'src'}), Attr(name='phone', selector='.mainprofile .webbutton span', func='sel_text'), Attr(name='name', selector='h1.title', func='sel_text'), '__reg__age': ['.naw div', '.*Leeftijd: (\d\d)'], '__reg__length': ['.naw div', '.*Lengte: (\d\d\d)'], '__reg__hair_color': ['.naw div', '.*Kleur haar: ([a-z]*);'], '__reg__build': ['.naw div', '.*Lichaamsbouw: ([A-z]*);'], '__reg__looks': ['.naw div', '.*Uiterlijk: ([A-z]*);'], , , , 'start': [ 'meta': { 'sex': 'female',