def Main():
    '''Wrapper.'''
    try:
        #
        # Collecting data from UNHCR.
        #
        print '%s Collecting data from UNHCR.' % item('bullet')
        data = Fetch.Fetch()

        #
        # Patching data.
        # Epoch time doesn't seem to be 1970.
        #
        print '%s Patching data.' % item('bullet')
        # pdata = Patch.Epoch(data)
        pdata = Patch.Date(data)

        #
        # Storing data in database.
        #
        print '%s Storing records in database.' % item('bullet')
        CleanTable('monthly_arrivals_by_country')
        StoreRecords(pdata, 'monthly_arrivals_by_country')

        print '%s Collected data from UNHCR successfully.' % item('success')
        scraperwiki.status('ok')

    except Exception as e:
        print '%s UNHCR Collector failed.' % item('error')
        print e
        scraperwiki.status('error', 'Collection failed.')
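
#
# The helpers used above -- item(), CleanTable() and StoreRecords() --
# are defined elsewhere in this repo. A minimal sketch of the assumed
# interfaces, based on the classic scraperwiki.sqlite API; the marker
# strings in item() are placeholders, not the project's actual output:
#
def item(kind):
    '''Return a console marker for log messages (markers assumed).'''
    markers = {'bullet': '*', 'success': 'SUCCESS:', 'warn': 'WARNING:', 'error': 'ERROR:'}
    return markers.get(kind, '*')

def CleanTable(table_name):
    '''Delete all previously stored records from a table.'''
    try:
        scraperwiki.sqlite.execute('DELETE FROM %s' % table_name)
    except Exception:
        pass  # the table may not exist yet on a first run

def StoreRecords(records, table_name):
    '''Save a list of record dicts into the given table.'''
    for record in records:
        scraperwiki.sqlite.save(unique_keys=[], data=record, table_name=table_name)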
def collect():
    '''Scrapes and stores data in database.'''
    #
    # Collecting URLs from OpenNepal.
    #
    print '%s Collecting dataset URLs from OpenNepal.' % item('bullet')
    urls = []
    for page in range(0, 1):  # currently scrapes only the first listing page
        data = Scraper.ScrapeURLs(page=page)
        urls += data

    #
    # Storing URLs.
    #
    CleanTable('opennepal_urls')
    StoreRecords(urls, 'opennepal_urls')

    #
    # Scrape content.
    #
    errors = 0
    content = []
    print '%s Scraping datasets.' % item('bullet')
    for url in urls:
        try:
            c = Scraper.ScrapeContent(url=url['url'])
            content.append(c)
        except Exception as e:
            errors += 1
            print '%s Error scraping dataset: %s' % (item('error'), url['url'])
            print e

    print '%s There were a total of %s error(s) scraping data.' % (item('warn'), str(errors))

    #
    # Storing content.
    #
    CleanTable('opennepal_content')
    StoreRecords(content, 'opennepal_content')

    print '%s Collected data from OpenNepal successfully.\n' % item('success')
    return content
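
#
# Scraper.ScrapeURLs and Scraper.ScrapeContent are imported from a
# sibling module. A minimal sketch of the expected interface, assuming
# requests + BeautifulSoup; BASE_URL and the CSS selectors below are
# placeholders, not OpenNepal's actual markup:
#
import requests
from bs4 import BeautifulSoup

BASE_URL = 'http://example.org/datasets'  # placeholder for the real listing URL

def ScrapeURLs(page=0):
    '''Return a list of {"url": ...} dicts for one listing page.'''
    response = requests.get(BASE_URL, params={'page': page})
    soup = BeautifulSoup(response.text, 'html.parser')
    # the selector is an assumption about the listing markup
    return [{'url': a['href']} for a in soup.select('h2.dataset-title a')]

def ScrapeContent(url):
    '''Return a dict of metadata scraped from one dataset page.'''
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # field names mirror what patch() expects; selectors are assumptions
    return {
        'url': url,
        'title': soup.find('h1').get_text(strip=True),
        'date': soup.find('span', class_='date').get_text(strip=True)
    }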
def export_json(data):
    '''Exports scraped data to JSON files on disk.'''
    print '%s Exporting datasets JSON to disk.' % item('bullet')

    #
    # Default directory.
    #
    d = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    data_dir = os.path.join(d, 'data')

    #
    # Calling the JSON generators
    # on the default directory.
    #
    ExportDatasets(data, data_dir)
    ExportResources(data, data_dir)

    print '%s Successfully exported JSON files.\n' % item('success')
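
#
# ExportDatasets and ExportResources are the JSON generators referenced
# above. A minimal sketch, assuming each dumps a single JSON file into
# data_dir; the output file names are assumptions:
#
import json

def ExportDatasets(data, data_dir):
    '''Write dataset metadata to a JSON file (file name assumed).'''
    with open(os.path.join(data_dir, 'datasets.json'), 'w') as f:
        json.dump(data, f, indent=2)

def ExportResources(data, data_dir):
    '''Write resource metadata to a JSON file (file name assumed).'''
    with open(os.path.join(data_dir, 'resources.json'), 'w') as f:
        json.dump(data, f, indent=2)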
def patch(data):
    '''Patching data.'''
    print '%s Patching data.' % item('bullet')
    out = []
    for record in data:
        record['id'] = Patcher.Slug(record['title'])
        record['dataset_date'] = Patcher.Date(record['date'])
        out.append(record)

    #
    # Storing content.
    #
    CleanTable('opennepal_content')
    StoreRecords(out, 'opennepal_content')

    print '%s Patched data successfully.\n' % item('success')
    return out
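
#
# Patcher.Slug and Patcher.Date live in a sibling module. A minimal
# sketch of the assumed behavior; the input date format is a guess,
# not confirmed by the source:
#
import re
from datetime import datetime

def Slug(title):
    '''Lower-case a title and collapse non-alphanumerics into hyphens.'''
    return re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-')

def Date(raw_date):
    '''Normalize a scraped date string to ISO 8601 (input format assumed).'''
    return datetime.strptime(raw_date, '%d %b %Y').strftime('%Y-%m-%d')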
def main(development=False):
    '''Wrapper.'''
    try:
        #
        # Either collect data or use
        # previously collected data from
        # database.
        #
        if development is False:
            data = collect()
            pdata = patch(data)
        else:
            cursor = scraperwiki.sqlite.execute('SELECT * FROM opennepal_content')
            pdata = []
            for record in cursor['data']:
                pdata.append(dict(zip(cursor['keys'], record)))

        #
        # Create static JSON files.
        #
        export_json(data=pdata)
        scraperwiki.status('ok')

    #
    # Send notification if scraper fails.
    #
    except Exception as e:
        print '%s OpenNepal Scraper failed.' % item('error')
        print e
        scraperwiki.status('error', 'Collection failed.')
        os.system("mail -s 'OpenNepal: Scraper failed.' [email protected]")
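
#
# Typical entry point: run a full collection by default; pass
# development=True to re-use records already stored in the database
# instead of scraping again.
#
if __name__ == '__main__':
    main()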