import datetime

from .models import Crawler  # assumed: the Crawler model is defined in this app's models.py


def etl(pk):
    last_imported = datetime.datetime.now()

    crawler = Crawler.objects.get(pk=pk)

    # add to queue
    if crawler.sitemap:
        # get sitemap and add its URLs to the queue
        from opensemanticetl.tasks import index_sitemap
        index_sitemap.apply_async(
            kwargs={'uri': crawler.sitemap},
            queue='open_semantic_etl_tasks',
            priority=5,
        )
    elif crawler.crawler_type == "DOMAIN" or crawler.crawler_type == "PATH":
        # add whole website crawl to queue
        from opensemanticetl.tasks import index_web_crawl
        index_web_crawl.apply_async(
            kwargs={'uri': crawler.uri, 'crawler_type': crawler.crawler_type},
            queue='open_semantic_etl_tasks',
            priority=5,
        )
    else:
        # add single web page to queue
        from opensemanticetl.tasks import index_web
        index_web.apply_async(
            kwargs={'uri': crawler.uri},
            queue='open_semantic_etl_tasks',
            priority=5,
        )

    # save new timestamp
    crawler.last_imported = last_imported
    crawler.save()
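For context, the three tasks dispatched above (index_web, index_web_crawl, index_sitemap) live in opensemanticetl.tasks, which is not shown here. Below is a minimal sketch of how tasks with those names and keyword arguments could be declared with Celery; the broker URL and the task bodies are assumptions, only the names and kwargs match the apply_async calls above.

# sketch of opensemanticetl/tasks.py, not the real implementation
from celery import Celery

app = Celery('opensemanticetl', broker='redis://localhost:6379/0')  # broker assumed


@app.task
def index_web(uri):
    # placeholder body: extract, transform and index a single web page
    pass


@app.task
def index_web_crawl(uri, crawler_type):
    # placeholder body: crawl a whole domain or path and index its pages
    pass


@app.task
def index_sitemap(uri):
    # placeholder body: fetch the sitemap and queue each listed URL
    pass

A worker consuming these tasks would be started with something like celery -A opensemanticetl worker -Q open_semantic_etl_tasks; the explicit queue='open_semantic_etl_tasks' in each apply_async call is what routes the task to that queue.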
import json

from django.http import HttpResponse


def queue_index_web(request):
    uri = request.GET["uri"]

    # enqueue the page for indexing and return the Celery task id
    from opensemanticetl.tasks import index_web
    result = index_web.apply_async(
        kwargs={'uri': uri},
        queue='open_semantic_etl_tasks',
        priority=5,
    )

    return HttpResponse(
        json.dumps({'queue': result.id}),
        content_type="application/json",
    )
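queue_index_web is a plain Django view, so it still needs a URL pattern before it can be reached over HTTP. The project's real urls.py is not shown; the route string and module layout below are assumptions, but with standard Django routing the wiring would look roughly like this.

# sketch of the app's urls.py, assuming queue_index_web lives in views.py
from django.urls import path

from . import views

urlpatterns = [
    path('queue_index_web', views.queue_index_web, name='queue_index_web'),
]

Wired like that, a single GET request such as curl 'http://localhost/queue_index_web?uri=https://example.org/' (host assumed) enqueues the page and returns {"queue": "<task id>"} as JSON.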