def create_sample(extraction_result, sample_id, job_id, url, source_type,
                  source_val='', domain='', label=None, silent=False,
                  vote_sample=True, btm_sample=False, training=True,
                  *args, **kwargs):
    """
    Finalize a Sample once the preceding extraction steps have reported in.

    `extraction_result` is an iterable of step outcomes; the sample is kept
    only when every entry is exactly `True` (e.g. `[True, True]`). Anything
    else is treated as a failed chain.

    On success the Sample row identified by `sample_id` is updated with the
    given source/flag values and, unless `silent` is set, an event is sent:
    "EventNewGoldSample" (after saving a GoldSample) when `label` is given,
    otherwise "EventNewBTMSample" or "EventNewSample" depending on
    `btm_sample`.

    On failure the Sample row is deleted and, when `label` is given, the
    job's `gold_left` counter is decremented.

    `url`, `*args` and `**kwargs` are accepted but not used here.

    Returns a `(succeeded, sample_id)` tuple.
    """
    # Only a literal True counts as success for a step.
    succeeded = all(step is True for step in extraction_result)
    job = Job.objects.get(id=job_id)

    if not succeeded:
        # Extraction failed - drop the sample entry.
        Sample.objects.filter(id=sample_id).delete()
        if label is not None:
            # A gold sample was expected; one less is left to wait for.
            # NOTE(review): the filter matches gold_left >= 0, so a counter
            # already at 0 is decremented to -1 - confirm this is intended.
            matching_jobs = Job.objects.filter(id=job.id, gold_left__gte=0)
            matching_jobs.update(gold_left=F('gold_left') - 1)
        return (succeeded, sample_id)

    # All previous tasks succeeded - fill in the proper sample entry.
    Sample.objects.filter(id=sample_id).update(
        source_type=source_type,
        source_val=source_val,
        domain=domain,
        vote_sample=vote_sample,
        btm_sample=btm_sample,
        training=training,
    )
    sample = Sample.objects.get(id=sample_id)

    if silent:
        # Caller asked for no events.
        return (succeeded, sample_id)

    if label is None:
        # Ordinary sample.
        event_name = "EventNewBTMSample" if btm_sample else "EventNewSample"
        send_event(
            event_name,
            job_id=job.id,
            sample_id=sample_id,
        )
    else:
        # Golden sample - store it first so the event can carry its id.
        gold = GoldSample(sample=sample, label=label)
        gold.save()
        send_event(
            "EventNewGoldSample",
            job_id=job.id,
            gold_id=gold.id,
        )

    return (succeeded, sample_id)
def copy_sample_to_job(sample_id, job_id, source_type, label='',
                       source_val='', btm_sample=False, *args, **kwargs):
    """
    Create a new Sample in job `job_id` by copying an existing Sample.

    The url, text and screenshot of the Sample identified by `sample_id` are
    copied; `source_type`, `source_val` and `btm_sample` come from the
    arguments. `vote_sample` and `training` are both set to the negation of
    `btm_sample`.

    After the copy, "EventSampleScreenshotDone" and "EventSampleContentDone"
    are sent for the new sample, followed by either "EventNewGoldSample"
    (after saving a GoldSample with `label`) or, when `label` is None,
    "EventNewBTMSample" / "EventNewSample".

    NOTE(review): `label` defaults to '' while the check is `is not None`,
    so the gold branch runs unless a caller passes `label=None` explicitly -
    confirm this is intended.

    Returns None on the normal path. If an IntegrityError is raised, returns
    the id of the Sample found for this job and the old sample's url.
    On DatabaseError the task is retried via `copy_sample_to_job.retry`
    (this relies on the function being registered as a Celery task; the
    decorator is not visible in this block).

    `*args` and `**kwargs` are accepted but not used here.
    """
    try:
        old_sample = Sample.objects.get(id=sample_id)
        job = Job.objects.get(id=job_id)

        # BTM samples are excluded from both voting and training.
        vote_sample = not btm_sample
        training = not btm_sample

        new_sample = Sample.objects.create(
            job=job,
            url=old_sample.url,
            text=old_sample.text,
            screenshot=old_sample.screenshot,
            source_type=source_type,
            source_val=source_val,
            btm_sample=btm_sample,
            vote_sample=vote_sample,
            training=training,
        )

        # Content and screenshot are copied, so report both as done.
        send_event(
            "EventSampleScreenshotDone",
            sample_id=new_sample.id,
            sample_url=new_sample.url,
            job_id=new_sample.job_id,
        )
        send_event(
            "EventSampleContentDone",
            sample_id=new_sample.id,
            sample_url=new_sample.url,
            job_id=new_sample.job_id,
        )

        if label is not None:
            # Golden sample - store it first so the event can carry its id.
            gold = GoldSample(sample=new_sample, label=label)
            gold.save()
            send_event(
                "EventNewGoldSample",
                job_id=job.id,
                gold_id=gold.id,
            )
        else:
            # Ordinary sample.
            send_event(
                "EventNewBTMSample" if btm_sample else "EventNewSample",
                job_id=job.id,
                sample_id=new_sample.id,
            )
    except IntegrityError:
        # Such a sample has been created in the meantime; hand back the
        # existing one instead of creating a duplicate.
        # NOTE(review): if these are Django's exceptions, IntegrityError is a
        # subclass of DatabaseError, so this handler must stay first.
        return Sample.objects.get(job=job, url=old_sample.url).id
    except DatabaseError as e:
        # Retry on db errors such as 'Database is locked'. The delay starts
        # at 60 seconds, doubles with every retry and is capped at 24 hours.
        copy_sample_to_job.retry(
            exc=e,
            countdown=min(60 * 2 ** current.request.retries, 60 * 60 * 24),
        )