Exemplo n.º 1
0
def task_join_catalogs(job_id):
    """Join the two catalogs of a ``JoinCatalogs`` job and store the result.

    Loads the job with primary key ``job_id``, marks it ``'started'``, reads
    both catalog CSV files from ``default_storage``, combines their rows with
    ``join(left, right, job.fk_field)``, writes the result to
    ``<BASE_DIR>/<MEDIA_ROOT>/catalogs/<results_label>.csv``, registers a
    ``Catalog`` record pointing at that file and finally marks the job
    ``'complete'``.

    Args:
        job_id: Primary key of the ``JoinCatalogs`` row to process.

    Returns:
        ``True`` once the job has been saved as complete.

    Raises:
        IOError: If either catalog file is missing from storage. The job is
            left in the ``'started'`` state in that case.
    """
    job = JoinCatalogs.objects.get(pk=job_id)
    job.job_status = 'started'
    job.save()

    # Read header and rows of each input inside a ``with`` block so the
    # storage handles are closed as soon as the data is in memory.
    tables = []
    for name in (job.left_table.handle.name, job.right_table.handle.name):
        if not default_storage.exists(name):
            # Fail with a clear message instead of an unbound-variable error
            # further down.
            raise IOError('Catalog file not found in storage: {}'.format(name))
        with default_storage.open(name) as source:
            reader = csv.DictReader(source)
            # ``fieldnames`` can be None for an empty file; copy it so the
            # reader's own header list is never shared or mutated.
            fields = list(reader.fieldnames or [])
            tables.append((fields, list(reader)))
    (left_fields, left), (right_fields, right) = tables

    # Ordered, duplicate-free union of both headers: left columns first, then
    # any right-only columns. A plain ``set`` would make the column order of
    # the output file arbitrary.
    keys = []
    for field in left_fields + right_fields:
        if field not in keys:
            keys.append(field)

    # ``join`` is defined elsewhere; its second return value is not used here.
    joined_rows, _columns = join(left, right, job.fk_field)

    results_handle = 'catalogs/{}.csv'.format(job.results_label)
    path = os.path.join(BASE_DIR, MEDIA_ROOT, 'catalogs', '{}.csv'.format(job.results_label))
    # NOTE(review): on Python 3 the csv docs recommend opening with
    # newline='' — confirm the target interpreter before adding it.
    with open(path, 'w') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=keys)
        writer.writeheader()
        writer.writerows(joined_rows)
    # The file is closed (and flushed) here, before any record references it.

    result = Catalog(name=job.results_label, handle=results_handle)
    result.save()

    # NOTE(review): both ``completed`` and ``complete`` are set, as in the
    # original code — confirm which one is the real model field.
    job.completed = 1
    job.complete = True
    job.results_handle = results_handle
    job.completed_date = timezone.now()
    job.job_status = 'complete'
    job.save()
    return True
Exemplo n.º 2
0
    def post(self, request):
        """Concatenate the two catalogs chosen in the form and return them as JSON.

        On a valid form the (unsaved) job's left and right catalog CSV files
        are read from ``default_storage``, their rows are concatenated (left
        rows first), and the result is written to
        ``<BASE_DIR>/<MEDIA_ROOT>/catalogs/<results_label>.csv`` with the
        sorted union of both headers as columns. A ``Catalog`` record holding
        the JSON dump is saved, the job is saved as complete, and the JSON is
        returned as a ``text/plain`` response.

        On an invalid form the template is re-rendered with the bound form.
        """
        form = self.form_class(request.POST)
        if not form.is_valid():
            return render(request, self.template_name, {'form': form})

        job = form.save(commit=False)

        # Read each input fully inside a ``with`` block so the storage handle
        # is closed once its rows are in memory.
        with default_storage.open(job.left_table.handle.name) as left_file:
            left_reader = csv.DictReader(left_file)
            # Copy the header: extending the reader's own ``fieldnames`` list
            # before the rows are read would pad every left row with
            # None-valued keys that exist only in the right table.
            left_fields = list(left_reader.fieldnames or [])
            left = list(left_reader)
        with default_storage.open(job.right_table.handle.name) as right_file:
            right_reader = csv.DictReader(right_file)
            right_fields = list(right_reader.fieldnames or [])
            right = list(right_reader)

        keys = sorted(set(left_fields) | set(right_fields))
        union = left + right

        results_handle = 'catalogs/{}.csv'.format(job.results_label)
        path = os.path.join(BASE_DIR, MEDIA_ROOT, 'catalogs', '{}.csv'.format(job.results_label))
        # NOTE(review): on Python 3 the csv docs recommend opening with
        # newline='' — confirm the target interpreter before adding it.
        with open(path, 'w') as out_file:
            writer = csv.DictWriter(out_file, fieldnames=keys)
            writer.writeheader()
            writer.writerows(union)
        # The output file is closed (and flushed) before anything refers to it.

        data = json.dumps(union, indent=2)

        result = Catalog(
            name=job.results_label,
            json=data,
            handle=results_handle,
        )
        result.save()

        job.msg_json = union
        job.job_status = "complete"
        job.completed = True
        job.results_handle = results_handle
        job.save()

        return HttpResponse(data, content_type='text/plain')