def test_doc_to_source_submission(self):
    '''
    Part of the set up method, this takes a csv and inserts it into the
    source submission table.

    This method in context of this test represents what would happen when a
    user uploads a csv and the data flows through
    "etl_tasks/transform_upload".  Uploading the csv to the server is itself
    a different task.. so for now we perform "transform_upload" on the test
    file.

    This method is in charge of one specific thing.. taking an input stream
    such as a csv, or an ODK submission, and creating one row in the
    database with the schema that it was received.  Later in the ingest
    process, users are allowed to specify settings to each file in order to
    translate them into data the application can consume and visualize.

    The Doc Transform method is responsible for the following:
        1. Inserting one record into source_submission for each csv row
        2. Inserting any new mappings into source_object_map
        3. Associating *all* source_object_maps with self.document_id
           (even those created in other documents)
        4. Inserting one record into submission_detail
    '''
    self.set_up()
    dt = DocTransform(self.user.id, self.document.id)
    source_submissions = dt.process_file()

    # FIX: use a context manager so the test file handle is closed; the
    # original left the file object open for the life of the process.
    with open(settings.MEDIA_ROOT + self.test_file_location, 'r') as test_file:
        file_line_count = sum(1 for line in test_file) - 1  # minus the header

    self.assertEqual(len(source_submissions), file_line_count)
def get_object_list(self, request):
    '''
    Run the DocTransform ingest for the requested document and return that
    document's row as a values() queryset.

    FIX: the original passed a hard-coded user id of 1 to DocTransform,
    attributing every transform to that user regardless of who made the
    request; use the authenticated requesting user's id instead (matching
    the corrected sibling implementation of this resource).
    '''
    doc_id = request.GET['document_id']

    dt = DocTransform(request.user.id, doc_id)
    dt.main()

    return Document.objects.filter(id=doc_id).values()
def test_doc_to_source_submission(self):
    '''
    Part of the set up method, this takes a csv and inserts it into the
    source submission table.

    This method in context of this test represents what would happen when a
    user uploads a csv and the data flows through
    "etl_tasks/transform_upload".  Uploading the csv to the server is itself
    a different task.. so for now we perform "transform_upload" on the test
    file.

    This method is in charge of one specific thing.. taking an input stream
    such as a csv, or an ODK submission, and creating one row in the
    database with the schema that it was received.  Later in the ingest
    process, users are allowed to specify settings to each file in order to
    translate them into data the application can consume and visualize.

    The Doc Transform method is responsible for the following:
        1. Inserting one record into source_submission for each csv row
        2. Inserting any new mappings into source_object_map
        3. Associating *all* source_object_maps with self.document_id
           (even those created in other documents)
        4. Inserting one record into source_submission_detail
    '''
    self.set_up()
    dt = DocTransform(self.user.id, self.document.id,
                      self.test_file_location)
    source_submissions = dt.process_file()

    # FIX: use a context manager so the test file handle is closed; the
    # original left the file object open for the life of the process.
    with open(settings.MEDIA_ROOT + self.test_file_location, 'r') as test_file:
        file_line_count = sum(1 for line in test_file) - 1  # minus the header

    self.assertEqual(len(source_submissions), file_line_count)
def set_up(self):
    '''
    Prepare the fixtures this test suite relies on: cache the region names,
    create the base metadata, look up the test user and document, then run
    the DocTransform over the test csv and keep the resulting source
    submission ids for later assertions.
    '''
    self.region_list = Region.objects.all().values_list('name', flat=True)
    self.test_file_location = 'ebola_data.csv'

    self.create_metadata()
    self.user = User.objects.get(username='******')
    self.document = Document.objects.get(doc_title='test')

    transform = DocTransform(
        self.user.id,
        self.document.id,
        self.test_file_location,
    )
    self.source_submissions_ids = transform.process_file()
def pre_process_file(request, document_id):
    '''
    Read the user's column-mapping selections from the query string, run the
    DocTransform over the document, refresh the document metadata, and
    redirect to the field-mapping page.

    If the datapoints were already created for this document the transform
    raises IntegrityError; that case is treated as "already processed" and
    the view continues to the redirect.
    '''
    # FIX: build the mapping dict in one comprehension instead of four
    # repetitive assignments; a missing query parameter still raises
    # KeyError exactly as before.
    mapping_keys = ('campaign_col', 'value_col', 'region_code_col',
                    'indicator_col')
    column_mappings = {key: request.GET[key] for key in mapping_keys}

    dt = DocTransform(document_id, column_mappings)

    try:
        dt.dp_df_to_source_datapoints()
    except IntegrityError:
        # Datapoints already exist for this document -- nothing to do.
        # (The original fetched them into an unused local; the queryset was
        # lazy and never evaluated, so dropping it is behavior-identical.)
        pass

    populate_document_metadata(document_id)

    return HttpResponseRedirect(reverse(
        "source_data:field_mapping",
        kwargs={"document_id": document_id},
    ))
def file_upload(request):
    '''
    GET: render the document upload form.
    POST: persist the uploaded file as a Document, run the DocTransform
    ingest over it, and redirect to the review page for the new document.
    '''
    # NOTE(review): the original declared accepted_file_formats
    # ['.csv', '.xls', '.xlsx'] but never checked the upload against it, so
    # any file type was processed.  The dead local is removed here;
    # presumably extension validation was intended -- confirm with the
    # owner before enforcing it.
    if request.method == 'GET':
        form = DocumentForm()
        return render_to_response('upload/file_upload.html',
                                  {'form': form},
                                  context_instance=RequestContext(request))
    elif request.method == 'POST':
        created_by = request.user
        to_upload = request.FILES['docfile']
        newdoc = Document.objects.create(docfile=to_upload,
                                         created_by=created_by)
        # FIX: the return value was bound to an unused local; call directly.
        dt = DocTransform(newdoc.id)
        dt.process_file()
        return HttpResponseRedirect('/doc_review/overview/%s' % newdoc.id)
def set_up(self):
    '''
    Refresh master needs a few pieces of metadata to be able to do its job:
    Location, Campaign, User -- all of the main models initialized in the
    first migrations of the datapoints application.

    Also runs the DocTransform, which simulates the upload of a csv or the
    processing of an ODK submission.  Ideally this test would run
    independently of that module, but for now this is how data is seeded
    into the system, via the csv below.
    '''
    self.test_file_location = 'ebola_data.csv'
    self.location_list = Location.objects.all().values_list('name', flat=True)

    self.create_metadata()
    self.user = User.objects.get(username='******')

    self.document = Document.objects.get(doc_title='test')
    self.document.docfile = self.test_file_location
    self.document.save()

    DocTransform(self.user.id, self.document.id).main()
def file_upload(request):
    '''
    Render the upload form on GET; on POST save the uploaded document, run
    the ingest transform over it, and redirect to the review overview page.
    '''
    # NOTE(review): this list is never checked against the uploaded file;
    # presumably extension validation was intended -- confirm.
    accepted_file_formats = ['.csv', '.xls', '.xlsx']

    if request.method == 'GET':
        upload_form = DocumentForm()
        return render_to_response('upload/file_upload.html',
                                  {'form': upload_form},
                                  context_instance=RequestContext(request))
    elif request.method == 'POST':
        uploaded_file = request.FILES['docfile']
        new_document = Document.objects.create(
            docfile=uploaded_file,
            created_by=request.user,
        )
        transform = DocTransform(new_document.id)
        source_submissions = transform.process_file()
        return HttpResponseRedirect('/doc_review/overview/%s' % new_document.id)
def set_up(self):
    '''
    Refresh master needs a few pieces of metadata to be able to do its job:
    Location, Campaign, User -- all of the main models initialized in the
    first migrations of the datapoints application.

    Also runs the DocTransform, which simulates the upload of a csv or the
    processing of an ODK submission, and stores the resulting source
    submission ids.  Ideally this test would run independently of that
    module, but for now this is how data is seeded into the system, via the
    csv below.
    '''
    self.test_file_location = 'ebola_data.csv'
    self.location_list = Location.objects.all().values_list('name', flat=True)

    self.create_metadata()
    self.user = User.objects.get(username='******')

    self.document = Document.objects.get(doc_title='test')
    self.document.docfile = self.test_file_location
    self.document.save()

    transform = DocTransform(self.user.id, self.document.id)
    self.source_submissions_ids = transform.process_file()
def get_object_list(self, request):
    '''
    Kick off the DocTransform for the document named in the query string on
    behalf of the requesting user, then return that document's row as a
    values() queryset.
    '''
    document_id = request.GET['document_id']

    transform = DocTransform(request.user.id, document_id)
    transform.main()

    return Document.objects.filter(id=document_id).values()