def test_clean_keys(self):
    LabelValueStore.objects(silo_id=self.silo.id).delete()
    lvs = LabelValueStore()
    orig_data = {
        'Header 1': 'r1c1',
        'create_date': 'r1c3',
        'edit_date': 'r1c2',
        '_id': 'r1c4'
    }

    for k, v in orig_data.iteritems():
        key = cleanKey(k)
        val = smart_str(v, strings_only=True)
        key = smart_str(key)
        val = val.strip()
        setattr(lvs, key, val)
    lvs.silo_id = self.silo.id
    lvs.save()

    returned_data = json.loads(LabelValueStore.objects(
        silo_id=self.silo.id).to_json())[0]
    returned_data.pop('_id')
    expected_data = {
        'Header 1': 'r1c1',
        'created_date': 'r1c3',
        'editted_date': 'r1c2',
        'user_assigned_id': 'r1c4',
        'read_id': -1,
        'silo_id': self.silo.id
    }
    self.assertEqual(returned_data, expected_data)
    LabelValueStore.objects(silo_id=self.silo.id).delete()
def test_import_from_gsheet_helper_skipped_rows(
        self, mock_get_credential_obj, mock_get_authorized_service,
        mock_get_gsheet_metadata, mock_fetch_data_gsheet,
        mock_get_or_create_read):
    lvs = LabelValueStore()
    lvs.silo_id = self.silo.id
    lvs.save()

    data = [{
        'First.Name': 'John',
        'Last.Name': 'Doe',
        'E-mail': '*****@*****.**',
    }, {
        'First.Name': 'Bob',
        'Last.Name': 'Smith',
        'E-mail': '*****@*****.**',
    }]
    save_data_to_silo(self.silo, data, self.read)

    # create multiple lvs
    lvs = LabelValueStore()
    lvs.silo_id = self.silo.id
    lvs.save()
    save_data_to_silo(self.silo, data, self.read)

    factories.UniqueFields(name='E-mail', silo=self.silo)
    data = [
        ['First.Name', 'Last.Name', 'E-mail'],
        ['John', 'Lennon', '*****@*****.**'],
    ]
    expected_result = [{
        'silo_id': self.silo.id
    }, {
        'msg': 'Skipped updating/adding records where '
               '[email protected],silo_id=1 because there are '
               'already multiple records.',
        'level': messages.WARNING
    }, {
        'msg': 'Operation successful',
        'level': messages.SUCCESS
    }]

    mock_get_credential_obj.return_value = Mock(OAuth2Credentials)
    mock_get_authorized_service.return_value = Mock()
    mock_get_gsheet_metadata.return_value = (Mock(), None)
    mock_fetch_data_gsheet.return_value = (data, None)
    mock_get_or_create_read.return_value = self.read

    result = gviews_v4.import_from_gsheet_helper(self.tola_user.user,
                                                 self.silo.id,
                                                 self.silo.name, 1234,
                                                 partialcomplete=True)
    self.assertEqual(result, ([], expected_result))
def silo_data_api(request, id):
    if id <= 0:
        return HttpResponseBadRequest("The silo_id = %s is invalid" % id)
    data = LabelValueStore.objects(silo_id=id).to_json()
    json_data = json.loads(data)
    return JsonResponse(json_data, safe=False)
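# A hedged usage sketch, not taken from the source: how silo_data_api might
# be wired into a URLconf. The route path, module path, and route name are
# assumptions for illustration only. Note that Django passes `id` captured
# from the URL as a string unless the view or URLconf converts it, so the
# `id <= 0` guard above only fires for integer callers.
# from silo.api import silo_data_api  # hypothetical module path
from django.conf.urls import url

urlpatterns = [
    # e.g. GET /api/silo/42/data/ returns all of silo 42's documents as JSON
    url(r'^api/silo/(?P<id>\d+)/data/$', silo_data_api, name='silo_data_api'),
]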
def handle(self, *args, **options):
    skip_row = False
    frequency = options['frequency']
    if frequency != "daily" and frequency != "weekly":
        return self.stdout.write(
            "Frequency argument must be either 'daily' or 'weekly'")

    silos = Silo.objects.filter(
        unique_fields__isnull=False,
        reads__autopull=True,
        reads__autopull_frequency__isnull=False,
        reads__autopull_frequency=frequency).distinct()
    read_type = ReadType.objects.get(read_type="JSON")
    for silo in silos:
        reads = silo.reads.filter(type=read_type.pk)
        for read in reads:
            ona_token = ThirdPartyTokens.objects.get(user=silo.owner.pk,
                                                     name="ONA")
            response = requests.get(
                read.read_url,
                headers={'Authorization': 'Token %s' % ona_token.token})
            data = json.loads(response.content)

            # import data into this silo
            num_rows = len(data)
            if num_rows == 0:
                continue

            counter = None
            # loop over data and insert create and edit dates
            for counter, row in enumerate(data):
                skip_row = False
                # if the value of the unique column is already in the
                # existing silo data, skip the row
                for unique_field in silo.unique_fields.all():
                    filter_criteria = {
                        'silo_id': silo.pk,
                        unique_field.name: row[unique_field.name]}
                    if LabelValueStore.objects.filter(
                            **filter_criteria).count() > 0:
                        skip_row = True
                        continue
                if skip_row:
                    continue

                # at this point, the unique column value is not in
                # existing data, so append it
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                for new_label, new_value in row.iteritems():
                    # use equality, not identity (`is not`), for these
                    # string comparisons
                    if new_label and new_label not in ("edit_date",
                                                       "create_date"):
                        setattr(lvs, new_label, new_value)
                lvs.create_date = timezone.now()
                result = lvs.save()
            if num_rows == (counter + 1):
                combineColumns(silo.pk)
            self.stdout.write(
                'Successfully fetched the READ_ID, "%s", from ONA' % read.pk)
def handle(self, *args, **options):
    silo = None
    read = None
    silo_id = options['silo_id']
    username = options['username']
    user = User.objects.get(username__exact=username)
    reads = Read.objects.filter(owner=user)
    try:
        silo = Silo.objects.get(pk=silo_id)
    except Silo.DoesNotExist:
        raise CommandError('Silo "%s" does not exist' % silo_id)

    for read_id in options['read_ids']:
        try:
            # use get() so a missing read raises Read.DoesNotExist;
            # filter(...)[0] would raise IndexError and bypass the handler
            read = reads.get(pk=read_id)
        except Read.DoesNotExist:
            raise CommandError('Read "%s" does not exist for user, %s'
                               % (read_id, user.username))

        # Fetch the data from ONA
        ona_token = ThirdPartyTokens.objects.get(user=user.pk, name="ONA")
        response = requests.get(
            read.read_url,
            headers={'Authorization': 'Token %s' % ona_token.token})
        data = json.loads(response.content)

        # import data into this silo
        num_rows = len(data)
        if num_rows == 0:
            continue

        counter = None
        # loop over data and insert create and edit dates
        for counter, row in enumerate(data):
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            for new_label, new_value in row.iteritems():
                # use equality, not identity (`is not`), for these
                # string comparisons
                if new_label and new_label not in ("edit_date",
                                                   "create_date"):
                    setattr(lvs, new_label, new_value)
            lvs.create_date = timezone.now()
            result = lvs.save()
        if num_rows == (counter + 1):
            combineColumns(silo_id)
        self.stdout.write(
            'Successfully fetched the READ_ID, "%s", from database' % read_id)
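# Hypothetical invocation of the command above. Its registered name is not
# visible in this snippet, and the argument layout is inferred from the
# options[] lookups, so treat both as assumptions:
#
#   python manage.py <import_ona_reads> <silo_id> <username> <read_id> [read_id ...]
#
# Each read_id is fetched from ONA using the owner's stored token and merged
# into the target silo.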
def getSiloColumnNames(id):
    lvs = LabelValueStore.objects(silo_id=id).to_json()
    data = {}
    jsonlvs = json.loads(lvs)
    for item in jsonlvs:
        for k, v in item.iteritems():
            # skip internal bookkeeping fields
            if k in ("_id", "edit_date", "create_date", "silo_id"):
                continue
            data[k] = v
    return data
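# Minimal usage sketch (assumed, not from the source): the returned dict is
# keyed by the silo's user-facing column names, so callers typically want
# its keys. `silo` here stands in for a hypothetical Silo instance.
column_names = getSiloColumnNames(silo.pk).keys()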
def combineColumns(silo_id):
    client = MongoClient(settings.MONGODB_HOST)
    db = client.tola
    lvs = json.loads(LabelValueStore.objects(silo_id=silo_id).to_json())

    # gather the union of all column names across the silo's documents
    cols = []
    for l in lvs:
        cols.extend([k for k in l.keys() if k not in cols])

    # backfill an empty string for any column a document is missing
    for l in lvs:
        for c in cols:
            if c not in l.keys():
                db.label_value_store.update_one(
                    {"_id": ObjectId(l['_id']['$oid'])},
                    {"$set": {c: ''}},
                    False)
    return True
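# Illustration with hypothetical data (not from the source) of what
# combineColumns normalizes: documents with differing keys end up sharing
# the union of all columns, with "" backfilled for the ones they lack.
#
#   before: {"name": "ann"}               {"age": 3}
#   after:  {"name": "ann", "age": ""}    {"name": "", "age": 3}
combineColumns(silo.pk)  # `silo` is a hypothetical Silo instance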
def test_import_from_gsheet_helper_with_integer_unique_fields(
        self, mock_get_credential_obj, mock_get_authorized_service,
        mock_get_gsheet_metadata, mock_fetch_data_gsheet,
        mock_get_or_create_read):
    """Import function should update existing data when it gets new data
    with the same unique field"""
    lvs = LabelValueStore()
    lvs.silo_id = self.silo.id
    lvs.save()

    data = [{
        'First.Name': 'John',
        'Last.Name': 'Doe',
        'Number': 1,
    }, {
        'First.Name': 'Bob',
        'Last.Name': 'Smith',
        'Number': 2,
    }]
    save_data_to_silo(self.silo, data, self.read)

    factories.UniqueFields(name='Number', silo=self.silo)
    data = [
        ['First.Name', 'Last.Name', 'Number'],
        ['John', 'Lennon', 1],
    ]
    expected_result = [{
        'silo_id': self.silo.id
    }, {
        'msg': 'Operation successful',
        'level': messages.SUCCESS
    }]

    mock_get_credential_obj.return_value = Mock(OAuth2Credentials)
    mock_get_authorized_service.return_value = Mock()
    mock_get_gsheet_metadata.return_value = (Mock(), None)
    mock_fetch_data_gsheet.return_value = (data, None)
    mock_get_or_create_read.return_value = self.read

    result = gviews_v4.import_from_gsheet_helper(self.hikaya_user.user,
                                                 self.silo.id,
                                                 self.silo.name, 1234)
    self.assertEqual(result, expected_result)

    lvss = LabelValueStore.objects.filter(silo_id=self.silo.id)
    count = 0
    for lvs in lvss:
        lvs_json = json.loads(lvs.to_json())
        if lvs_json.get('First_Name') == 'John':
            self.assertEqual(lvs_json.get('Last_Name'), 'Lennon')
            count += 1
    self.assertEqual(count, 1)
    self.assertEqual(lvss.count(), 3)
def test_import_from_gsheet_helper_unique_fields(
        self, mock_get_credential_obj, mock_get_authorized_service,
        mock_get_gsheet_metadata, mock_fetch_data_gsheet,
        mock_get_or_create_read):
    lvs = LabelValueStore()
    lvs.silo_id = self.silo.id
    lvs.save()

    data = [{
        'First.Name': 'John',
        'Last.Name': 'Doe',
        'E-mail': '*****@*****.**',
    }, {
        'First.Name': 'Bob',
        'Last.Name': 'Smith',
        'E-mail': '*****@*****.**',
    }]
    save_data_to_silo(self.silo, data, self.read)

    factories.UniqueFields(name='E-mail', silo=self.silo)
    data = [
        ['First.Name', 'Last.Name', 'E-mail'],
        ['John', 'Lennon', '*****@*****.**'],
    ]
    expected_result = [{
        'silo_id': self.silo.id
    }, {
        'msg': 'Operation successful',
        'level': messages.SUCCESS
    }]

    mock_get_credential_obj.return_value = Mock(OAuth2Credentials)
    mock_get_authorized_service.return_value = Mock()
    mock_get_gsheet_metadata.return_value = (Mock(), None)
    mock_fetch_data_gsheet.return_value = (data, None)
    mock_get_or_create_read.return_value = self.read

    result = gviews_v4.import_from_gsheet_helper(self.tola_user.user,
                                                 self.silo.id,
                                                 self.silo.name, 1234)
    self.assertEqual(result, expected_result)

    lvss = LabelValueStore.objects.filter(silo_id=self.silo.id)
    count = 0
    for lvs in lvss:
        lvs_json = json.loads(lvs.to_json())
        if lvs_json.get('First_Name') == 'John':
            self.assertEqual(lvs_json.get('Last_Name'), 'Lennon')
            count += 1
    self.assertEqual(count, 1)
def test_postNewFormulaColumn(self):
    data = {
        'math_operation': 'sum',
        'column_name': '',
        'columns': []
    }
    response = self.client.post('/new_formula_column/{}/'.format(
        self.silo.pk), data=data)
    self.assertEqual(response.status_code, 302)

    lvs = LabelValueStore()
    lvs.a = "1"
    lvs.b = "2"
    lvs.c = "3"
    lvs.silo_id = self.silo.pk
    lvs.save()

    lvs = LabelValueStore()
    lvs.a = "2"
    lvs.b = "2"
    lvs.c = "3.3"
    lvs.silo_id = self.silo.pk
    lvs.save()

    lvs = LabelValueStore()
    lvs.a = "3"
    lvs.b = "2"
    lvs.c = "hi"
    lvs.silo_id = self.silo.pk
    lvs.save()

    data = {
        'math_operation': 'sum',
        'column_name': '',
        'columns': ['a', 'b', 'c']
    }
    response = self.client.post('/new_formula_column/{}/'.format(
        self.silo.pk), data=data)
    self.assertEqual(response.status_code, 302)

    formula_column = self.silo.formulacolumns.get(column_name='sum')
    self.assertEqual(formula_column.operation, 'sum')
    self.assertEqual(formula_column.mapping, '["a", "b", "c"]')
    self.assertEqual(formula_column.column_name, 'sum')
    self.assertEqual(getSiloColumnNames(self.silo.pk), ["sum"])

    self.silo = Silo.objects.get(pk=self.silo.pk)
    self.assertEqual(getColToTypeDict(self.silo).get('sum'), 'float')

    try:
        lvs = LabelValueStore.objects.get(a="1", b="2", c="3", sum=6.0,
                                          read_id=-1, silo_id=self.silo.pk)
        lvs.delete()
    except LabelValueStore.DoesNotExist:
        self.fail("expected row with sum=6.0 was not created")
    try:
        lvs = LabelValueStore.objects.get(a="2", b="2", c="3.3", sum=7.3,
                                          read_id=-1, silo_id=self.silo.pk)
        lvs.delete()
    except LabelValueStore.DoesNotExist:
        self.fail("expected row with sum=7.3 was not created")
    try:
        lvs = LabelValueStore.objects.get(a="3", b="2", c="hi", sum="Error",
                                          read_id=-1, silo_id=self.silo.pk)
        lvs.delete()
    except LabelValueStore.DoesNotExist:
        self.fail("expected row with sum='Error' was not created")
def tableDashboard(request, id=0):
    """
    DEMO only: survey dashboard for use in public talks about TolaData.
    Share the survey URL; responses are aggregated in TolaTables and then
    imported into this dashboard.
    :return:
    """
    # get all countries
    countries = Country.objects.all()
    get_table = Silo.objects.get(id=id)
    try:
        get_fields = UniqueFields.objects.get(silo__id=id)
    except UniqueFields.DoesNotExist:
        get_fields = None
    doc = LabelValueStore.objects(silo_id=id).to_json()
    data = ast.literal_eval(doc)

    from collections import Counter

    latitude = []
    longitude = []
    lat_long = {}
    country = {}
    # each field needs a count of unique answers
    if get_fields is None and data:
        get_fields = {}
        # loop over the field names only
        for field in data[0]:
            # to-do: move these into models
            exclude_string = ['read_id', 'silo_id', '_id', 'formhub/uuid',
                              'meta/instanceID', 'user_assigned_id',
                              'meta/instanceName', 'create_date']
            map_lat_string = ['lat', 'latitude', 'x']
            map_long_string = ['long', 'lng', 'longitude', 'y']
            map_country_string = ['countries', 'country']
            if field not in exclude_string:
                get_fields[field] = {}  # create a dict with fields as the key
                cnt = Counter()
                answers = []  # a list for the answers
                for idx, col in enumerate(data):
                    try:
                        answers.append(col[field])  # append list of answers
                    except KeyError:
                        answers.append(None)  # no answer
                # loop and count each unique answer
                for a in answers:
                    # if the answer is a dict, count each of its keys
                    if isinstance(a, dict):
                        for x in a:
                            cnt[x] += 1
                    else:
                        cnt[a] += 1
                unique_count = cnt
                # append unique answer plus count to dict
                get_fields[field][idx] = unique_count.most_common()

                from django.utils.safestring import SafeString
                temp = []
                temp2 = []
                for letter, count in get_fields[field][idx]:
                    temp.append(str(letter))
                    temp2.append(str(count))
                get_fields[field][idx] = {"label": SafeString(temp),
                                          "count": SafeString(temp2)}
            # if a latitude string, add it to the map list
            if field in map_lat_string:
                for idx, col in enumerate(data):
                    latitude.append(col[field])
            # if a longitude string, add it to the map list
            if field in map_long_string:
                for idx, col in enumerate(data):
                    longitude.append(col[field])
            # merge lat and long
            lat_long = dict(zip(latitude, longitude))
    else:
        get_fields = None

    columns = ast.literal_eval(get_table.columns)

    return render(request, "reports/table_dashboard.html",
                  {'data': data, 'get_table': get_table,
                   'countries': countries, 'get_fields': get_fields,
                   'lat_long': lat_long, 'columns': columns})
def saveDataToSilo(silo, data):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a python list of dictionaries, stored in MongoDB
    """
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    enc = "latin-1"
    counter = 0  # counts the rows actually written
    for row in data:
        # resetting filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError:
                # when this exception occurs, it means that the col
                # identified as the unique_col is not present in the
                # fetched dataset
                pass

        # if filter_criteria is set, then update it with the current
        # silo_id; else set filter_criteria to a non-existent key/value so
        # that it triggers a DoesNotExist exception in order to create a
        # new document instead of updating an existing one
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
        else:
            filter_criteria.update(
                {"nonexistentkey": "NEVER0101010101010NEVER"})

        try:
            lvs = LabelValueStore.objects.get(**filter_criteria)
            setattr(lvs, "edit_date", timezone.now())
        except LabelValueStore.DoesNotExist:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
        except LabelValueStore.MultipleObjectsReturned:
            for k, v in filter_criteria.iteritems():
                skipped_rows.add("%s=%s" % (k, v))
            continue

        # set the fields in the current document and save it
        for key, val in row.iteritems():
            if key == "" or key is None or key == "silo_id":
                continue
            elif key == "id" or key == "_id":
                key = "user_assigned_id"
            elif key == "edit_date":
                key = "editted_date"
            elif key == "create_date":
                key = "created_date"
            if type(val) == str or type(val) == unicode:
                val = smart_str(val, strings_only=True)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        counter += 1
        lvs.save()
    combineColumns(silo.pk)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res
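# Hedged call sketch (assumed, not from the source): rows are plain dicts;
# reserved keys such as "id" and "create_date" are renamed on save, and the
# result reports how many rows were written plus any unique-field clashes
# that were skipped. `silo` and the row contents are hypothetical.
result = saveDataToSilo(silo, [{"id": 7, "name": "Jane"}])
print(result["num_rows"], result["skipped_rows"])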
def save_data_to_silo(silo, data, read=-1, user=None):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a python list of dictionaries, stored in MongoDB
    read -- the read object; optional, only for backwards compatibility
    user -- an optional parameter, used if it is necessary to retrieve
            from ThirdPartyTokens
    """
    try:
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError:
        read_source_id = read

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    counter = 0
    keys = []
    try:
        keys = data.fieldnames
        keys = [cleanKey(key) for key in keys]
    except AttributeError as e:
        logger.warning(e)

    for counter, row in enumerate(data):
        # resetting filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError as e:
                # when this exception occurs, it means that the col
                # identified as the unique_col is not present in the
                # fetched dataset
                logger.info(e)

        # if filter_criteria is set, update it with the current silo_id
        # and try to update an existing document; otherwise create a new
        # document directly
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                setattr(lvs, "edit_date", timezone.now())
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("{}={}".format(str(k), str(v)))
                continue
        else:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = clean_data_obj(row)
        for key, val in row.iteritems():
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)
        counter += 1
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()
    addColsToSilo(silo, keys)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res
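# Hedged usage sketch (assumed, not from the source): `read` may be a Read
# object, in which case its id is stamped on every stored row, or left as
# the -1 default for legacy callers. csv.DictReader inputs also work, since
# their `fieldnames` are picked up and cleaned above. `silo` and `read`
# here are hypothetical instances.
rows = [{"First.Name": "John", "Last.Name": "Doe"}]
summary = save_data_to_silo(silo, rows, read)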
def table_dashboard(request, id=0):
    """
    Dynamic dashboard report based on table data; finds lat and long fields
    :return:
    """
    # get all countries
    countries = Country.objects.all()
    get_table = Silo.objects.get(id=id)
    try:
        get_init_fields = UniqueFields.objects.get(silo__id=id)
    except UniqueFields.DoesNotExist:
        get_init_fields = None
    doc = LabelValueStore.objects(silo_id=id).to_json()
    try:
        data = ast.literal_eval(doc)
    except ValueError:
        data = json.loads(doc)

    latitude = []
    longitude = []
    lat_long = {}
    country = []  # list, since country names are appended below

    # each field needs a count of unique answers
    if get_init_fields is None and data:
        get_fields = {}
        # loop over the field names only
        for field in data[0]:
            # to-do: move these into models
            exclude_string = [
                'read_id', 'silo_id', '_id', 'formhub/uuid',
                'meta/instanceID', 'user_assigned_id',
                'meta/instanceName', 'create_date'
            ]
            map_lat_string = ['lat', 'latitude', 'x']
            map_long_string = ['long', 'lng', 'longitude', 'y']
            map_country_string = ['countries', 'country']
            map_location = ['location', 'coordinated', 'coord']
            if field not in exclude_string:
                # create a dict with fields as the key
                get_fields[field] = {}
                cnt = Counter()
                answers = []  # a list for the answers
                for idx, col in enumerate(data):
                    try:
                        # append list of answers
                        answers.append(col[field])
                    except KeyError:
                        answers.append(None)  # no answer
                # loop and count each unique answer
                # TODO: move this into a recursive function that checks
                # each level for a list or dict and continues to parse
                # until a count can be found
                for a in answers:
                    # if the answer is a dict or list, count each element
                    if isinstance(a, dict):
                        for x in a.keys():
                            cnt[x] += 1
                    elif isinstance(a, list):
                        # if a list within a list
                        for x in a:
                            if isinstance(x, dict):
                                for y in x.keys():
                                    cnt[y] += 1
                            else:
                                cnt[x] += 1
                    else:
                        cnt[a] += 1
                unique_count = cnt
                # append unique answer plus count to dict
                get_fields[field][idx] = unique_count.most_common()

                temp = []
                temp2 = []
                for letter, count in get_fields[field][idx]:
                    if isinstance(letter, unicode):
                        temp.append(
                            SafeString(
                                unicodedata.normalize('NFKD', letter).encode(
                                    'ascii', 'ignore')))
                    else:
                        temp.append(letter)
                    temp2.append(count)
                # render None as the string 'None' for the template
                try:
                    find_none = temp.index(None)
                    temp[find_none] = 'None'
                except ValueError:
                    pass
                get_fields[field][idx] = {
                    "label": SafeString(temp),
                    "count": SafeString(temp2)
                }
            # if a latitude string, add it to the map list
            if field in map_lat_string:
                for idx, col in enumerate(data):
                    latitude.append(col[field])
            # if a longitude string, add it to the map list
            if field in map_long_string:
                for idx, col in enumerate(data):
                    longitude.append(col[field])
            # if a location string, add it to the map list
            if field in map_location:
                for idx, col in enumerate(data):
                    latitude.append(
                        itertools.islice(col[field].iteritems(), 3, 4))
                    longitude.append(
                        itertools.islice(col[field].iteritems(), 4, 5))
            # if a country name
            if field in map_country_string:
                for idx, col in enumerate(data):
                    country_obj = Country.objects.get(country=col[field])
                    longitude.append(country_obj.longitude)
                    latitude.append(country_obj.latitude)
                    country.append(country_obj.country)
            # merge lat and long
            lat_long = dict(zip(latitude, longitude))
    else:
        get_fields = None

    try:
        columns = ast.literal_eval(get_table.columns)
    except ValueError:
        columns = json.loads(get_table.columns)

    return render(
        request, "reports/table_dashboard.html", {
            'data': data,
            'get_table': get_table,
            'countries': countries,
            'get_fields': get_fields,
            'lat_long': lat_long,
            'country': country,
            'columns': columns
        })
def saveDataToSilo(silo, data, read=-1, user=None):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a python list of dictionaries, stored in MongoDB
    read -- the read object; optional, only for backwards compatibility
    user -- an optional parameter, used if it is necessary to retrieve
            from ThirdPartyTokens
    """
    try:
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError:
        read_source_id = read

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    keys = []
    try:
        keys = data.fieldnames
        keys = [cleanKey(key) for key in keys]
    except AttributeError:
        pass
    fieldToType = getColToTypeDict(silo)

    counter = 0  # counts the rows actually written
    for row in data:
        # resetting filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError as e:
                # when this exception occurs, it means that the col
                # identified as the unique_col is not present in the
                # fetched dataset
                logger.info(e)

        # if filter_criteria is set, update it with the current silo_id
        # and try to update an existing document; otherwise create a new
        # document directly
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            # filter_criteria.update({"nonexistentkey": "NEVER0101010101010NEVER"})
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                setattr(lvs, "edit_date", timezone.now())
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (str(k), str(v)))
                continue
        else:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = cleanDataObj(row, silo)
        for key, val in row.iteritems():
            # per-key cleaning and type coercion superseded by cleanDataObj:
            # if key == "" or key is None or key == "silo_id": continue
            # elif key == "id" or key == "_id": key = "user_assigned_id"
            # elif key == "edit_date": key = "editted_date"
            # elif key == "create_date": key = "created_date"
            # if type(val) == str or type(val) == unicode:
            #     val = smart_str(val, strings_only=True).strip()
            # if fieldToType.get(key, 'string') == 'int':
            #     try:
            #         val = int(val)
            #     except ValueError:
            #         continue
            # if fieldToType.get(key, 'string') == 'double':
            #     try:
            #         val = float(val)
            #     except ValueError:
            #         continue
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)
        counter += 1
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()
    addColsToSilo(silo, keys)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res