def handle(self, *args, **options):
    frequency = options['frequency']
    if frequency not in ("daily", "weekly"):
        return self.stdout.write("Frequency argument can either be 'daily' or 'weekly'")

    silos = Silo.objects.filter(
        unique_fields__isnull=False,
        reads__autopull=True,
        reads__autopull_frequency__isnull=False,
        reads__autopull_frequency=frequency).distinct()
    read_type = ReadType.objects.get(read_type="JSON")

    for silo in silos:
        reads = silo.reads.filter(type=read_type.pk)
        for read in reads:
            ona_token = ThirdPartyTokens.objects.get(
                user=silo.owner.pk, name="ONA")
            response = requests.get(
                read.read_url,
                headers={'Authorization': 'Token %s' % ona_token.token})
            data = json.loads(response.content)

            # import data into this silo
            if len(data) == 0:
                continue

            # loop over the data, skipping any row whose unique-field value
            # is already present in the silo
            for row in data:
                skip_row = False
                for unique_field in silo.unique_fields.all():
                    filter_criteria = {
                        'silo_id': silo.pk,
                        unique_field.name: row[unique_field.name]}
                    if LabelValueStore.objects.filter(**filter_criteria).exists():
                        skip_row = True
                        break
                if skip_row:
                    continue

                # at this point, the unique column value is not in the
                # existing data, so append the row
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                for new_label, new_value in row.iteritems():
                    if new_label not in ("", "edit_date", "create_date") \
                            and new_label is not None:
                        setattr(lvs, new_label, new_value)
                lvs.create_date = timezone.now()
                lvs.save()

            # merge duplicate columns once all rows are saved
            combineColumns(silo.pk)
            self.stdout.write(
                'Successfully fetched the READ_ID, "%s", from ONA' % read.pk)
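# Context: the handle() above assumes a Django management command that
# registers a 'frequency' argument. A minimal sketch of that wiring, under
# the assumption the command uses argparse-style add_arguments(); the class
# and help text here are illustrative, not the project's actual code.
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    help = "Autopull JSON reads from ONA into their silos"

    def add_arguments(self, parser):
        # handle() reads options['frequency'] and rejects anything other
        # than 'daily' or 'weekly'
        parser.add_argument('frequency', type=str,
                            help="either 'daily' or 'weekly'")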
def handle(self, *args, **options):
    silo_id = options['silo_id']
    username = options['username']
    user = User.objects.get(username__exact=username)
    reads = Read.objects.filter(owner=user)
    try:
        silo = Silo.objects.get(pk=silo_id)
    except Silo.DoesNotExist:
        raise CommandError('Silo "%s" does not exist' % silo_id)

    for read_id in options['read_ids']:
        # use get() so a missing read raises Read.DoesNotExist (slicing a
        # filtered queryset would raise IndexError instead)
        try:
            read = reads.get(pk=read_id)
        except Read.DoesNotExist:
            raise CommandError('Read "%s" does not exist for user, %s'
                               % (read_id, user.username))

        # Fetch the data from ONA
        ona_token = ThirdPartyTokens.objects.get(user=user.pk, name="ONA")
        response = requests.get(
            read.read_url,
            headers={'Authorization': 'Token %s' % ona_token.token})
        data = json.loads(response.content)

        # import data into this silo
        if len(data) == 0:
            continue

        # loop over the data, stamp a create_date on each row, and save it
        for row in data:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            for new_label, new_value in row.iteritems():
                if new_label not in ("", "edit_date", "create_date") \
                        and new_label is not None:
                    setattr(lvs, new_label, new_value)
            lvs.create_date = timezone.now()
            lvs.save()

        # merge duplicate columns once all rows are saved
        combineColumns(silo_id)
        self.stdout.write(
            'Successfully fetched the READ_ID, "%s", from database' % read_id)
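# Context: a sketch of the argument wiring this second handle() assumes,
# derived from the options it reads; the argument types are assumptions.
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    help = "Import one or more reads into a silo for a given user"

    def add_arguments(self, parser):
        parser.add_argument('silo_id', type=int)
        parser.add_argument('username', type=str)
        # handle() iterates options['read_ids'], so accept one or more ids
        parser.add_argument('read_ids', nargs='+', type=int)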
def saveDataToSilo(silo, data):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a python list of dictionaries, stored in MongoDB
    """
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    num_saved_rows = 0

    for row in data:
        # reset filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError:
                # when this exception occurs, it means that the column
                # identified as the unique_col is not present in the
                # fetched dataset
                pass

        # if filter_criteria is set, then update it with the current silo_id;
        # else set filter_criteria to some non-existent key and value so that
        # it triggers a DoesNotExist exception in order to create a new
        # document instead of updating an existing one.
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
        else:
            filter_criteria.update(
                {"nonexistentkey": "NEVER0101010101010NEVER"})

        try:
            # update an existing document
            lvs = LabelValueStore.objects.get(**filter_criteria)
            lvs.edit_date = timezone.now()
        except LabelValueStore.DoesNotExist:
            # create a new document
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
        except LabelValueStore.MultipleObjectsReturned:
            # the unique fields are ambiguous here, so skip the row and
            # report which criteria matched more than one document
            for k, v in filter_criteria.iteritems():
                skipped_rows.add("%s=%s" % (k, v))
            continue

        # set the fields in the current document and save it
        for key, val in row.iteritems():
            if key == "" or key is None or key == "silo_id":
                continue
            elif key == "id" or key == "_id":
                key = "user_assigned_id"
            elif key == "edit_date":
                key = "editted_date"
            elif key == "create_date":
                key = "created_date"
            if isinstance(val, (str, unicode)):
                val = smart_str(val, strings_only=True)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        lvs.save()
        num_saved_rows += 1

    combineColumns(silo.pk)
    return {"skipped_rows": skipped_rows, "num_rows": num_saved_rows}
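# Usage sketch for saveDataToSilo(); the silo id and payload below are
# illustrative assumptions. Rows whose unique-field values match more than
# one existing document are skipped and reported back instead of written.
silo = Silo.objects.get(pk=1)  # hypothetical silo
payload = [
    {"id": "42", "name": "clinic A", "visits": "10"},
    {"id": "43", "name": "clinic B", "visits": "7"},
]
result = saveDataToSilo(silo, payload)
print("num_rows: %s, skipped: %s"
      % (result["num_rows"], result["skipped_rows"]))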
def save_data_to_silo(silo, data, read=-1, user=None):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a python list of dictionaries, stored in MongoDB
    read -- the read object, optional only for backwards compatibility
    user -- an optional parameter to use if it is necessary to retrieve
            from ThirdPartyTokens
    """
    try:
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError:
        # read was passed as a plain id (or the -1 default) rather than
        # a Read object
        read_source_id = read

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    num_saved_rows = 0
    keys = []
    try:
        keys = [cleanKey(key) for key in data.fieldnames]
    except AttributeError as e:
        # plain lists of dicts have no fieldnames attribute; the keys are
        # collected row by row below instead
        logger.warning(e)

    for row in data:
        # reset filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError as e:
                # when this exception occurs, it means that the column
                # identified as the unique_col is not present in the
                # fetched dataset
                logger.info(e)

        # if filter_criteria is set, update it with the current silo_id and
        # upsert against the existing documents; otherwise create a new
        # document instead of updating an existing one.
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("{}={}".format(str(k), str(v)))
                continue
        else:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = clean_data_obj(row)
        for key, val in row.iteritems():
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()
        num_saved_rows += 1

    addColsToSilo(silo, keys)
    return {"skipped_rows": skipped_rows, "num_rows": num_saved_rows}
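# Usage sketch showing the two call styles save_data_to_silo() accepts; the
# Read lookup and row payload are illustrative assumptions.
rows = [{"id": "1", "score": "3"}, {"id": "2", "score": "5"}]
read = Read.objects.get(pk=7)  # hypothetical read
# new style: each saved document is tagged with read.id as its read_id
save_data_to_silo(silo, rows, read=read, user=read.owner)
# legacy style: read defaults to -1, which becomes the stored read_id
save_data_to_silo(silo, rows)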
def saveDataToSilo(silo, data, read=-1, user=None):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a python list of dictionaries, stored in MongoDB
    read -- the read object, optional only for backwards compatibility
    user -- an optional parameter to use if it is necessary to retrieve
            from ThirdPartyTokens
    """
    try:
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError:
        # read was passed as a plain id (or the -1 default) rather than
        # a Read object
        read_source_id = read

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    num_saved_rows = 0
    keys = []
    try:
        keys = [cleanKey(key) for key in data.fieldnames]
    except AttributeError:
        pass

    # NOTE: per-column type coercion driven by getColToTypeDict(silo) is
    # intentionally disabled in this version; a hypothetical sketch of it
    # follows this function.
    for row in data:
        # reset filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError as e:
                # when this exception occurs, it means that the column
                # identified as the unique_col is not present in the
                # fetched dataset
                logger.info(e)

        # if filter_criteria is set, update it with the current silo_id and
        # upsert against the existing documents; otherwise create a new
        # document instead of updating an existing one.
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (str(k), str(v)))
                continue
        else:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = cleanDataObj(row, silo)
        for key, val in row.iteritems():
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()
        num_saved_rows += 1

    addColsToSilo(silo, keys)
    return {"skipped_rows": skipped_rows, "num_rows": num_saved_rows}
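# A hypothetical sketch of the per-column type coercion that this version of
# saveDataToSilo() left commented out. It assumes getColToTypeDict(silo)
# maps column names to 'int', 'double', or 'string'; coerce_value() itself
# is not part of the codebase.
def coerce_value(field_to_type, key, val):
    """Cast val to its column's declared type; None means skip the value."""
    declared = field_to_type.get(key, 'string')
    if declared == 'int':
        try:
            return int(val)
        except ValueError:
            return None  # uncastable values were skipped, not saved
    if declared == 'double':
        try:
            return float(val)
        except ValueError:
            return None
    return val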