Example #1
 def test_get_editColumnOrder(self):
     addColsToSilo(self.silo, ['a', 'b', 'c', 'd', 'e', 'f'])
     hideSiloColumns(self.silo, ['b', 'e'])
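     # row-hiding filter conditions are stored on the silo as a JSON list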
     self.silo.rows_to_hide = json.dumps([
         {
             "logic": "BLANKCHAR",
             "operation": "",
             "number": "",
             "conditional": "---",
         },
         {
             "logic": "AND",
             "operation": "empty",
             "number": "",
             "conditional": ["a", "b"],
         },
         {
             "logic": "OR",
             "operation": "empty",
             "number": "",
             "conditional": ["c", "d"],
         }
     ])
     self.silo.save()
     request = self.factory.get(self.url)
     request.user = self.user
     request._dont_enforce_csrf_checks = True
     response = addColumnFilter(request, self.silo.pk)
     self.assertEqual(response.status_code, 200)
Example #2
 def test_post_editColumnOrder(self):
     addColsToSilo(self.silo, ['a', 'b', 'c', 'd', 'e', 'f'])
     hideSiloColumns(self.silo, ['b', 'e'])
     cols_ordered = ['c', 'f', 'a', 'd']
     response = self.client.post('/edit_column_order/%s/' %
                                 str(self.silo.pk),
                                 data={'columns': cols_ordered})
     self.assertEqual(response.status_code, 302)
     self.assertEqual(getSiloColumnNames(self.silo.pk),
                      ['c', 'f', 'a', 'd'])
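     # posting with a non-existent silo id should also just redirect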
     response = self.client.post('/edit_column_order/0/',
                                 data={'columns': cols_ordered})
     self.assertEqual(response.status_code, 302)
Example #3
def copy_from_cache(cache_silo, silo, read):
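    """Copy cached records from cache_silo into silo and mirror its columns.

    Each record is re-pointed at the destination silo and read before being
    re-inserted into label_value_store; `db` is assumed to be the module-level
    MongoDB database handle.
    """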
    cached_data = db.label_value_store.find({
        'silo_id': cache_silo.id})
    for record in cached_data:
        record.pop('_id')
        record['silo_id'] = silo.id
        record['read_id'] = read.id
        db.label_value_store.insert(record)

    cols = []
    col_types = {}
    for col in json.loads(cache_silo.columns):
        cols.append(col['name'])
        col_types[col['name']] = col['type']
    addColsToSilo(silo, cols, col_types)
Example #4
def getCommCareDataHelper(conf):
    """
    Use fetch and request CommCareData to store all of the case data

    domain -- the domain name used for a commcare project
    auth -- the authorization required
    auth_header -- True = use Header, False = use Digest authorization
    total_cases -- total cases to get
    silo - silo to put the data into
    read -- read that the data is apart of
    """

    # CommCare has a max limit of 50 for report downloads
    if conf.download_type == 'commcare_report':
        record_limit = 50
    else:
        record_limit = 100

    # replace the record limit and fetch the data
    base_url = conf.base_url.replace('limit=1', 'limit=' + str(record_limit))
    if conf.download_type == 'commcare_form':
        cache_obj = CommCareCache.objects.get(form_id=conf.form_id)
        base_url += '&received_on_start=' + \
            cache_obj.last_updated.isoformat()[:-6]
    data_raw = fetchCommCareData(conf.to_dict(), base_url, 0, record_limit)
    data_collects = data_raw.apply_async()
    data_retrieval = [v.get() for v in data_collects]
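    # merge the column names returned by each asynchronous fetch task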
    columns = set()
    for data in data_retrieval:
        columns = columns.union(data)

    # Add new columns to the list of current columns. This is slower because
    # order has to be maintained (2n instead of n).
    silo = Silo.objects.get(pk=conf.silo_id)
    addColsToSilo(silo, columns)
    return (messages.SUCCESS, "CommCare data imported successfully", columns)
Example #5
def getCommCareCaseData(domain, auth, auth_header, total_cases, silo, read):
    """
    Use fetch and request CommCareData to store all of the case data

    domain -- the domain name used for a commcare project
    auth -- the authorization required
    auth_header -- True = use Header, False = use Digest authorization
    total_cases -- total cases to get
    silo - silo to put the data into
    read -- read that the data is apart of
    """

    RECORDS_PER_REQUEST = 100
    base_url = "https://www.commcarehq.org/a/"+ domain\
                +"/api/v0.5/case/?format=JSON&limit="+str(RECORDS_PER_REQUEST)

    data_raw = fetchCommCareData(base_url, auth, auth_header,\
                    0, total_cases, RECORDS_PER_REQUEST, silo.id, read.id)
    data_collects = data_raw.apply_async()
    data_retrieval = [v.get() for v in data_collects]
    columns = set()
    for data in data_retrieval:
        columns = columns.union(data)
    # correct the columns: iterate over a copy because the set is mutated
    for column in list(columns):
        new_column = column.replace(".", "_").replace("$", "USD")
        if new_column != column:
            columns.remove(column)
            columns.add(new_column)
    for unwanted in ("", "silo_id", "read_id"):
        columns.discard(unwanted)
    renames = (
        ("id", "user_assigned_id"),
        ("_id", "user_assigned_id"),
        ("edit_date", "editted_date"),
        ("create_date", "created_date"),
    )
    for old_name, new_name in renames:
        if old_name in columns:
            columns.remove(old_name)
            columns.add(new_name)

    # Add new columns to the list of current columns. This is slower because
    # order has to be maintained (2n instead of n).
    addColsToSilo(silo, columns)
    hideSiloColumns(silo, ["case_id"])

    return (messages.SUCCESS, "CommCare cases imported successfully", columns)
Example #6
def import_from_gsheet_helper(user,
                              silo_id,
                              silo_name,
                              spreadsheet_id,
                              sheet_id=None,
                              partialcomplete=False):
    msgs = []

    if spreadsheet_id is None:
        msgs.append({
            "level": messages.ERROR,
            "msg": "A Google Spreadsheet is not selected to import data from.",
            "redirect": reverse('index')
        })

    credential_obj = _get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs

    defaults = {
        "name": silo_name,
        "description": "Google Sheet Import",
        "public": False,
        "owner": user
    }
    silo, created = Silo.objects.get_or_create(
        pk=None if silo_id == '0' else silo_id, defaults=defaults)
    msgs.append({"silo_id": silo.id})

    service = _get_authorized_service(credential_obj)

    # fetch the google spreadsheet metadata
    try:
        spreadsheet = service.spreadsheets().get(
            spreadsheetId=spreadsheet_id).execute()
    except HttpAccessTokenRefreshError:
        return [_get_credential_object(user, True)]
    except Exception as e:
        error = json.loads(e.content).get("error")
        msg = "%s: %s" % (error.get("status"), error.get("message"))
        msgs.append({"level": messages.ERROR, "msg": msg})
        return msgs

    spreadsheet_name = spreadsheet.get("properties", {}).get("title", "")

    gsheet_read = _get_or_create_read("GSheet Import", spreadsheet_name,
                                      "Google Spreadsheet Import",
                                      spreadsheet_id, user, silo)
    sheet_name = "Sheet1"
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()

    if gsheet_read.gsheet_id:
        sheets = spreadsheet.get("sheets", None)
        for sheet in sheets:
            properties = sheet.get("properties", None)
            if properties:
                if str(properties.get("sheetId")) == str(
                        gsheet_read.gsheet_id):
                    sheet_name = properties.get("title")

    headers = []
    data = None

    combine_cols = False
    # Fetch data from gsheet
    try:
        result = service.spreadsheets().values().get(
            spreadsheetId=spreadsheet_id, range=sheet_name).execute()
        data = result.get("values", [])
    except Exception as e:
        logger.error(e)
        msgs.append({
            "level": messages.ERROR,
            "msg": "Something went wrong 22: %s" % e,
            "redirect": None
        })
        return msgs

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    lvss = []
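    # the first row supplies the column headers; the remaining rows become
    # LabelValueStore documents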
    for r, row in enumerate(data):
        if r == 0:
            headers = []
            for header in row:
                header = cleanKey(header)
                headers.append(header)

            addColsToSilo(silo, headers)
            continue
        filter_criteria = {}

        # build filter_criteria if unique field(s) have been set up for this silo
        for unique_field in unique_fields:
            try:
                filter_criteria.update(
                    {unique_field.name: row[headers.index(unique_field.name)]})
            except KeyError:
                pass
            except ValueError:
                pass
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            # if a row is found, then fetch and update it
            # if no row is found then create a new one
            # if multiple rows are found then skip b/c not sure which one to update
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist as e:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned as e:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (k, v))
                continue
        else:
            lvs = LabelValueStore()

        for c, col in enumerate(row):
            try:
                key = headers[c]
            except IndexError as e:
                # this happens when a column header is missing in the gsheet
                continue
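            # skip empty/internal keys and rename ones that collide with
            # reserved document fields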
            if key == "" or key is None or key == "silo_id": continue
            elif key == "id" or key == "_id": key = "user_assigned_id"
            elif key == "edit_date": key = "editted_date"
            elif key == "create_date": key = "created_date"
            val = smart_str(row[c], strings_only=True)
            key = smart_str(key)
            val = val.strip()
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        lvs.silo_id = silo.id
        lvs.read_id = gsheet_read.id
        lvs.create_date = timezone.now()
        lvs = calculateFormulaCell(lvs, silo)
        if partialcomplete:
            lvss.append(lvs)
        else:
            lvs.save()

    if skipped_rows:
        msgs.append({
            "level": messages.WARNING,
            "msg": "Skipped updating/adding records where %s because there "
                   "are already multiple records."
                   % ",".join(str(s) for s in skipped_rows)
        })

    msgs.append({"level": messages.SUCCESS, "msg": "Operation successful"})
    if partialcomplete:
        return (lvss, msgs)
    return msgs
Example #7
def import_from_gsheet_helper(user,
                              silo_id,
                              silo_name,
                              spreadsheet_id,
                              sheet_id=None,
                              partialcomplete=False):
    msgs = []
    if spreadsheet_id is None:
        msgs.append({
            'level': messages.ERROR,
            'msg': 'A Google Spreadsheet is not selected to import data from.',
            'redirect': reverse('index')
        })

    credential_obj = _get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs

    defaults = {
        'name': silo_name,
        'description': 'Google Sheet Import',
        'public': False,
        'owner': user
    }
    silo, created = Silo.objects.get_or_create(
        pk=None if silo_id == '0' else silo_id, defaults=defaults)
    msgs.append({'silo_id': silo.id})

    metadata, error = _get_gsheet_metadata(credential_obj, spreadsheet_id,
                                           user)
    if error:
        if 'credential' in error:
            return error.get('credential')
        else:
            msgs.append(error)
            return msgs

    spreadsheet_name = metadata.get('properties', {}).get('title', '')
    gsheet_read = _get_or_create_read('GSheet Import', spreadsheet_name,
                                      'Google Spreadsheet Import',
                                      spreadsheet_id, user, silo)
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()

    sheet_name = 'Sheet1'
    if gsheet_read.gsheet_id:
        sheets = metadata.get('sheets', None)
        for sheet in sheets:
            properties = sheet.get('properties', None)
            if properties:
                if str(properties.get('sheetId')) == str(
                        gsheet_read.gsheet_id):
                    sheet_name = properties.get('title')

    values, error = _fetch_data_gsheet(credential_obj, spreadsheet_id,
                                       sheet_name)
    if error:
        msgs.append(error)
        return msgs

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    headers = []
    lvss = []

    # get the column names
    header = values.pop(0)
    for h in header:
        h = cleanKey(h)
        headers.append(h)

    data = _convert_gsheet_data(headers, values)

    for r, row in enumerate(data):
        filter_criteria = {}
        # build filter_criteria if unique field(s) have been set up for this silo
        for uf in unique_fields:
            try:
                if str(row[uf.name]).isdigit():
                    filter_criteria.update({str(uf.name): int(row[uf.name])})
                else:
                    filter_criteria.update({str(uf.name): str(row[uf.name])})
            except AttributeError as e:
                logger.warning(e)
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add('{}={}'.format(k, v))
                continue
        else:
            lvs = LabelValueStore()

        # add the key and values to the document
        row = clean_data_obj(row)
        for key, val in row.iteritems():
            val = smart_str(val, strings_only=True)
            setattr(lvs, key, val)

        lvs.silo_id = silo.id
        lvs.read_id = gsheet_read.id
        lvs.create_date = timezone.now()
        lvs = calculateFormulaCell(lvs, silo)
        if partialcomplete:
            lvss.append(lvs)
        else:
            lvs.save()
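    # register the imported headers as silo columns (order is preserved)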
    addColsToSilo(silo, headers)

    if skipped_rows:
        skipped_str = ','.join(str(s) for s in skipped_rows)
        msg = 'Skipped updating/adding records where {} because there are ' \
              'already multiple records.'.format(skipped_str)
        msgs.append({'level': messages.WARNING, 'msg': msg})

    msgs.append({'level': messages.SUCCESS, 'msg': 'Operation successful'})
    if partialcomplete:
        return lvss, msgs
    return msgs
Example #8
    def handle(self, *args, **options):

        frequency = options['frequency']
        if frequency != "daily" and frequency != "weekly":
            return self.stdout.write(
                "Frequency argument can either be 'daily' or 'weekly'")

        silos = Silo.objects.filter(
            reads__autopull_frequency__isnull=False,
            reads__autopull_frequency=frequency,
            reads__type__read_type="CommCare").distinct()
        commcare_type_id = ReadType.objects.get(read_type='CommCare').id
        for silo in silos:
            for read in silo.reads.all():
                if read.type.read_type != 'CommCare':
                    print 'skipping read', read
                    continue
                conf = CommCareImportConfig(silo_id=silo.id,
                                            read_id=read.id,
                                            tables_user_id=silo.owner.pk,
                                            base_url=read.read_url,
                                            update=True)

                try:
                    conf.set_auth_header()
                except Exception as e:
                    self.stdout.write(
                        'No commcare api key for silo %s, read "%s"' %
                        (silo.pk, read.pk))
                    continue

                # get/build metadata based on download_type
                if 'case' in conf.base_url:
                    last_data_retrieved = str(getNewestDataDate(silo.id))[:10]
                    conf.base_url += "&date_modified_start=" + last_data_retrieved
                    conf.download_type = 'case'
                    conf.record_count = get_commcare_record_count(conf)
                elif 'configurablereportdata' in conf.base_url:
                    conf.download_type = 'commcare_report'
                    url_parts = conf.base_url.split('/')
                    conf.project = url_parts[4]
                    conf.report_id = url_parts[8]
                    conf.record_count = get_commcare_record_count(conf)
                elif '/form/' in conf.base_url:
                    cache_obj = CommCareCache.objects.get(
                        form_id=read.resource_id)
                    conf.download_type = 'commcare_form'
                    conf.project = cache_obj.project
                    conf.form_name = cache_obj.form_name
                    conf.form_id = cache_obj.form_id
                    conf.base_url = (read.read_url + '&received_on_start=' +
                                     cache_obj.last_updated.isoformat()[:-6])
                    conf.record_count = get_commcare_record_count(conf)

                response = requests.get(conf.base_url,
                                        headers=conf.auth_header)
                if response.status_code == 401:
                    commcare_token = ThirdPartyTokens.objects.get(
                        token=conf.token)
                    commcare_token.delete()
                    self.stdout.write(
                        'Incorrect commcare api key for silo %s, read %s' %
                        (silo.pk, read.pk))
                    continue
                elif response.status_code != 200:
                    self.stdout.write(
                        'Failure retrieving commcare data for silo %s, read %s'
                        % (silo.pk, read.pk))
                    continue

                if '/form/' in conf.base_url:
                    client = MongoClient(settings.MONGODB_URI)
                    db = client.get_database(settings.TOLATABLES_MONGODB_NAME)
                    db.label_value_store.delete_many({'read_id': read.id})
                    cache_silo = Silo.objects.get(pk=cache_obj.silo.id)
                    copy_from_cache(cache_silo, silo, read)

                # Now call the update data function in commcare tasks
                conf.base_url = read.read_url
                conf.use_token = True

                data_raw = fetchCommCareData(conf.to_dict(), conf.base_url, 0,
                                             50)
                data_collects = data_raw.apply_async()
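                # merge the column-name sets returned by the async fetch tasks
                # and register them on the silo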
                new_colnames = set()
                for colset in [set(v.get()) for v in data_collects]:
                    new_colnames.update(colset)
                cleaned_colnames = [cleanKey(name) for name in new_colnames]
                addColsToSilo(silo, cleaned_colnames)

                self.stdout.write(
                    'Successfully fetched silo %s, read %s from CommCare' %
                    (silo.pk, read.pk))
Example #9
    def handle(self, *args, **options):

        disabled_projects = []
        # Get a list of projects to import data from
        if options['project']:
            projects = [options['project']]
        else:
            read_urls = Read.objects.filter(type__read_type='CommCare') \
                .values_list('read_url', flat=True)
            projects = set()
            for read_url in read_urls:
                projects.add(read_url.split('/')[4])

        base_url = 'https://www.commcarehq.org/a/%s/api/v0.5/form/' \
            '?limit=1&received_on_start=%s'

        for project in projects:
            if project in disabled_projects:
                continue
            print 'Downloading ' + project

            try:
                cache_obj = CommCareCache.objects.filter(project=project) \
                    .first()
                last_updated = cache_obj.last_updated.isoformat()[:-6]
            except (IndexError, AttributeError):
                last_updated = '1980-01-01'

            conf = CommCareImportConfig(
                project=project,
                tables_user_id=User.objects.get(email='*****@*****.**',
                                                username='******').id,
                base_url=base_url % (project, last_updated),
                download_type='commcare_form',
                update=True,
                use_token=True,
                for_cache=True)

            try:
                conf.set_auth_header()
            except ThirdPartyTokens.DoesNotExist:
                self.stdout.write('CommCare Token is not in the DB')
                continue

            response = requests.get(conf.base_url, headers=conf.auth_header)
            if response.status_code == 401:
                commcare_token = ThirdPartyTokens.objects.get(token=conf.token)
                commcare_token.delete()
                self.stdout.write('Incorrect CommCare api key for project %s' %
                                  project)
                continue
            elif response.status_code != 200:
                self.stdout.write(
                    'Failure retrieving CommCare data for project %s' %
                    project)

            conf.record_count = get_commcare_record_count(conf)
            if conf.record_count == 0:
                self.stdout.write(
                    'Fetching of record counts failed. Skipping project %s' %
                    project)
                continue

            # Trigger the data retrieval and retrieve the column names
            url = conf.base_url.replace('limit=1', 'limit=100')
            data_raw = fetchCommCareData(conf.to_dict(), url, 0, 100)
            data_collects = data_raw.apply_async()
            data_retrieval = [v.get() for v in data_collects]

            # Add columns to the Silo and max date to Cache
            silo_ref = {}
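            # silo_ref caches Silo lookups so each silo is only fetched from
            # the database once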
            save_date = datetime.datetime(1980, 1, 1).replace(tzinfo=pytz.UTC)
            for column_dict, max_date in data_retrieval:
                for silo_id in column_dict:
                    cleaned_colnames = [
                        cleanKey(name) for name in column_dict[silo_id]
                    ]
                    if silo_id in silo_ref:
                        silo = silo_ref[silo_id]
                        addColsToSilo(silo, cleaned_colnames)
                    else:
                        try:
                            silo = Silo.objects.get(pk=silo_id)
                            silo_ref[silo_id] = silo
                            addColsToSilo(silo, cleaned_colnames)
                        except ValueError as e:
                            print 'ERROR: Null value for silo_id'
                            print 'Conf object: ', conf
                            print 'Column dict', column_dict
                            print 'Error message: ', e

                save_date = max(
                    parse(max_date).replace(tzinfo=pytz.UTC), save_date)

            for cache_obj in CommCareCache.objects.filter(project=project):
                cache_obj.last_updated = save_date
                cache_obj.save()

            self.stdout.write('Successfully fetched project %s from CommCare' %
                              conf.project)