Example #1
    def load_broken_notifications(self):
        """Load broken notifications by type"""

        broken_notice_info = None
        for model_name, type_id_list in get_dv_object_to_object_id_map().items():
            #   Get a list of object ids for this model type
            #   that were not emailed--e.g. should show up
            #   on the notifications pages
            #
            msgt('check: %s %s' % (model_name, type_id_list))

            # If there's a selected_model_name, then only process that model
            #
            if self.selected_model_name is None:
                pass    # check all models
            elif model_name != self.selected_model_name:
                # We have a selected_model_name and this isn't it!
                continue

            model_user_id_list = UserNotification.objects.select_related(
                'user'
            ).filter(
                object_type__in=type_id_list,
            ).values_list('objectid', 'user__id')

            if len(model_user_id_list) == 0:
                continue

            # retrieve the object ids only
            model_id_list = [x[0] for x in model_user_id_list]
            unique_id_list = list(set(model_id_list))

            # Hack: resolve the model class by name with eval()
            # (see the apps.get_model() note after this example)
            model_class = eval(model_name)
            if model_name in ['DvObject', 'DatasetVersion', 'FileMetadata']:
                existing_ids = model_class.objects.filter(
                    id__in=unique_id_list
                ).values_list('id', flat=True).distinct()
            else:
                existing_ids = model_class.objects.select_related(
                    'dvobject'
                ).filter(
                    dvobject__id__in=unique_id_list
                ).values_list('dvobject__id', flat=True).distinct()

            if len(unique_id_list) == len(existing_ids):
                # Looks good!
                # No notifications where object no longer exists
                continue

            # Create a list of the missing ids
            #
            missing_ids = list(set(unique_id_list) - set(existing_ids))

            # Record broken notification info
            #
            broken_notice_info = BrokenNotificationInfo(
                model_name,
                list(model_user_id_list),
                missing_ids)
            self.broken_info_list.append(broken_notice_info)
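
The eval(model_name) lookup above is the hack the comment flags. A safer sketch uses Django's app registry, assuming the models live in a single app ('dv_apps' below is a placeholder app label, not from the original code):

    # Sketch: resolve the model class without eval()
    # 'dv_apps' is a hypothetical app label -- substitute the real one
    from django.apps import apps

    model_class = apps.get_model('dv_apps', model_name)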
Example #2
def download_file(url_to_file):
    """Download a Dataverse file and return the filename"""
    """
    import re
    d = r.headers['content-disposition']
    fname = re.findall("filename=(.+)", d)
    """
    file_handle, filepath = tempfile.mkstemp()

    msgt('download file: %s' % url_to_file)

    r = requests.get(url_to_file, stream=True)

    if r.status_code != 200:
        msg('bad status: %s' % r.status_code)
        if isfile(filepath):
            make_sure_file_deleted(filepath)
        return None, None

    # Work out the file extension from the content-disposition header,
    # which may be missing
    file_ext = None
    content_dict = r.headers.get('content-disposition')
    fname = format_file_name(content_dict) if content_dict else None
    if fname:
        file_ext = fname.split('.')[-1].lower()
    msg('file_ext: %s' % file_ext)

    with os.fdopen(file_handle, 'wb') as tmp:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                tmp.write(chunk)

    msg('File downloaded: %s' % filepath)
    return filepath, file_ext
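
A minimal usage sketch for this helper (the URL is a placeholder, not from the original code):

    # Hypothetical Dataverse file access endpoint
    file_url = 'https://dataverse.example.edu/api/access/datafile/12345'

    filepath, file_ext = download_file(file_url)
    if filepath is None:
        msg('download failed')
    else:
        msg('saved to %s (extension: %s)' % (filepath, file_ext))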
Example #3
    def show_diffs(self):
        """Debug: print out differences"""

        section = None
        for diff_obj in self.diff_list:
            if section != diff_obj.section:
                section = diff_obj.section
                msgt(section)
            diff_obj.show(show_section=False)
Example #4
    def show(self, show_section=True):
        """print info"""

        if show_section:
            msgt('%s: [%s] %s' % (self.section, self.attr_name, self.note))

        msg('attribute: %s' % self.attr_name)
        msg('\nnew: %s' % self.new_val)
        msg('\nold: %s' % self.old_val)
        dashes()
Example #5
    def show_elapsed_time(self, start_time):
        """From http://stackoverflow.com/questions/1345827/how-do-i-find-the-time-difference-between-two-datetime-objects-in-python"""
        time_now = int(time.time()) # epoch seconds

        days = divmod(time_now - start_time, 86400)  # (days, remainder)

        hours = divmod(days[1], 3600)  # (hours, remainder)
        minutes = divmod(hours[1], 60)  # (minutes, remainder)
        seconds = minutes[1]  # leftover seconds

        msgt('Elapsed time: %d day(s), %d hour(s), %d minute(s), %d second(s)' %
             (days[0], hours[0], minutes[0], seconds))
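
As a sanity check on the divmod chain, 90061 elapsed seconds (one day plus one hour plus one minute plus one second) decomposes like this:

    # 90061 = 1*86400 + 1*3600 + 1*60 + 1
    assert divmod(90061, 86400) == (1, 3661)
    assert divmod(3661, 3600) == (1, 61)
    assert divmod(61, 60) == (1, 1)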
Example #6
    def load_file_as_dict(fname):
        """Load a file as a python dict"""
        msgt('load file: %s' % fname)
        assert isfile(fname), '%s is not file' % fname

        with open(fname, 'r') as f:
            fcontent = f.read()

        # Escape literal CRLFs inside JSON string values so that
        # json.loads doesn't reject the raw control characters
        fcontent = fcontent.replace('\r\n', '\\r\\n')

        dict_info = json.loads(fcontent, object_pairs_hook=OrderedDict)
        if 'data' in dict_info:
            return dict_info['data']

        return dict_info
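
A usage sketch, assuming a saved Dataverse API response whose top level looks like {"status": "OK", "data": {...}} (the filename follows the ds_*.json scheme used in Example #11):

    # Hypothetical saved API response file
    ds_dict = load_file_as_dict('ds_00000045.json')
    print('top-level keys: %s' % list(ds_dict.keys()))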
Example #7
    def write_files_to_mongo(self, **kwargs):
        """Write the saved dataset files to Mongo"""
        client = MongoClient()
        db = client.dataverse_database
        collection = db.datasets

        # look at kwargs
        #
        dataset_start_id = kwargs.get('dataset_start_id', 0)
        delete_all = kwargs.get('delete_all', False)

        # If appropriate, Delete existing records
        #
        if delete_all:
            msgt('Deleting current records')
            result = collection.delete_many({})
            msg('result.deleted_count: %s' % result.deleted_count)
            return

        fnames = os.listdir(self.output_dir)
        fnames = [
            x for x in fnames if x.endswith('.json') and x.startswith('ds_')
        ]
        fnames.sort()

        start_time = int(time.time())  # epoch seconds

        cnt = 0
        for fname in fnames:
            cnt += 1
            ds_id = int(fname.split('.')[0].split('_')[1])

            msgt('(%d) process dataset %s (%s)' % (cnt, ds_id, fname))

            if ds_id < dataset_start_id:
                msg('skipping it')
                continue

            with open(join(self.output_dir, fname), 'r') as f:
                content = f.read()
            content = update_json_text(content)
            content_doc = json.loads(content, object_pairs_hook=OrderedDict)
            content_doc['_id'] = ds_id
            content_doc['dtype'] = 'dataset'

            # Note: collection.save() is deprecated in PyMongo 3
            # (see the replace_one() sketch after this example)
            doc_id = collection.save(content_doc)
            if cnt % 500 == 0:
                self.show_elapsed_time(start_time)
        self.show_elapsed_time(start_time)
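
collection.save() was deprecated in PyMongo 3 and removed in PyMongo 4. A sketch of the equivalent upsert, assuming the same collection and content_doc as above:

    # Upsert by _id: insert the document, or replace an existing one
    result = collection.replace_one(
        {'_id': content_doc['_id']},
        content_doc,
        upsert=True)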
Example #8
    def test_search_mongo(self, term='law'):
        """Test searches"""
        client = MongoClient()
        db = client.dataverse_database
        collection = db.datasets

        # Compass:
        #
        # {"title": {$regex: "(^Law| Law | Law$)"}}
        """
        {"title":{"$regex":"(^Law| Law | Law$)","$options":"i"},"metadata_blocks.citation.dsDescription.dsDescriptionValue": {"$regex":"(^Law| Law | Law$)","$options":"i"}}
        """
        field_names = [
            'metadata_blocks.citation.dsDescription.dsDescriptionValue',
            #'title',
            #'metadata_blocks.citation.subject',
            #'metadata_blocks.citation.keyword.keywordValue',
        ]

        qlist = []
        for field_name in field_names:
            qlist.append({
                field_name: {
                    '$regex': r'(^{0}|\s{0}\s|\s{0}$)'.format(term),
                    '$options': 'i'
                }
            })
        docs = collection.find({"$or": qlist})

        # -----------------------------
        #field_name = 'title'
        #field_name = 'metadata_blocks.citation.dsDescription.dsDescriptionValue'
        #docs = collection.find({field_name:{'$regex':'(^Law|\sLaw\s|\sLaw$)', '$options':'i'}})
        #docs = collection.find({'title':{'$regex':'(^Law|\sLaw\s|\sLaw$)', '$options':'i'}})
        from dict_map_util import DictMapUtil

        cnt = 0
        for doc in docs:
            cnt += 1
            msgt('(%d) %s' % (cnt, doc['title']))

            dmap_str = 'dmap.' + field_names[0]
            msg('dmap_str: %s' % dmap_str)
            m = DictMapUtil(doc)
            #print eval(dmap_str)
            break
Example #9
    def run_comparison(self):
        """Compare the two JSON datasets"""
        msgt(self.run_comparison.__doc__)

        # Run a quick check to see if the dicts are the same.
        #
        if self.old_ds == self.new_ds:  # cmp() is Python 2 only
            msg('No differences!')
            return

        new_files_list = self.new_ds.pop('files', [])
        old_files_list = self.old_ds.pop('files', [])
        #print 'new_files_list', new_files_list

        self.compare_dicts('', self.new_ds, self.old_ds)

        self.compare_file_lists(new_files_list, old_files_list)
Example #10
def download_file(url_to_file):
    """Download a Dataverse file and return the filename"""

    file_handle, filepath = tempfile.mkstemp()

    msgt('download file: %s' % url_to_file)

    r = requests.get(url_to_file, stream=True)

    if r.status_code != 200:
        msg('bad status: %s' % r.status_code)
        if isfile(filepath):
            make_sure_file_deleted(filepath)
        return None

    with os.fdopen(file_handle, 'wb') as tmp:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                tmp.write(chunk)

    msg('File downloaded: %s' % filepath)
    return filepath
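
This variant returns only the file path and skips the extension logic. Like Example #2, it calls requests.get without a timeout, so a stalled server can block the caller; a small hedged tweak (30 seconds is an arbitrary choice):

    # Fail after 30s with no server response instead of blocking forever
    r = requests.get(url_to_file, stream=True, timeout=30)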
Example #11
    def make_json_files(self):
        """Write each dataset's latest version to a JSON file"""

        # Set publication status
        #
        filters = {}
        if self.published_only:
            filters.update(query_helper.get_is_published_filter_param())

        # Query for dataset ids
        #
        ds_id_query = Dataset.objects.filter(
            **filters
        ).annotate(
            ds_id=F('dvobject__id')
        ).values_list('ds_id', flat=True).order_by('ds_id')

        # Iterate through dataset ids
        #
        #start_time = datetime.now()
        start_time = int(time.time()) # epoch seconds

        cnt = 0
        no_versions_found_list = [45900]

        for ds_id in ds_id_query:
            cnt += 1
            msgt('(%d) Checking dataset id %s' % (cnt, ds_id))
            if ds_id < self.dataset_start_id:
                msg('skipping...(start at dataset id: %d)' % self.dataset_start_id)
                continue

            # Create file name
            #
            fname = 'ds_%s.json' % (str(ds_id).zfill(8))
            full_fname = join(OUTPUT_DIR, fname)

            # Should we overwrite the existing file?
            #
            if isfile(full_fname) and not self.overwrite_existing_files:
                msg('skipping...file already exists')
                continue

            dataset_version = get_latest_dataset_version(ds_id)

            if dataset_version is None:
                msg("Could not find dataset_version!")
                no_versions_found_list.append(ds_id)
                continue

            dataset_as_json = DatasetSerializer(dataset_version).as_json()

            with open(full_fname, 'w') as f:
                f.write(json.dumps(dataset_as_json, indent=4))
            msg('File written: %s' % full_fname)

            if cnt % 500 == 0:
                self.show_elapsed_time(start_time)
            #if cnt > 10:
            #    self.show_elapsed_time(start_time)
            #    break

        self.show_elapsed_time(start_time)
        msg('no_versions_found_list: %s' % no_versions_found_list)
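
The zero-padded filename built here is exactly what write_files_to_mongo (Example #7) parses back into a dataset id:

    # 'ds_%s.json' % str(ds_id).zfill(8) round-trips through the parser
    fname = 'ds_%s.json' % str(45).zfill(8)  # -> 'ds_00000045.json'
    assert int(fname.split('.')[0].split('_')[1]) == 45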
Example #12
    def get_count_broken_notifications():
        """
        Query each object type and make sure notifications aren't broken

        Example map
        { 'DvObject': [1],
          'Dataverse': [2],
          'Dataset': [14, 11], 'DatasetVersion': [13, 12, 7],
          'DataFile': [9]
         }

        """
        broken_cnt = 0
        user_ids = []
        for model_name, type_id_list in get_dv_object_to_object_id_map().items():

            #   Get a list of object ids for this model type
            #   that were not emailed--e.g. should show up
            #   on the notifications pages
            #
            msgt('check: %s %s' % (model_name, type_id_list))
            model_user_id_list = UserNotification.objects.select_related(
                'user'
            ).filter(
                object_type__in=type_id_list,
            ).values_list('objectid', 'user__id')

            model_id_list = [x[0] for x in model_user_id_list]

            user_ids += [x[1] for x in model_user_id_list]

            msg('model_id_list len: %s' % len(model_id_list))
            if len(model_id_list) == 0:
                continue

            # Used for later "bad notice" counts
            notice_counter = Counter(model_id_list)
            msg('notice_counter len: %s' % len(notice_counter))

            unique_id_list = list(set(model_id_list))
            msg('unique_id_list len: %s' % len(unique_id_list))

            # Hack: resolve the model class by name with eval()
            # (see the apps.get_model() note under Example #1)
            model_class = eval(model_name)
            if model_name in ['DvObject', 'DatasetVersion', 'FileMetadata']:
                existing_ids = model_class.objects.filter(
                    id__in=unique_id_list
                ).values_list('id', flat=True).distinct()
            else:
                existing_ids = model_class.objects.select_related(
                    'dvobject'
                ).filter(
                    dvobject__id__in=unique_id_list
                ).values_list('dvobject__id', flat=True).distinct()

            msg('existing_ids len: %s' % len(existing_ids))

            if len(unique_id_list) == len(existing_ids):
                # Looks good!
                continue

            missing_ids = list(set(unique_id_list) - set(existing_ids))
            for missing_id in missing_ids:
                broken_cnt += notice_counter.get(missing_id, 0)

        unique_user_ids = len(set(user_ids))

        return (broken_cnt, unique_user_ids)
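
Because notice_counter maps each object id to its notification count, broken_cnt tallies broken notifications, not just broken objects. A small illustration with the stdlib Counter:

    from collections import Counter

    # Three notifications point at object 7; object 7 no longer exists
    notice_counter = Counter([7, 7, 9, 7])
    missing_ids = [7]

    broken_cnt = sum(notice_counter.get(mid, 0) for mid in missing_ids)
    assert broken_cnt == 3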
Example #13
    def get_data_rows(self, as_json=False, pretty_print=False):
        """
        Return information as JSON
            {
                "data" :
                    "total_row_count" : 117
                    "preview_row_count" : 50
                    "column_names" : ["Name", "Position", "Office"]
                    "rows" : [
                        [
                          "Tiger Nixon",
                          "System Architect",
                          "Edinburgh"
                        ],
                        [
                          "Garrett Winters",
                          "Accountant",
                          "Tokyo"
                        ]
                    ]
            }
        """
        if self.has_error():
            return None

        # Read the table
        try:
            if self.is_excel:
                msgt('Excel!')

                df = pd.read_excel(self.filepath)
            else:
                df = pd.read_table(self.filepath)
        except Exception as ex_obj:
            msg(ex_obj)
            msgt('Failed to open file via pandas!')
            temp_file_helper.make_sure_file_deleted(self.filepath)
            if self.is_excel:
                self.add_error('Failed to open Excel file via pandas. [%s]' %
                               ex_obj)
            else:
                self.add_error(
                    '<b>Probably not a tabular file!</b> Failed to open file via pandas. [%s]'
                    % ex_obj)
            return None

        self.describe_as_html = df.describe().to_html()
        json_string = df.describe().to_json()
        self.describe_as_dict = json.loads(json_string,
                                           object_pairs_hook=OrderedDict)

        # Retrieve the columns
        self.column_names = df.columns.tolist()

        # Retrieve the rows
        self.data_rows = df[:self.num_preview_rows].values.tolist()

        #print 'rows', json.dumps(rows)

        # Format the response
        info_dict = OrderedDict()

        info_dict['total_row_count'] = len(df.index)
        info_dict['preview_row_count'] = len(self.data_rows)
        info_dict['column_names'] = self.column_names
        info_dict['rows'] = self.data_rows
        info_dict['describe_as_html'] = self.describe_as_html
        info_dict['describe_as_dict'] = self.describe_as_dict

        if as_json:
            if pretty_print:
                return json.dumps(info_dict, indent=4)
            return json.dumps(info_dict)

        return info_dict
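
A usage sketch, assuming this method lives on a previewer class whose constructor sets filepath, is_excel, and num_preview_rows (the class name and file path here are hypothetical):

    # Hypothetical previewer class wrapping the method above
    previewer = TabularPreviewer('/tmp/mydata.csv')

    info_json = previewer.get_data_rows(as_json=True, pretty_print=True)
    if info_json is None:
        msg('could not read the file')  # has_error() was True or pandas failed
    else:
        print(info_json)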