Example #1
0
def notify_editor_inactivity():
    """Notify editors with fewer than 3 contributions in the past week.

    For each editor, counts paper submissions and comments created within
    the last 7 days; editors whose combined total is below 3 are sent an
    inactivity notification, and a summary of (email, paper count, comment
    count) tuples is handed to ``log_info``.
    """
    User = apps.get_model('user.User')

    last_week = timezone.now() - timedelta(days=7)
    editors = User.objects.editors()
    # Per-editor counts of recent contributions, split by contribution type.
    # Count('id', filter=...) counts the joined contribution rows matching
    # each filter; the two counts are summed into total_contributions.
    inactive_contributors = editors.annotate(
        paper_count=Count(
            'id',
            filter=Q(
                contributions__contribution_type=Contribution.SUBMITTER,
                contributions__created_date__gte=last_week
            )
        ),
        comment_count=Count(
            'id',
            filter=Q(
                contributions__contribution_type=Contribution.COMMENTER,
                contributions__created_date__gte=last_week
            )
        ),
        total_contributions=F('paper_count') + F('comment_count')
    ).filter(
        total_contributions__lt=3
    )

    # Named log_entries (not `logging`) to avoid shadowing the stdlib module.
    log_entries = []
    # iterator() streams rows instead of caching the whole queryset in memory.
    for inactive_contributor in inactive_contributors.iterator():
        paper_count = inactive_contributor.paper_count
        comment_count = inactive_contributor.comment_count
        log_entries.append(
            (
                inactive_contributor.email,
                f'Paper count: {paper_count}',
                f'Comment count: {comment_count}'
            )
        )
        inactive_contributor.notify_inactivity(
            paper_count,
            comment_count
        )
    log_info(log_entries)
Example #2
0
def radiologist_labels_cleaning(radiologist_labels_df):
    """Clean radiologist labels and reduce them to a binary target.

    Some rows are duplicated even once distilled to Normal vs. Abnormal;
    in the absence of any distinguishing features (such as an obvious
    indication that one is a correction), every row sharing such an
    accession number must be removed.

    Returns the cleaned frame (indexed by 'dicom_elements.value', mutated
    in place) plus a list of row counts taken after each cleaning step.
    """
    # Named row_counts (not `logging`) to avoid shadowing the stdlib module.
    row_counts = [len(radiologist_labels_df)]

    # First element of the `classes` list decides the binary label;
    # vectorized .str[0] replaces the per-row lambda with identical results.
    radiologist_labels_df['is_normal_radiologist'] = (
        radiologist_labels_df['classes'].str[0].eq('Normal').astype(int)
    )
    radiologist_labels_df.drop(columns=['classes', 'dicom_elements.name'],
                               inplace=True)

    # Keep a single row in cases where the row is perfectly duplicated.
    radiologist_labels_df.drop_duplicates(keep='first', inplace=True)
    row_counts.append(len(radiologist_labels_df))

    # Same sample with different labels -> impossible to know which is the
    # 'correct' label, so drop every copy (keep=False).
    radiologist_labels_df.drop_duplicates(subset=['dicom_elements.value'],
                                          keep=False,
                                          inplace=True)
    row_counts.append(len(radiologist_labels_df))

    radiologist_labels_df.set_index('dicom_elements.value', inplace=True)
    return radiologist_labels_df, row_counts
Example #3
0
def hospital_records_cleaning(hospital_records_df):
    """Remove duplicates from hospital records and standardize labels.

    The boolean 'Normal' column is renamed to 'Normal_hard_class' to
    distinguish it from the model's probabilistic output, and the frame
    is re-indexed by 'Accession Number' (mutated in place).

    Returns the cleaned frame plus a list of row counts taken after each
    cleaning step.
    """
    # Named row_counts (not `logging`) to avoid shadowing the stdlib module.
    row_counts = [len(hospital_records_df)]

    # Keep a single row for 'true duplicates'.
    hospital_records_df.drop_duplicates(keep='first', inplace=True)
    row_counts.append(len(hospital_records_df))

    # Same accession number with different labels -> impossible to know
    # which is correct, so drop every copy (keep=False).
    hospital_records_df.drop_duplicates(subset=['Accession Number'],
                                        keep=False,
                                        inplace=True)
    row_counts.append(len(hospital_records_df))

    hospital_records_df.rename(columns={'Normal': 'Normal_hard_class'},
                               inplace=True)
    hospital_records_df.set_index('Accession Number', inplace=True)
    return hospital_records_df, row_counts
Example #4
0
def model_outputs_cleaning(model_outputs_df):
    """Deduplicate model outputs and standardize the 'Normal' probability.

    The hospital records carry hard boolean labels, so a probabilistic
    'Normal' column is required here: the existing value is used when
    present, otherwise it is derived as 1 - P(Abnormal).

    Returns the cleaned frame (indexed by 'accession_number'), a list of
    row counts taken after each step, and the sorted pre-dedup frame for
    inspection of conflicting rows.
    """
    # Named row_counts (not `logging`) to avoid shadowing the stdlib module.
    row_counts = []

    # Fill missing P(Normal) from P(Abnormal). Replaces the old axis=1
    # apply with positional Series indexing (x[0]/x[1]), which is
    # deprecated in modern pandas and far slower; results are identical.
    model_outputs_df['Normal'] = model_outputs_df['Normal'].fillna(
        1 - model_outputs_df['Abnormal']
    )
    # Isolate the useful columns; .copy() prevents chained-assignment
    # warnings from the in-place operations below.
    model_outputs_df = model_outputs_df[['Normal', 'accession_number']].copy()

    row_counts.append(len(model_outputs_df))
    # Salvage a single row for 'true duplicates'.
    model_outputs_df.drop_duplicates(keep='first', inplace=True)
    row_counts.append(len(model_outputs_df))

    # Snapshot (sorted) before conflicting duplicates are dropped.
    model_outputs_dirty = model_outputs_df.sort_values('accession_number')

    # Same sample with different labels -> impossible to know which is the
    # 'correct' label, so drop every copy (keep=False).
    model_outputs_df.drop_duplicates(subset=['accession_number'],
                                     keep=False,
                                     inplace=True)
    row_counts.append(len(model_outputs_df))

    model_outputs_df.set_index('accession_number', inplace=True)
    return model_outputs_df, row_counts, model_outputs_dirty
Example #5
0
 def _logger(line):
     """
     Callback function to log embeddedqemu output.

     Appends each output ``line`` to the module-level ``log`` object
     (defined elsewhere; presumably a list — confirm at its definition).
     """
     log.append(line)
Example #6
0
 def f(v):
     """Append ``v`` to the module-level collection ``l`` (defined elsewhere)."""
     l.append(v)
Example #7
0
 def _logger(line):
     """
     Callback function to log libvirtd output.

     Appends each output ``line`` to the module-level ``log`` object
     (defined elsewhere; presumably a list — confirm at its definition).
     """
     log.append(line)