示例#1
0
文件: tests.py 项目: Sohojoe/damon
 def score_mc(d_ak, **kwargs):
     """Score multiple-choice data against an answer key.

     ``d_ak`` is a dict holding a Damon object under ``'data'`` and an
     answer-key object under ``'anskey'``.  The answer key's row keys are
     paired with its ``'Correct'`` column to form a ``['Cols', {...}]``
     key, which is handed to ``score_mc()`` together with ``kwargs``.

     Returns the ``whole`` attribute of a Damon object rebuilt from
     ``score_mc_out``.
     """
     damon_obj, key_obj = d_ak['data'], d_ak['anskey']

     # Map each answer-key row key to its correct response
     item_keys = tools.getkeys(key_obj, 'Row', 'Core')
     correct_resps = key_obj.core_col['Correct']
     answer_map = dict(zip(item_keys, correct_resps))

     damon_obj.score_mc(['Cols', answer_map], **kwargs)
     scored = core.Damon(damon_obj.score_mc_out,
                         'datadict',
                         'whole',
                         verbose=None)
     return scored.whole
示例#2
0
    def get_next_item(self, resps):
        """Get next item(s) to deliver to student.

        ``resps`` is passed to ``pd.read_json()``; the first column label
        of the resulting frame is taken as the student id.  The student's
        responses are combined with any history held in ``self.persons``,
        loaded into a Damon object, optionally scored and standardized
        against ``self.bank``, and then run through the method named by
        ``self.engine[0]`` with keyword arguments ``self.engine[1]``.

        NOTE(review): this method still ends with debugging output and
        ``sys.exit()``, so it never returns a value to the caller.
        """

        resps_ = pd.read_json(resps)
        stud = resps_.columns.values[0]

        if stud in self.persons:
            df = self.persons[stud]
            # Next iteration number = last stored value in column 1, plus one
            resps_['iter'] = df.iloc[-1, 1] + 1

            # DataFrame.append() was removed in pandas 2.0; pd.concat() is
            # the equivalent row-wise concatenation.
            # NOTE(review): the extended frame is not written back to
            # self.persons[stud] -- confirm whether that is intended.
            df = pd.concat([df, resps_])
        else:
            df = resps_
            df['iter'] = 0
            self.persons[stud] = df

        # One row (the student) by one column per response
        data = df.loc[:, stud].to_frame(stud).transpose()
        d = core.Damon(data, 'dataframe', 'RCD_dicts_whole',
                       validchars=self.validchars,
                       verbose=None)

        # Score multiple choice if available
        try:
            d.score_mc(anskey=self.bank)
        except damon1.utils.score_mc_Error:
            pass

        # Standardize if available
        try:
            d.standardize(std_params=self.bank)
        except damon1.utils.standardize_Error:
            pass

        print('\ndf=\n', df)
        print('\nd=\n', d)

        # Dispatch to the configured engine method by name
        eng = self.engine
        getattr(d, eng[0].__name__)(**eng[1])

        print('d.rasch_out=\n', d.rasch_out)
        sys.exit()
示例#3
0
文件: tests.py 项目: Sohojoe/damon
    def coord(data, **kwargs):
        """Run ``coord()`` on a Damon object and return the coordinates.

        ``kwargs`` must contain an ``'anchors'`` entry.  When it is not
        None, an unanchored ``coord()`` run is done first and banked to
        ``TEMP_PATH + 'ibank.pkl'`` so that the anchored run has a bank
        available.

        Returns the ``coredata`` of a Damon object built from
        ``coord_out['fac0coord']``.
        """
        d = data
        bankfile = TEMP_PATH + 'ibank.pkl'

        # Build bank first
        if kwargs['anchors'] is not None:
            try:
                os.remove(bankfile)
            except OSError:
                # Best effort: a missing (or undeletable) stale bank file
                # is not an error; the bank is rebuilt just below.
                pass
            kwargs_ = kwargs.copy()
            kwargs_['anchors'] = None
            d.coord(**kwargs_)
            d.bank(bankfile)

        d.coord(**kwargs)
        d = core.Damon(d.coord_out['fac0coord'],
                       'datadict',
                       'RCD',
                       verbose=None)
        return d.coredata
示例#4
0
文件: tests.py 项目: Sohojoe/damon
    def rasch(data, **kwargs):
        """Run ``rasch()`` on a Damon object and return the estimates.

        ``kwargs`` must contain an ``'anchors'`` entry.  When it is not
        None, an unanchored ``rasch()`` run is done first and banked to
        ``TEMP_PATH + 'ibank.pkl'`` so that the anchored run has a bank
        available.

        Returns the ``whole`` attribute of a Damon object built from
        ``rasch_out['estimates']``.
        """
        d = data
        bankfile = TEMP_PATH + 'ibank.pkl'

        # Build bank first
        if kwargs['anchors'] is not None:
            try:
                os.remove(bankfile)
            except OSError:
                # Best effort: a missing (or undeletable) stale bank file
                # is not an error; the bank is rebuilt just below.
                pass
            kwargs_ = kwargs.copy()
            kwargs_['anchors'] = None
            d.rasch(**kwargs_)
            d.bank(bankfile)

        d.rasch(**kwargs)
        d = core.Damon(d.rasch_out['estimates'],
                       'datadict',
                       'whole',
                       verbose=None)
        return d.whole
示例#5
0
文件: tests.py 项目: Sohojoe/damon
 def extract_valid(data, **kwargs):
     """Call ``extract_valid(**kwargs)`` on ``data`` and return the result.

     The ``extract_valid_out`` datadict is reloaded into a fresh Damon
     object and that object's ``whole`` attribute is returned.
     """
     data.extract_valid(**kwargs)
     reloaded = core.Damon(data.extract_valid_out,
                           'datadict',
                           'whole',
                           verbose=None)
     return reloaded.whole
示例#6
0
文件: tests.py 项目: Sohojoe/damon
 def merge_info(data_info, target_axis, get_validchars):
     """Merge the ``'anskey'`` info of ``data_info`` into its ``'data'``.

     ``target_axis`` and ``get_validchars`` are forwarded unchanged to
     ``merge_info()``.  The ``merge_info_out`` datadict is reloaded into
     a fresh Damon object and that object's ``whole`` attribute is
     returned.
     """
     damon_obj, info_obj = data_info['data'], data_info['anskey']
     damon_obj.merge_info(info_obj, target_axis, get_validchars)
     merged = core.Damon(damon_obj.merge_info_out,
                         'datadict',
                         'whole',
                         verbose=None)
     return merged.whole
示例#7
0
def read_winsteps(data):
    """Convert Winsteps control file in Damon object

    Returns
    -------
        {'data':Damon object,
         'anskey':answer key
         }

        The answer key is a dict pairing each item label with the
        character at the same position in the control file's Key string.

    Comments
    --------
        This function was a quick and dirty effort to
        read a Winsteps control file for a particular case.
        It probably won't work on your files without some
        editing.  Save a copy and edit it to fit your situation.

        The control file must contain 'Item1', 'Name1', 'Codes', 'Key',
        '&END' and 'END NAMES' lines; if one is missing, the variable it
        sets is never assigned and the function fails.

    Arguments
    ---------
        "data" is a path name to a Winsteps control file that
        contains both specifications and data.

    """

    clean_lines = []

    # Get clean list of lines, capturing some variables.  The file is read
    # in text mode so that every line is a str: the substring tests and
    # replace() below fail on the bytes a binary-mode read yields in
    # Python 3.
    with open(data, 'r') as f:
        for i, line in enumerate(f):
            line = line.replace('"', "").strip()
            clean_lines.append(line)

            # Column positions are 1-based in the file, 0-based here
            if 'Item1' in line:
                start_resp = int(line[line.find('Item1') + 6:]) - 1

            if 'Name1' in line:
                start_name = int(line[line.find('Name1') + 6:]) - 1

            if 'Codes' in line:
                validchars_ = line[line.find('Codes') + 6:]

            if 'Key' in line:
                key = line[line.find('Key') + 4:]

            # Item labels sit between '&END' and 'END NAMES'
            if '&END' in line:
                start_items = i + 1

            # Everything after 'END NAMES' is person data
            if 'END NAMES' in line:
                stop_items = i
                start_data = i + 1

    # Get variables
    items = clean_lines[start_items:stop_items]
    validchars = ['All', list(validchars_)]
    anskey = dict(zip(items, list(key)))

    data_lines = clean_lines[start_data:]

    persons = []
    person_resps = []
    nitems = len(items)

    # Read the data file, parse out persons
    for line in data_lines:
        x = line[start_name:start_resp].strip()
        person = x.replace(' ', '')  # Remove gaps in person ids (temp)
        persons.append(person)

        # One single-character response per item
        resps = list(line[start_resp:start_resp + nitems])
        person_resps.append(resps)

    # Convert into arrays; 'id' fills the corner cell of both label arrays
    persons.insert(0, 'id')
    items.insert(0, 'id')

    rowlabels = np.array(persons)[:, np.newaxis]
    collabels = np.array(items)[np.newaxis, :]
    coredata = np.array(person_resps)

    # Build datadict for Damon
    datadict = {
        'rowlabels': rowlabels,
        'collabels': collabels,
        'coredata': coredata,
        'nheaders4rows': 1,
        'key4rows': 0,
        'rowkeytype': 'S60',
        'nheaders4cols': 1,
        'key4cols': 0,
        'colkeytype': 'S60',
        'validchars': validchars,
        'nanval': '-999',
    }

    d = dmn.Damon(datadict, 'datadict', verbose=True)

    return {'data': d, 'anskey': anskey}
示例#8
0
文件: rasch_0.py 项目: Sohojoe/damon
# Load the example dataset into a Damon object.  The bracketed note above
# each argument lists the values that argument accepts.
data = dmn.Damon(
    # [<array, file, [file list], datadict, Damon object, hd5 file>  => data in format specified by format_=]
    data='a_data_rasch_0_example.csv',
    # [<'textfile', ['textfiles'],'array','datadict','datadict_link','datadict_whole','Damon','hd5','pickle'>]
    format_='textfile',
    # [<'RCD','whole','RCD_whole','RCD_dicts','RCD_dicts_whole'>]
    workformat='RCD_dicts_whole',
    # [<None,['All',[valid chars],<'Num','Guess','SkipCheck',omitted>],['Cols',{'ID1':['a','b'],'ID2':['All'],'ID3':['1.2 -- 3.5'],'ID4':['0 -- '],...}]>]
    validchars=['All', [0, 1], 'Num'],
    # [number of columns to hold row labels]
    nheaders4rows=1,
    # [<None, nth column from left which holds row keys>]
    key4rows=0,
    # [<None, type of row keys>]
    rowkeytype=int,
    # [number of rows to hold column labels]
    nheaders4cols=1,
    # [<None, nth row from top which holds column keys>]
    key4cols=0,
    # [<None, type of column keys>]
    colkeytype=int,
    # [<None,'warn','stop'> => response to duplicate row/col keys]
    check_dups='warn',
    # [[type of 'whole' matrix, <None, int number of decimals>], e.g. ['S60',8],[object,None] ]
    dtype=[object, 3],
    # [Value to which non-numeric/invalid characters should be converted.]
    nanval=-999,
    # [<None, [list of elements to make missing]>]
    missingchars=None,
    # [<None, [[list of elements to make missing in headers]>]
    miss4headers=None,
    # [<None,{0:[[slice(StartRow,EndRow),slice(StartCol,EndCol)],{RecodeFrom:RecodeTo,...}],...}>]
    recode=None,
    # [<None, [ordered list of col keys, to shift to left and use as rowlabels]>]
    cols2left=None,
    # [<None,[slice(StartRow,EndRow),slice(StartCol,EndCol)]>]
    selectrange=None,
    # [<None, character to delimit input file columns (e.g. ',' for .csv and '  ' for .txt tab-delimited files)]
    delimiter=',',
    # [<None,'filename.hd5'> => Name of .hd5 file to hold Damon outputs]
    pytables=None,
    # [<None, True> => report method calls]
    verbose=True,
)
示例#9
0
文件: rasch_1.py 项目: Sohojoe/damon
# looks to the validchars parameter to figure out the rating scale.
vc = {}
for item in range(1, ncols + 1):
    # Items in the first half of the columns take 0/1; the rest take 0/1/2
    vc[item] = [0, 1] if item <= ncols / 2.0 else [0, 1, 2]
validchars = ['Cols', vc, 'Num']

# Load dataset using Damon. The missing parameters go to their defaults.
data = dmn.Damon(
    # [<array, file, [file list], datadict, Damon object, hd5 file>  => data in format specified by format_=]
    data='a_data_rasch_1_example.csv',
    # [<'textfile', ['textfiles'],'array','datadict','datadict_link','datadict_whole','Damon','hd5','pickle'>]
    format_='textfile',
    # [<'RCD','whole','RCD_whole','RCD_dicts','RCD_dicts_whole'>]
    workformat='RCD_dicts_whole',
    # [<None,['All',[valid chars],<'Num','Guess','SkipCheck',omitted>],['Cols',{'ID1':['a','b'],'ID2':['All'],'ID3':['1.2 -- 3.5'],'ID4':['0 -- '],...}]>]
    validchars=validchars,
    # [number of columns to hold row labels]
    nheaders4rows=1,
    # [<None, nth column from left which holds row keys>]
    key4rows=0,
    # [<None, type of row keys>]
    rowkeytype=int,
    # [number of rows to hold column labels]
    nheaders4cols=2,
    # [<None, nth row from top which holds column keys>]
    key4cols=0,
    # [<None, type of column keys>]
    colkeytype=int,
)

# Analyze with Rasch model. Note the groups parameter.
data.rasch(
    # [<None, {'row':int row of group labels}, ['key', {'group0':['i1', i2'],...}], ['index', {'group0':[0, 1],...}]> => identify groups]
    groups={'row': 1},
    # [<None, {'Bank':<pickle file>, 'row_ents':[<None,'All',row entity list>], 'col_ents':[<None,'All',col entity list>]}> ]
    anchors=None,
    # [<[stop_when_change, max_iteration]> => iteration stopping conditions ]
    runspecs=[0.0001, 20],
    # [<decimal> => minimum row/col variance allowed during iteration]
    minvar=0.001,
    # [<+num> => maximum change allowed per iteration]
    maxchange=10,
)

示例#10
0
def build_strat_table(loaded,
                      item,
                      raw_score='RawScore',
                      group='Sex',
                      strata='all_scores'):
    """Build ability stratified table of score counts by group.

    Arguments
    ---------
        "loaded" is a Damon object exposing core_col, rl_col and nanval.

        "item" is the core column key of the item to tabulate.

        "raw_score" is the core column key holding raw scores.

        "group" is the row-label column key holding group membership.

        "strata" is 'all_scores' (one stratum per distinct valid raw
        score) or the number of strata to divide the raw scores into.

    Returns
    -------
        A Damon object whose rows are (stratum, group) combinations and
        whose columns are the item's rating categories; each cell is the
        value returned by count_cats() for that combination.

    Raises
    ------
        dif_stats_Error if the counts for any stratum sum to 1 or less.

    """
    src = loaded
    missing = src.nanval

    # Item scores, raw scores and group labels, one entry per person
    item_scores = src.core_col[item]
    raw_scores = src.core_col[raw_score]
    group_labels = src.rl_col[group]
    unique_raws = np.unique(raw_scores[raw_scores != missing])
    n_raws = len(unique_raws)

    # Decide how many raw scores share a stratum
    if strata == 'all_scores':
        bin_width, strata_ids = 1, np.arange(n_raws)
    else:
        bin_width = int(n_raws / float(strata))
        strata_ids = np.arange(strata)

    # Bins are filled from the top raw score downward, so any remainder
    # at the bottom keeps the initial value 0 and joins the lowest bin.
    binned = np.repeat(strata_ids, bin_width)
    strat_vals = np.zeros(np.shape(unique_raws))
    strat_vals[-len(binned):] = binned
    strat_lookup = dict(zip(unique_raws, strat_vals))

    # Stratum of each person, looked up from that person's raw score
    person_strat = np.zeros(np.shape(raw_scores))
    for raw in unique_raws:
        person_strat[raw_scores == raw] = strat_lookup[raw]

    # Rating categories observed for this item
    cats = np.unique(item_scores[item_scores != missing])

    # Every stratum paired with every group: one table row per pair
    group_ids = np.unique(group_labels)
    stratum_col = np.repeat(strata_ids, len(group_ids))
    group_col = np.tile(group_ids, len(strata_ids))
    n_rows = len(stratum_col)

    # Labels
    corner = np.array([['ID', 'Stratum', 'Group']])
    collabels = np.append(corner, np.array([cats.astype(int)]), axis=1)
    rowlabels = np.zeros((n_rows + 1, 3), dtype='S20')
    rowlabels[0, :] = corner
    rowlabels[1:, 0] = np.arange(n_rows)
    rowlabels[1:, 1] = stratum_col
    rowlabels[1:, 2] = group_col

    # Counts table
    count_grid = np.zeros((n_rows, len(cats)))
    for r, (strat, grp) in enumerate(zip(stratum_col, group_col)):
        for c, cat in enumerate(cats):
            count_grid[r, c] = count_cats(person_strat, group_labels,
                                          item_scores, strat, grp, cat)

    # Wrap the table in a Damon object
    table_spec = {
        'rowlabels': rowlabels,
        'collabels': collabels,
        'coredata': count_grid,
        'key4rows': 0,
        'rowkeytype': int,
        'key4cols': 0,
        'colkeytype': 'S60',
        'nanval': missing,
        'validchars': ['All', ['All'], 'Num']
    }
    counts = dmn.Damon(table_spec, 'datadict', 'RCD_dicts_whole',
                       verbose=None)

    # Each stratum must hold more than one counted response in total
    for strat in strata_ids:
        stratum_rows = counts.extract(counts,
                                      getrows={
                                          'Get': 'NoneExcept',
                                          'Labels': 'Stratum',
                                          'Rows': [strat]
                                      })
        if np.sum(stratum_rows['coredata']) <= 1:
            exc = 'Insufficient data for one of the strata for an item.'
            raise dif_stats_Error(exc)

    return counts
示例#11
0
def dif_stats(
        filename,  # [<'my/file.txt',...> => name of scored data file]
        student_id='Student_ID',  # [<'Student_ID', ...> => student id column label]
        group=[
            'Sex', {
                'focal': 0,
                'ref': 1
            }
        ],  # [<e.g.'Sex', {'focal':'female', 'ref':'male'}]> => column label with assignment to focal and reference]
        raw_score='RawScore',  # [<'RawScore',...> => raw score column label]
        items='All',  # [<'All', ['item1', 'item3',...]> => items for which to get stats]
        stats='All',  # [<'All', [see list in docs]> => desired statistics]
        strata=(
            'all_scores',
            4),  # [<'all_scores', int> => number of raw score strata to apply]
        getrows=None,  # [<None, {'Get':_,'Labels':_,'Rows':_}> => select rows using extract() syntax]
        getcols=None,  # [<None, {'Get':_,'Labels':_,'Cols':_}> => select cols using extract() syntax]
        delimiter='\t',  # [<',', '\t'> => column delimiter]
):
    """Calculate DIF stats for each in a range of items.

    Returns
    -------
        A Damon object with one row per item and one column per
        statistic.  The second row-label column ('N_Cats') holds the
        number of rating categories found for the item.  Statistics that
        are not computed for an item are set to the loaded data's nanval:
        'MH_' statistics when the item has more than two categories,
        'SMD' statistics when it has two or fewer, and every statistic
        when no stratification table could be built.

    Comments
    --------
        "strata" is a pair: the first entry is tried first and the second
        is the fallback used when build_strat_table() raises a
        Damon_Error or dif_stats_Error.

        Requesting 'Flag' or 'SMD/SD' adds the statistics they are
        derived from to the output.  'Counts' is replaced by the columns
        'Count_All', 'Count_Focal' and 'Count_Ref', placed first.

        A list passed as "stats" is copied, so the caller's list is left
        unchanged.

    Arguments
    ---------
        See the bracketed note beside each parameter in the signature.

    """

    # Load data
    d = load_scores(filename=filename,
                    getrows=getrows,
                    getcols=getcols,
                    labelcols=[student_id, group[0]],
                    key4rows=[student_id, 'S60', 'warn_dups'],
                    delimiter=delimiter)

    if items == 'All':
        items = dmnt.getkeys(d, 'Col', 'Core', 'Auto', None)
        items = items[items != raw_score]
    else:
        items = np.array(items)

    if stats == 'All':
        stats = [
            'MH_alpha', 'MH_dif', 'MH_d-dif', 'MH_var', 'MH_d-var', 'MH_z',
            'MH_pval', 'MH_chisq', 'MH_chisq_pval', 'M_dif', 'M_var', 'M_z',
            'M_pval', 'M_chisq', 'M_chisq_pval', 'SMD_dif', 'SMD_var', 'SMD_z',
            'SMD_pval', 'SMD_chisq', 'SMD_chisq_pval', 'SD', 'SMD/SD', 'Flag',
            'Counts'
        ]
    else:
        # The list is extended and reordered below; work on a copy so the
        # caller's own list is not modified as a side effect.
        stats = list(stats)

    # 'Flag' is derived from these statistics, so make sure they exist
    if 'Flag' in stats:
        flag_stats = [
            'MH_d-dif', 'MH_var', 'MH_pval', 'SMD_dif', 'SD', 'SMD/SD',
            'M_chisq_pval'
        ]
        for stat in flag_stats:
            if stat not in stats:
                stats.append(stat)

    # 'SMD/SD' is the ratio of these two
    if 'SMD/SD' in stats:
        smd_sd_stats = ['SMD_dif', 'SD']
        for stat in smd_sd_stats:
            if stat not in stats:
                stats.append(stat)

    # 'Counts' is shorthand for three count columns placed at the front
    if 'Counts' in stats:
        count_stats = [
            'Count_Ref',
            'Count_Focal',
            'Count_All',
        ]
        for stat in count_stats:
            if stat not in stats:
                stats.insert(0, stat)
        stats.remove('Counts')

    # Initialize DIF table
    corner = np.array([['Item', 'N_Cats']])
    collabels = np.append(corner, np.array([stats]), axis=1)
    rowlabels = np.zeros((len(items) + 1, 2), dtype='S60')
    rowlabels[0, :] = corner[0]
    rowlabels[1:, 0] = np.array(items)
    core = np.zeros((len(items), len(stats)))

    # These selections do not depend on the item, so make them once
    M_stats = [stat for stat in stats if 'M_' in stat]
    run_dif_M = bool(M_stats)
    run_sd = any('SD' in stat for stat in stats)
    run_counts = any('Count' in stat for stat in stats)

    # Get stats for each item
    for i, item in enumerate(items):
        try:
            tab = build_strat_table(loaded=d,
                                    item=item,
                                    raw_score=raw_score,
                                    group=group[0],
                                    strata=strata[0])
        except (damon1.utils.Damon_Error, dif_stats_Error):
            # Try with backup strata parameter
            try:
                tab = build_strat_table(loaded=d,
                                        item=item,
                                        raw_score=raw_score,
                                        group=group[0],
                                        strata=strata[1])
            except (damon1.utils.Damon_Error, dif_stats_Error):
                print(
                    'Warning in tools.dif_stats(): Unable to build a '
                    'stratification table for: '
                    'stratum=', strata, 'item=', item)
                core[i, :] = d.nanval
                continue

        ncats = np.size(tab.coredata, axis=1)
        continuity_correction = ncats == 2
        rowlabels[i + 1, 1] = ncats

        # Flag needed DIF functions: MH only for items with at most two
        # categories, SMD only for items with more than two
        MH_stats = [stat for stat in stats if 'MH_' in stat and ncats <= 2]
        run_dif_MH = bool(MH_stats)

        smd_stats = [stat for stat in stats if 'SMD' in stat and ncats > 2]
        run_dif_smd = bool(smd_stats)

        # Get item standard deviation
        stat_ = {}
        if run_sd:
            ivals = d.core_col[item]
            item_sd = np.std(ivals[ivals != d.nanval])
            stat_['SD'] = item_sd

        # Get counts
        if run_counts:
            ivals = d.core_col[item]
            gvals = d.rl_col[group[0]]
            valid = ivals != d.nanval
            stat_['Count_All'] = np.sum(valid)
            # Group codes are converted with str() before the comparison
            stat_['Count_Focal'] = np.sum((valid)
                                          & (gvals == str(group[1]['focal'])))
            stat_['Count_Ref'] = np.sum((valid)
                                        & (gvals == str(group[1]['ref'])))

        # Calculate MH DIF
        if run_dif_MH:
            dif_MH_out = dif_MH(tab, group[1]['focal'], group[1]['ref'])
            for stat in MH_stats:
                stat_[stat] = dif_MH_out[stat]

        # Calculate M DIF
        if run_dif_M:
            dif_M_out = dif_M(tab, group[1]['focal'], group[1]['ref'],
                              continuity_correction)
            for stat in M_stats:
                stat_[stat] = dif_M_out[stat]

        # Calculate SMD DIF
        if run_dif_smd:
            dif_smd_out = dif_smd(tab, group[1]['focal'], group[1]['ref'])

            for stat in smd_stats:
                if stat != 'SMD/SD':
                    stat_[stat] = dif_smd_out[stat]
                else:
                    # 'SD' is always requested alongside 'SMD/SD' (see
                    # above), so item_sd has been computed by this point
                    stat_[stat] = dif_smd_out['SMD_dif'] / item_sd

        # Calculate DIF flag
        if 'Flag' in stats:
            if ncats == 2:
                d_dif = np.abs(stat_['MH_d-dif'])
                se = np.sqrt(stat_['MH_var'])
                pval = stat_['MH_pval']
                z_crit = (d_dif - 1.0) / se

                if d_dif > 1.5 and z_crit > 1.645:
                    stat_['Flag'] = 2
                elif d_dif < 1.0 or pval > 0.05:
                    stat_['Flag'] = 0
                else:
                    stat_['Flag'] = 1
            else:
                smd_sd = np.abs(stat_['SMD/SD'])
                p_val = stat_['M_chisq_pval']

                if smd_sd > 0.25 and p_val < 0.05:
                    stat_['Flag'] = 2
                else:
                    stat_['Flag'] = 0

        # Populate table; statistics not applicable to this item's
        # category count are recorded as nanval
        for j, stat in enumerate(stats):
            if 'MH_' in stat and ncats > 2:
                core[i, j] = d.nanval
            elif 'SMD' in stat and ncats <= 2:
                core[i, j] = d.nanval
            else:
                core[i, j] = stat_[stat]

    # Build table
    tab_dict = {
        'rowlabels': rowlabels,
        'collabels': collabels,
        'coredata': core,
        'key4rows': 0,
        'rowkeytype': 'S60',
        'key4cols': 0,
        'colkeytype': 'S60',
        'nanval': d.nanval,
        'validchars': ['All', ['All'], 'Num']
    }

    tab_obj = dmn.Damon(tab_dict, 'datadict', 'RCD_dicts_whole', verbose=None)

    return tab_obj