Пример #1
0
    def __init__(self, *args):
        """Load a Godin leisure-activity sheet and normalise its columns.

        All positional arguments are forwarded unchanged to
        ``DataParser.__init__`` (presumably this populates ``self.data`` and
        ``self.info``, both of which are read immediately afterwards).

        After construction ``self.data`` holds the first seven source columns
        with the raw headers renamed to ``SubjectID`` plus the names listed in
        ``self.fields``; rows with fewer than five non-NA values are dropped
        and any remaining missing values are replaced by 999.
        """
        DataParser.__init__(self, *args)
        # cleanup subjects
        self.data['ID'] = self.data.apply(lambda x: stripspaces(x, 0), axis=1)
        if self.info is None:
            self.info = {'prefix': 'GDN', 'xsitype': 'opex:godin'}
        # Replace field headers
        self.fields = ['strenuous', 'moderate', 'light', 'total', 'sweat']
        cols = [
            'ID', 'Strenuous', 'Moderate', 'Light',
            'Totalleisureactivityscore', 'Sweat(1,2,or3)'
        ]
        renamecols = dict(zip(cols, ['SubjectID'] + self.fields))

        # Take an explicit copy so the clean-up below never operates on a
        # slice of the frame loaded by the base class.
        df = self.data.iloc[:, 0:7].copy()
        # Keep only rows with at least 5 non-NA values. `how` and `thresh`
        # are mutually exclusive in pandas, so only `thresh` is passed.
        df = df.dropna(axis=0, thresh=5)
        df = df.fillna(999)  # replace any remaining na with 999
        df = df.rename(columns=renamecols)
        self.data = df
        # sort subjects
        self.sortSubjects('SubjectID')
        print('Data load complete')
Пример #2
0
    def __init__(self, *args, **kwargs):
        """Load an IPAQ sheet and reduce it to the subject ID plus IPAQ fields.

        All positional and keyword arguments are forwarded unchanged to
        ``DataParser.__init__``.

        After construction ``self.data`` contains the ``ID`` column followed
        by the columns named in ``self.fields``, with ``ID`` replaced by the
        result of ``stripspaces`` for each row.

        Raises:
            ValueError: if the base class left ``self.data`` as ``None``.
            KeyError: raised by pandas if ``ID`` or any expected field column
                is missing from the loaded sheet.
        """
        DataParser.__init__(self, *args, **kwargs)
        if self.data is None:
            raise ValueError('IPAQ Parser: Data not loaded')
        self.type = ''

        # Drop columns that contain no values at all.
        self.data.dropna(axis=1, how='all', inplace=True)
        fields = [
            'sitting', 'walking_days', 'walking_time', 'moderate_days',
            'moderate_time', 'vigorous_days', 'vigorous_time', 'pa', 'mvpa'
        ]
        self.fields = fields
        # Copy the column subset so the ID assignment below writes to an
        # independent frame rather than to a selection of the original.
        self.data = self.data[['ID'] + fields].copy()
        self.data['ID'] = self.data.apply(lambda x: stripspaces(x, 0), axis=1)
        self.sortSubjects('ID')
        print('Data load complete')

        if self.info is None:
            self.info = {'prefix': 'IP', 'xsitype': 'opex:ipaq'}
Пример #3
0
 def __init__(self, *args):
     """Load a DEXA sheet and split it into one frame per assessment interval.

     All positional arguments are forwarded unchanged to
     ``DataParser.__init__``. Column headers are then replaced with the
     ``concatenated`` column of the ``dexa_header`` sheet in
     ``dexa_fields.xlsx`` (looked up under ``self.resource_dir``).

     ``self.df`` maps each interval key in ``self.intervals`` to a frame made
     of the first four (subject) columns plus ``SubjectID`` and every data
     column whose header starts with that interval's label; the text before
     the first underscore is stripped from those data column names.

     Raises:
         ValueError: if ``dexa_fields.xlsx`` is not readable.
     """
     DataParser.__init__(self, *args)
     fields = join(self.resource_dir, "dexa_fields.xlsx")
     if not access(fields, R_OK):
         raise ValueError("Cannot access fields file: %s" % fields)

     # Replace field headers (`sheet_name` is the current pandas keyword;
     # the older `sheetname` spelling is no longer accepted).
     self.fields = pd.read_excel(fields, header=0, sheet_name='dexa_fields')
     df_header = pd.read_excel(fields, header=0, sheet_name='dexa_header')
     self.header = df_header['concatenated'].tolist()
     self.data.columns = self.header
     print("Loaded rows=", len(self.data['ID']))

     # Extract subject info; copy so adding SubjectID does not write to a
     # slice of self.data.
     df_subj = self.data.iloc[:, 0:4].copy()
     df_subj['SubjectID'] = df_subj.apply(lambda x: stripspaces(x, 'ID'), axis=1)

     # Split data into intervals
     self.intervals = {0:'BASELINE', 3:'MIDPOINT',6:'ENDPOINT', 9:'MID-FOLLOW-UP', 12:'FOLLOW-UP'}
     self.df = dict()
     for i, intval in self.intervals.items():
         cols = [c for c in self.header if c.startswith(intval)]
         # Drop the leading "<INTERVAL>_" part of each header.
         simplecols = ["_".join(col.split("_")[1:]) for col in cols]
         self.df[i] = pd.concat([df_subj, self.data[cols]], axis=1)
         self.df[i].columns = df_subj.columns.tolist() + simplecols
         if DEBUG:
             print("Interval=%s data=%d" % (intval, len(self.df[i])))
     self.sortSubjects('SubjectID')
Пример #4
0
 def __loadEfficiencydata(self, datafile):
     """Read the efficiency table from the first sheet of *datafile*.

     The second spreadsheet row is used as the header, the first data row
     below it is discarded, and a ``SubjectID`` column is added from
     ``stripspaces(row, 'ID')``.

     Returns:
         The resulting DataFrame.
     """
     raw = pd.read_excel(datafile, sheet_name=0, header=1)
     # Remove the row(s) carrying the first index label.
     table = raw.drop(raw.index[0])
     subject_ids = table.apply(lambda row: stripspaces(row, 'ID'), axis=1)
     table['SubjectID'] = subject_ids
     logging.info("Loaded Efficiency: %d", len(table))
     return table
Пример #5
0
    def __init__(self, *args):
        """Load the sheet and rename its result columns to short field names.

        Positional arguments are forwarded unchanged to
        ``DataParser.__init__``. The ``ID`` column is replaced by the result
        of ``stripspaces`` for each row, the three ``*Result`` headers are
        renamed to the names stored in ``self.fields``, and subjects are
        sorted by ``ID``.
        """
        DataParser.__init__(self, *args)

        # Normalise the subject identifiers.
        self.data['ID'] = self.data.apply(
            lambda row: stripspaces(row, 0), axis=1)

        # Source header -> field name.
        header_map = {
            'CurrentResult': 'current',
            'PastResult': 'past',
            'TotalResult': 'total',
        }
        self.fields = list(header_map.values())

        frame = self.data
        frame.rename(columns=header_map, inplace=True)
        self.data = frame

        self.sortSubjects('ID')
        print('Data load complete')
Пример #6
0
def create_cutoffs(inputdir):
    """Write ``dates_cutoff.csv`` in *inputdir* from ``Dates.xlsx``.

    The sheet is indexed by a ``Subject`` column built from
    ``stripspaces(row, 'ID')``. For months 1, 2, 4 and 5 a
    ``'<n> month assessment'`` column is set to the row-wise maximum of all
    columns whose name starts with ``'<n> month'``.

    The exported columns are ``Assessment B``, ``Training start date`` and
    the ``'<n> month assessment'`` columns for months 1-6, 9 and 12. Months
    3, 6, 9 and 12 are not computed here, so those columns must already be
    present in the sheet.
    """
    schedule = pd.read_excel(join(inputdir, 'Dates.xlsx'))
    schedule['Subject'] = schedule.apply(
        lambda row: stripspaces(row, 'ID'), axis=1)
    schedule.set_index('Subject', inplace=True)

    for month in (1, 2, 4, 5):
        pattern = re.compile('^{} month'.format(str(month)))
        matching = []
        for name in schedule.columns.tolist():
            if pattern.search(name):
                matching.append(name)
        latest = schedule[matching].values.max(axis=1)
        schedule[str(month) + ' month assessment'] = latest

    export_cols = ['Assessment B', 'Training start date']
    for month in list(range(1, 7)) + [9, 12]:
        export_cols.append(str(month) + ' month assessment')

    target = join(inputdir, 'dates_cutoff.csv')
    schedule[export_cols].to_csv(target)
Пример #7
0
    def __init__(self, *args, **kwargs):
        """Load an insomnia questionnaire sheet and normalise its columns.

        Positional arguments are forwarded unchanged to
        ``DataParser.__init__``. After construction ``self.data`` holds
        ``SubjectID`` followed by the names in ``self.fields`` (the keys of
        ``self.fieldmap``), all stored with ``object`` dtype.

        Raises:
            ValueError: if the loaded sheet is empty or has at most one
                column.
        """
        # NOTE(review): **kwargs is accepted but not passed on to
        # DataParser.__init__ -- confirm whether that is intended.
        DataParser.__init__(self, *args)
        # Maybe empty sheet
        if self.data.empty or len(self.data.columns) <= 1:
            raise ValueError("No data available")
        # cleanup subjects
        self.data['ID'] = self.data.apply(lambda x: stripspaces(x, 0), axis=1)

        if self.info is None:
            self.info = {'prefix': 'INS', 'xsitype': 'opex:insomnia'}
        # Replace field headers: field name -> source column header
        self.fieldmap = {
            'q1': 'Q1',
            'q2': 'Q2',
            'q3': 'Q3',
            'q4': 'Q4',
            'q5': 'Q5',
            'q6': 'Q6',
            'q7': 'Q7',
            'total': 'TotalScore'
        }
        self.fields = list(self.fieldmap.keys())
        # Source headers in the same order as self.fields, derived from the
        # mapping instead of being listed a second time by hand.
        score_cols = list(self.fieldmap.values())
        total_col = self.fieldmap['total']

        # zeros have been entered when should be blank
        self.data[total_col] = self.data.apply(
            lambda x: self.nodatarow(x, total_col), axis=1)
        self.data[score_cols] = self.data.apply(
            lambda x: self.nodatarow(x, score_cols), axis=1)

        # astype returns a new frame, so relabelling it cannot touch the
        # frame it was selected from.
        df = self.data[['ID'] + score_cols].astype(object)
        df.columns = ['SubjectID'] + self.fields

        self.data = df
        # sort subjects
        self.sortSubjects('SubjectID')
        print('Data load complete')
Пример #8
0
    def __init__(self, *args):
        """Load the sheet and rename its question columns to field names.

        Positional arguments are forwarded unchanged to
        ``DataParser.__init__``. The ``ID`` column is replaced by the result
        of ``stripspaces`` for each row, ``Q1``..``Q8``, ``SumTotal`` and
        ``%Enjoyment`` are renamed to the names in ``self.fields``, and
        subjects are sorted by ``ID``.
        """
        DataParser.__init__(self, *args)

        # Normalise the subject identifiers.
        self.data['ID'] = self.data.apply(
            lambda row: stripspaces(row, 0), axis=1)

        # NOTE(review): prefix 'GDN' paired with xsitype 'opex:paces' looks
        # like a possible copy-paste -- confirm the intended prefix.
        if self.info is None:
            self.info = {'prefix': 'GDN', 'xsitype': 'opex:paces'}

        # Target field names: q1..q8 followed by the two summary fields.
        question_fields = []
        for number in range(1, 9):
            question_fields.append('q' + str(number))
        self.fields = question_fields + ['total', 'enjoy_percent']

        # Source headers, in the same order as self.fields.
        source_headers = [
            'Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'SumTotal',
            '%Enjoyment'
        ]
        header_map = {
            old: new for old, new in zip(source_headers, self.fields)
        }
        self.data.rename(columns=header_map, inplace=True)

        self.sortSubjects('ID')
        print('Data load complete')
Пример #9
0
 def __init__(self, *args):
     """Load a PSQI sheet and keep the subject ID, components and total.

     Positional arguments are forwarded unchanged to
     ``DataParser.__init__``. After construction ``self.data`` holds
     ``SubjectID`` followed by the names in ``self.fields`` (``c1``..``c7``
     and ``total``), taken from ``ID``, every string-named column starting
     with ``Component`` and the ``total`` column.

     Raises:
         ValueError: if the loaded sheet is empty or has at most one column;
             pandas also raises ValueError when the number of selected
             columns does not match the number of new names.
     """
     DataParser.__init__(self, *args)
     # Maybe empty sheet
     if self.data.empty or len(self.data.columns) <= 1:
         raise ValueError("No data available")
     # cleanup subjects
     self.data['ID'] = self.data.apply(lambda x: stripspaces(x, 0), axis=1)
     if self.info is None:
         self.info = {'prefix': 'PSQ', 'xsitype': 'opex:psqi'}
     # Replace field headers
     self.fields = ['c' + str(i) for i in range(1, 8)] + ['total']
     ncols = ['SubjectID'] + self.fields
     # Headers may be non-string (e.g. numeric), so check the type before
     # calling startswith.
     components = [
         c for c in self.data.columns
         if isinstance(c, str) and c.startswith('Component')
     ]
     cols = ['ID'] + components + ['total']
     # Copy the selection so relabelling works on an independent frame.
     df = self.data[cols].copy()
     df.columns = ncols
     self.data = df
     # sort subjects
     self.sortSubjects('SubjectID')
     print('Data load complete')