def ReadVariables():
    """Reads Stata dictionary files for NSFG data.

    Parses '2002FemPreg.dct' and '2002FemResp.dct', stacks their
    `variables` tables and indexes the result by the `name` column.

    returns: DataFrame that maps variables names to descriptions
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import pandas as pd

    vars1 = thinkstats2.ReadStataDct('2002FemPreg.dct').variables
    vars2 = thinkstats2.ReadStataDct('2002FemResp.dct').variables

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # tables the same way and also works on older pandas versions.
    all_vars = pd.concat([vars1, vars2])
    all_vars.index = all_vars.name
    return all_vars
def read_resp_file(dct_file="2002FemResp.dct", dat_file="2002FemResp.dat.gz"):
    """Load a gzip-compressed fixed-width data file via a Stata dictionary.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data file

    returns: the table produced by ReadFixedWidth
    """
    # Modeled on the textbook's ReadFemPreg (page 5 of the PDF version).
    # No cleaning step is applied: the original author left it out on
    # purpose, noting that the reference solution's cleaner is only `pass`.
    layout = thinkstats2.ReadStataDct(dct_file)
    return layout.ReadFixedWidth(dat_file, compression='gzip')
def read_2002_fem_resp(stata_file='2002FemResp.dct',
                       data_file='2002FemResp.dat.gz'):
    """Read a gzip-compressed fixed-width file described by a Stata dictionary.

    stata_file: string file name of the Stata dictionary
    data_file: string file name of the gzip-compressed data file

    returns: the table produced by ReadFixedWidth (all rows, nrows=None)
    """
    return thinkstats2.ReadStataDct(stata_file).ReadFixedWidth(
        data_file,
        compression='gzip',
        nrows=None,
    )
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """Read the data file, run CleanFemResp on it and return the result.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data file
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: the table produced by ReadFixedWidth, after CleanFemResp
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    CleanFemResp(respondents)
    return respondents
def readFemaleResponses(dictionaryFile='./data/2002FemResp.dct',
                        dataFile='./data/2002FemResp.dat.gz',
                        nrows=None):
    """Read a gzip-compressed fixed-width file using a Stata dictionary.

    dictionaryFile: string file name of the Stata dictionary
    dataFile: string file name of the gzip-compressed data file
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: the table produced by ReadFixedWidth
    """
    layout = thinkstats2.ReadStataDct(dictionaryFile)
    return layout.ReadFixedWidth(dataFile, compression='gzip', nrows=nrows)
def ReadFemResp(
        dct_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dct',
        dat_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dat.gz',
        nrows=None):
    """Read the data file, clean it with nsfg.CleanFemResp and return it.

    NOTE(review): both defaults are absolute paths inside one user's home
    directory; callers on other machines must pass their own paths.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data file
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: the table produced by ReadFixedWidth, after nsfg.CleanFemResp
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    nsfg.CleanFemResp(respondents)
    return respondents
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """Read a gzip-compressed fixed-width file using a Stata dictionary.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data file
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: the table produced by ReadFixedWidth
    """
    # Parse the dictionary, then let it drive the fixed-width read.
    return thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
def readfunc():
    """Print the sorted value counts of 'pregnum' and the number of rows.

    Reads '2002FemResp.dat.gz' using the '2002FemResp.dct' dictionary.
    Returns nothing; the results are only printed.
    """
    layout = thinkstats2.ReadStataDct('2002FemResp.dct')
    table = layout.ReadFixedWidth(
        '2002FemResp.dat.gz', compression='gzip', nrows=None)

    pregnum_counts = table['pregnum'].value_counts().sort_index()
    print(pregnum_counts)
    print(len(table))
def ReadMale2002(dct_file='2002Male.dct', dat_file='2002Male.dat.gz'):
    """Reads male data from NSFG Cycle 6.

    dct_file: string file name of the Stata dictionary (read as iso-8859-1)
    dat_file: string file name of the gzip-compressed data file

    returns: DataFrame, after CleanResp has been applied to it
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    males = layout.ReadFixedWidth(dat_file, compression='gzip')
    CleanResp(males)
    return males
def ReadGss(dirname):
    """Reads GSS files from the given directory.

    Expects 'GSS.dct' (Stata dictionary) and 'GSS.dat' (data file) inside
    dirname.

    dirname: string or path-like directory name

    returns: DataFrame
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import os

    # os.path.join avoids a doubled separator when dirname already ends in
    # one, accepts path-like objects, and resolves an empty dirname against
    # the current directory rather than the filesystem root.
    dct = thinkstats2.ReadStataDct(os.path.join(dirname, 'GSS.dct'))
    gss = dct.ReadFixedWidth(os.path.join(dirname, 'GSS.dat'))
    return gss
def ReadFemPreg(dct_file='data/2011_2013_FemPregSetup.dct',
                dat_file='data/2011_2013_FemPregData.dat.gz'):
    """Reads the NSFG pregnancy data and cleans it with CleanFemPreg.

    dct_file: string file name (Stata dictionary, read as iso-8859-1)
    dat_file: string file name (gzip-compressed data)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    pregnancies = layout.ReadFixedWidth(dat_file, compression='gzip')
    CleanFemPreg(pregnancies)
    return pregnancies
def ReadResp(dct_file, dat_file, **options):
    """Reads the NSFG respondent data.

    dct_file: string file name (Stata dictionary, read as iso-8859-1)
    dat_file: string file name (gzip-compressed data)
    options: extra keyword arguments forwarded to ReadFixedWidth

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    return layout.ReadFixedWidth(dat_file, compression='gzip', **options)
def ReadFemResp2002(dct_file='2002FemResp.dct',
                    dat_file='2002FemResp.dat.gz'):
    """Reads respondent data from NSFG Cycle 6.

    Only the columns caseid, cmbirth, cmintvw, finalwgt and age_r are
    requested from the data file; CleanResp is then applied.

    dct_file: string file name (Stata dictionary, read as iso-8859-1)
    dat_file: string file name (gzip-compressed data)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    respondents = layout.ReadFixedWidth(
        dat_file,
        compression='gzip',
        usecols=['caseid', 'cmbirth', 'cmintvw', 'finalwgt', 'age_r'],
    )
    CleanResp(respondents)
    return respondents
def ReadFemResp(dct_file='2002FemResp.dct', dat_file='2002FemResp.dat.gz'):
    """
    Read the data file described by a Stata dictionary and clean it.

    :param dct_file: Stata dictionary file describing the column layout
    :param dat_file: a gzip-compressed data file in plain text, with fixed
        width columns
    :return: a dataframe, after CleanFemResp has been applied to it
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(dat_file, compression='gzip')
    CleanFemResp(respondents)
    return respondents
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    '''
    (str file name, str file name) -> DataFrame

    Reads the NSFG respondents data file, applies CleanFemResp and
    returns the DataFrame. nrows is forwarded to ReadFixedWidth.
    '''
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    CleanFemResp(respondents)
    return respondents
def ReadFemPreg(dct_file, dat_file):
    """Reads the NSFG 2006-2010 pregnancy data and applies CleanFemPreg.

    dct_file: string file name (Stata dictionary, read as iso-8859-1)
    dat_file: string file name (gzip-compressed data)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    pregnancies = layout.ReadFixedWidth(dat_file, compression='gzip')
    CleanFemPreg(pregnancies)
    return pregnancies
def read_fem_resp(datfile='2002FemResp.dat.gz', dctfile='2002FemResp.dct'):
    """
    Read the respondent file and return it after clean_fem_resp has run.

    Note the parameter order: the data file comes first, the dictionary
    second.

    :type datfile: str
    :type dctfile: str
    :rtype: pd.DataFrame
    """
    layout = thinkstats2.ReadStataDct(dctfile)
    respondents = layout.ReadFixedWidth(datfile, compression='gzip')
    clean_fem_resp(respondents)
    return respondents
def ReadMale2010(dct_file='2006_2010_MaleSetup.dct',
                 dat_file='2006_2010_Male.dat.gz'):
    """Reads male data from NSFG Cycle 7.

    The `wgtq1q16` column is copied into a `finalwgt` column.

    dct_file: string file name (Stata dictionary, read as iso-8859-1)
    dat_file: string file name (gzip-compressed data)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    males = layout.ReadFixedWidth(dat_file, compression='gzip')
    males['finalwgt'] = males.wgtq1q16
    return males
def ReadFemResp(dct_file, dat_file, nrows=None):
    """Reads the NSFG respondent data and applies CleanFemResp.

    dct_file: string file name (Stata dictionary)
    dat_file: string file name (gzip-compressed data)
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    CleanFemResp(respondents)
    return respondents
def ReadFemPreg(dct_file='2002FemPreg.dct', dat_file='2002FemPreg.dat.gz'):
    """Reads the NSFG pregnancy data and applies CleanFemPreg.

    dct_file: string file name (Stata dictionary)
    dat_file: string file name (gzip-compressed data)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    pregnancies = layout.ReadFixedWidth(dat_file, compression='gzip')
    CleanFemPreg(pregnancies)
    return pregnancies
def ReadFemPreg(dct_file='2002FemPreg.dct',
                dat_file='2002FemPreg.dat.gz',
                nrows=None):
    """Reads the NSFG pregnancy data file (no cleaning step is applied).

    dct_file: string file name (Stata dictionary)
    dat_file: string file name (gzip-compressed data)
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    return layout.ReadFixedWidth(dat_file, compression='gzip', nrows=nrows)
def ReadFemPreg(
        dct_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemPreg.dct',
        dat_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemPreg.dat.gz'):
    """Reads the NSFG pregnancy data and applies CleanFemPreg.

    NOTE(review): both defaults are absolute paths inside one user's home
    directory; callers on other machines must pass their own paths.

    dct_file: string file name (Stata dictionary)
    dat_file: string file name (gzip-compressed data)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    pregnancies = layout.ReadFixedWidth(dat_file, compression='gzip')
    CleanFemPreg(pregnancies)
    return pregnancies
def ReadFemResp(file1, file2, nrows=None):
    """
    Read the dct and dat files (NSFG respondent data) and return the data
    as a dataframe.

    :param file1: (str) dct filename
    :param file2: (str) dat filename, gzip-compressed
    :param nrows: forwarded to ReadFixedWidth (None by default)
    :return: df - dataframe of NSFG respondent data
    """
    layout = thinkstats2.ReadStataDct(file1)
    return layout.ReadFixedWidth(file2, compression="gzip", nrows=nrows)
def readNplotNUMKDHH(dctfile='2002FemResp.dct',
                     datfile='2002FemResp.dat.gz',
                     compression='gzip'):
    """Read the data into a frame and plot the NUMKDHH PMF.

    Two curves are drawn: the PMF of the `numkdhh` column (label
    'numkdhh') and the result of BiasedPmf applied to it (label 'biased').

    dctfile: string file name of the Stata dictionary
    datfile: string file name of the data file
    compression: forwarded to ReadFixedWidth ('gzip' by default)
    """
    layout = thinkstats2.ReadStataDct(dctfile)
    frame = layout.ReadFixedWidth(datfile, compression=compression)

    actual = thinkstats2.Pmf(frame.numkdhh)
    thinkplot.Pmf(actual, label='numkdhh')

    biased = BiasedPmf(actual)
    thinkplot.Pmf(biased, label='biased')

    thinkplot.show()
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """
    Read the FemResp data file and apply CleanFemResp to it.

    dct_file: string file name (Stata dictionary)
    dat_file: string file name (gzip-compressed data)
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    CleanFemResp(respondents)
    return respondents
def ReadFemResp(
        dct_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dct',
        dat_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dat.gz',
        nrows=None):
    """Reads the NSFG respondent data and applies CleanFemResp.

    NOTE(review): both defaults are absolute paths inside one user's home
    directory; callers on other machines must pass their own paths.

    dct_file: string file name (Stata dictionary)
    dat_file: string file name (gzip-compressed data)
    nrows: forwarded to ReadFixedWidth (None by default)

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    CleanFemResp(respondents)
    return respondents
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                **options):
    """Reads the NSFG respondent data and applies CleanData.

    dct_file: string file name (Stata dictionary)
    dat_file: string file name (gzip-compressed data)
    options: extra keyword arguments forwarded to ReadFixedWidth

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', **options)
    CleanData(respondents)
    return respondents
def ReadFemResp(dct_file='2002FemResp.dct', dat_file='2002FemResp.dat.gz'):
    """Read a gzip-compressed fixed-width file using a Stata dictionary.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data file

    returns: the table produced by ReadFixedWidth
    """
    return thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file, compression='gzip')
def testReadStataDct(self):
    """Check the shape of the dictionary parsed from '2002FemPreg.dct'.

    `variables`, `colspecs` and `names` must each hold 243 entries, and
    the second element of the last colspec must be -1.
    """
    parsed = thinkstats2.ReadStataDct('2002FemPreg.dct')

    # Attributes are looked up one at a time so the checks run in the
    # same order as three separate assertions would.
    for attr in ('variables', 'colspecs', 'names'):
        self.assertEqual(len(getattr(parsed, attr)), 243)

    last_colspec = parsed.colspecs[-1]
    self.assertEqual(last_colspec[1], -1)
def readResp(dct_file='2002FemResp.dct', dat_file='2002FemResp.dat.gz'):
    """Read a gzip-compressed fixed-width file using a Stata dictionary.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data file

    returns: the table produced by ReadFixedWidth
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    return layout.ReadFixedWidth(dat_file, compression='gzip')