Exemplo n.º 1
0
def load():
    '''
    Load the active San Francisco business registry CSV into a DataFrame.

    Reads 'Businesses_Registered_in_San_Francisco_-_Active.csv' from the
    module-level `basedir`, replaces spaces in the column names with
    underscores, casts Class_Code and PBC_Code to int32, and adds these
    derived columns:

    LocX, LocY : the first and second decimal numbers found in each non-null
        'Location' string, as float32 (NaN where 'Location' is null).
    Descript   : the description paired with the PBC code in 'pbc_codes.csv'
        (read relative to the current working directory), or 'UNKNOWN' when
        the code has no entry.
    Founded    : 'DBA_Start_Date' parsed by misc.str2date as YYYYMMDD.

    Returns the augmented DataFrame.
    '''
    df = pd.read_csv(os.path.join(basedir, 'Businesses_Registered_in_San_Francisco_-_Active.csv'))
    df.rename(columns = lambda x: re.sub(' ', '_', x), inplace = True)
    df['Class_Code'] = np.int32(df.Class_Code)
    df['PBC_Code'] = np.int32(df.PBC_Code)

    # Convert lat, long from string to float.
    # NOTE(review): the pattern has no sign, so a leading '-' on a coordinate
    # is dropped -- confirm against the 'Location' format before relying on
    # the sign of LocX / LocY.
    p = re.compile(r'\d*\.\d*')
    LocX = []; LocY = []; ix = []
    locations = df['Location']
    for i, loc in zip(locations.index, locations):
        if pd.notnull(loc):
            # exactly two numbers are expected; anything else fails the unpack
            x, y = np.float32(p.findall(loc))
            LocX.append(x); LocY.append(y)
            ix.append(i)
    # index alignment leaves NaN on the rows that had no Location
    df['LocX'] = pd.Series(LocX, index = ix)
    df['LocY'] = pd.Series(LocY, index = ix)

    # Add description field from PBC code descriptions.
    pbc2descr = dict(np.loadtxt('pbc_codes.csv', 'S', delimiter = ','))
    df['Descript'] = [pbc2descr.get(str(code), 'UNKNOWN') for code in df.PBC_Code]

    # Add founded date, converted from str to date.
    # Built straight from the column so an empty frame does not break.
    dates = [misc.str2date(str(start), delimiter = '', format = 'YYYYMMDD')
             for start in df['DBA_Start_Date']]
    df['Founded'] = pd.Series(dates, index = df.index)

    return df
Exemplo n.º 2
0
Arquivo: PPI.py Projeto: r-b-g-b/Lab
def fileconvert_all(studydir):
    '''
    Convert every PPI text output under <studydir>/data/PPI into a CSV file.

    Each file matching <studydir>/data/PPI/[0-9]*/[A-Za-z]*.txt must be named
    <animalID>_<gen>_<condition>_<mo>_<da>_<yr>_<rest>; the name is split on
    '_' into exactly those seven parts. The result of fileconvert(fpath) is
    combined with that metadata, the session string and the animal's age in
    days, and written to
    <studydir>/pairpulse/<animalID>_<gen>_<condition>_<YYYY>_<MM>_<DD>.csv.
    When the animal info exposes a 'date1' attribute, the number of days from
    date1 to the session is added as 'postdate1'.

    The pairpulse directory is created if missing. Files whose output
    already exists are skipped before any animal metadata is looked up.
    '''
    pairpulsedir = os.path.join(studydir, 'pairpulse')
    if not os.path.exists(pairpulsedir):
        os.mkdir(pairpulsedir)

    animalpaths = glob.glob(os.path.join(studydir, 'data', 'PPI', '[0-9]*'))
    for animalpath in animalpaths:
        for fpath in glob.glob(os.path.join(animalpath, '[A-Za-z]*.txt')):

            relat = os.path.basename(fpath)
            animalID, gen, condition, mo, da, yr, _ = relat.split('_')

            # zero-pad the date parts so the output names have a fixed width
            mo = '%2.2u' % int(mo)
            da = '%2.2u' % int(da)
            yr = '%4.4u' % int(yr)
            sess_str = '_'.join((yr, mo, da))

            outpath = os.path.join(pairpulsedir, '%s.csv' % '_'.join((animalID, gen, condition, yr, mo, da)))

            # skip if the file already exists
            if os.path.exists(outpath):
                continue

            animalinfo = get_animalinfo(animalID, studydir)
            dob = misc.str2date(animalinfo.DOB.values[0], delimiter = '/', format = 'MMDDYYYY')
            sess_date = misc.str2date(sess_str, delimiter = '_', format = 'YYYYMMDD')

            # how old was the animal when this session was run?
            fields = dict(animalID = animalID, gen = gen, condition = condition, sess = sess_str, age = (sess_date - dob).days)

            # days elapsed since date1, only when the animal has one
            if hasattr(animalinfo, 'date1'):
                date1 = misc.str2date(animalinfo.date1.values[0], delimiter = '/', format = 'MMDDYYYY')
                fields['postdate1'] = (sess_date - date1).days

            fields['gapratio'] = fileconvert(fpath)
            pd.DataFrame(fields).to_csv(outpath)
Exemplo n.º 3
0
Arquivo: Gap.py Projeto: r-b-g-b/Lab
def fileconvert_processed(studydir):
    '''
    Split per-cage gap detection tables into one CSV file per animal.

    For every cage directory <studydir>/data/Gap/[0-9]*, the animals of that
    cage are the entries of <studydir>/dobs.csv whose animalID contains the
    cage directory name. Each '*.txt' file in the cage is read as a
    tab-separated table without header (first six columns, one row per
    animal, rows labelled with the animal IDs in dobs.csv order). File names
    must split on '_' into <x>_<gen>_<condition>_<mo>_<da>_<yr>_<rest>; a
    single leading '_' is tolerated and carried over to the output name.

    For each animal the six values of its row are paired with `freqs` and
    written, together with genotype, condition, session date, age in days,
    one 'post<column>' day count per date column of the animal info, and all
    animal info columns, to
    <studydir>/gapdetection/<animalID>_<gen>_<condition>_<yr>_<mo>_<da>.csv.
    Outputs that already exist are left untouched.
    '''
    freqs = [5000, 7071, 10000, 14142, 20000, 28284]
    gapdetectiondir = os.path.join(studydir, 'gapdetection')
    if not os.path.exists(gapdetectiondir):
        os.mkdir(gapdetectiondir)

    cagepaths = glob.glob(os.path.join(studydir, 'data', 'Gap', '[0-9]*'))

    dobs = pd.read_csv(os.path.join(studydir, 'dobs.csv'))
    for cagepath in cagepaths:

        cageID = os.path.basename(cagepath)
        animalIDs = [i for i in dobs.animalID if cageID in i]

        for fpath in glob.glob(os.path.join(cagepath, '*.txt')):

            df = pd.read_csv(fpath, usecols=list(range(6)), sep='\t', header=None, nrows=len(animalIDs))
            df.index = animalIDs

            relat = os.path.basename(fpath)
            # a leading underscore is kept on the output name but is not
            # part of the first filename field
            prefix = '_' if relat.startswith('_') else ''
            _, gen, condition, mo, da, yr, _ = relat[len(prefix):].split('_')

            for animalID in animalIDs:

                newrelat = prefix + '%s.csv' % '_'.join((animalID, gen, condition, yr, mo, da))
                outpath = os.path.join(gapdetectiondir, newrelat)

                # skip if the file already exists
                if os.path.exists(outpath):
                    continue

                # label-based row lookup (.ix no longer exists in pandas)
                gapratio = df.loc[animalID].values

                # required fields first, in a fixed column order
                d = OrderedDict()
                d['freq'] = freqs
                d['gapratio'] = gapratio
                d['animalID'] = animalID
                d['gen'] = gen
                d['condition'] = condition

                # load the animal DOB, group, etc. from the dobs.csv file
                animalinfo = get_animalinfo(animalID, studydir)

                # replace the DOB string by the parsed date so the parsed
                # value is what gets copied into the output below
                dob = misc.str2date(animalinfo.DOB.values[0], delimiter = '/', format = 'MMDDYYYY')
                animalinfo['DOB'] = dob

                # how old was the animal when this session was run?
                sess_str = '_'.join((yr, mo, da))
                sess_date = misc.str2date(sess_str, delimiter = '_', format = 'YYYYMMDD')
                d['sess'] = sess_date
                d['age'] = (sess_date - dob).days

                # how many days was this session from each "date" column?
                # NOTE(review): 'date*' is a regex, not a glob -- it selects
                # every column whose name contains 'dat'; confirm intended.
                dateinfo = animalinfo.filter(regex='date*')
                # iterate column labels: works whether or not the installed
                # pandas still provides DataFrame.iteritems
                for key in dateinfo.columns:
                    date = misc.str2date(dateinfo[key].values[0], delimiter='/', format='MMDDYYYY')
                    d['post'+key] = (sess_date-date).days

                # add all supplementary animalinfo fields
                for key in animalinfo.columns:
                    d[key] = animalinfo[key].values[0]

                pd.DataFrame(d).to_csv(outpath)
Exemplo n.º 4
0
Arquivo: Gap.py Projeto: r-b-g-b/Lab
def fileconvert_all(studydir):
    '''
    Convert every gap detection text output under <studydir>/data/Gap into
    a CSV file.

    Each file matching <studydir>/data/Gap/[0-9]*/*.txt must be named
    <animalID>_<gen>_<condition>_<mo>_<da>_<yr>_<rest>; a single leading '_'
    is tolerated and carried over to the output name. The table returned by
    fileconvert(fpath) is extended with animalID, gen, condition, session
    date ('sess'), age in days, one 'post<column>' day count per date column
    of the animal info, and every animal info column, then written to
    <studydir>/gapdetection/<animalID>_<gen>_<condition>_<yr>_<mo>_<da>.csv.

    The gapdetection directory is created if missing. Outputs that already
    exist are left untouched.
    '''
    gapdetectiondir = os.path.join(studydir, 'gapdetection')
    if not os.path.exists(gapdetectiondir):
        os.mkdir(gapdetectiondir)

    # loop through all animals
    animalpaths = glob.glob(os.path.join(studydir, 'data', 'Gap', '[0-9]*'))
    for animalpath in animalpaths:

        for fpath in glob.glob(os.path.join(animalpath, '*.txt')):

            relat = os.path.basename(fpath)
            # a leading underscore is kept on the output name but is not
            # part of the animal ID
            prefix = '_' if relat.startswith('_') else ''
            animalID, gen, condition, mo, da, yr, _ = relat[len(prefix):].split('_')

            newrelat = prefix + '%s.csv' % '_'.join((animalID, gen, condition, yr, mo, da))
            outpath = os.path.join(gapdetectiondir, newrelat)

            # skip if the file already exists
            if os.path.exists(outpath):
                continue

            # calculate gapratio
            df = fileconvert(fpath)

            df['animalID'] = animalID
            df['gen'] = gen
            df['condition'] = condition

            # load the animal DOB, group, etc. from the dobs.csv file
            animalinfo = get_animalinfo(animalID, studydir)

            # replace the DOB string by the parsed date so the parsed value
            # is what gets copied into the output below
            dob = misc.str2date(animalinfo.DOB.values[0], delimiter = '/', format = 'MMDDYYYY')
            animalinfo['DOB'] = dob

            # how old was the animal when this session was run?
            sess_str = '_'.join((yr, mo, da))
            sess_date = misc.str2date(sess_str, delimiter = '_', format = 'YYYYMMDD')
            df['sess'] = sess_date
            df['age'] = (sess_date - dob).days

            # how many days was this session from each "date" column?
            # NOTE(review): 'date*' is a regex, not a glob -- it selects
            # every column whose name contains 'dat'; confirm intended.
            dateinfo = animalinfo.filter(regex='date*')
            # iterate column labels: works whether or not the installed
            # pandas still provides DataFrame.iteritems
            for key in dateinfo.columns:
                date = misc.str2date(dateinfo[key].values[0], delimiter='/', format='MMDDYYYY')
                df['post'+key] = (sess_date-date).days

            # add all supplementary animalinfo fields
            for key in animalinfo.columns:
                df[key] = animalinfo[key].values[0]

            df.to_csv(outpath)