Example #1
import os
import time

import numpy as np
import pandas as pd

def begin_session(path_to_csv, baseline, mean_fxn, var_fxn):
    if os.path.isfile(path_to_csv):
        history_df = pd.read_csv(path_to_csv)
        nr = history_df.shape[0]
    else:
        nr = 0

    mu = mean_fxn(nr)
    sigma = var_fxn(nr, baseline)

    # Sample the session length (minutes) from a normal distribution, floored at one minute
    mins = baseline + max(-baseline + 1, np.random.normal(mu, sigma))

    # Run the timed practice session
    os.system("say Begin Practice")
    print(mins)
    time.sleep(60 * mins)
    os.system("say End Practice")

    # Record the session outcome
    stage = input(
        "Which stage of meditation did you enter during this practice at its acme?"
    )
    date = input("What is the date?")

    new_row = pd.DataFrame({"Date": [date], "Stage": [stage], "Duration": [mins]})
    if nr == 0:
        history_df = new_row
    else:
        history_df = pd.concat([history_df, new_row], ignore_index=True)

    history_df.to_csv(path_to_csv, index=False)
    return
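# A minimal usage sketch (not part of the original example): the two helpers below
# are hypothetical placeholders matching the signatures begin_session expects,
# i.e. mean_fxn(n_rows) and var_fxn(n_rows, baseline).
def example_mean(n_rows):
    # Target session length grows slowly with the number of recorded sessions
    return 10 + 0.5 * n_rows

def example_var(n_rows, baseline):
    # Spread shrinks as more history accumulates
    return max(1.0, baseline / (n_rows + 1))

# begin_session("practice_log.csv", 10, example_mean, example_var)  # starts a timed session immediately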
Example #2
import os

import pandas as pd

def get_target_annotations(pset, annot_dir):
    """
    Annotate the 'TARGET' in the 'drug' slot of a PSet object using the UniProt identifier
    mapping tool API.

    :param pset: PSet object whose drug targets should be annotated
    :param annot_dir: directory containing the drug target annotation CSV files
    :return: None
    """
    # Read in drug target annotations and gene annotations
    drug_targets = pd.read_csv(
        os.path.join(annot_dir, 'drugbank_drug_targets_all.csv'))
    rnaseq_df = pset.get("molecularProfiles").get(
        "Kallisto_0.46.1.rnaseq").get("elementMetadata")

    # Map genes to drugbank drug ids
    genes_to_drugs = pd.merge(
        drug_targets.loc[:, ['Name', 'Gene Name', 'Drug IDs']],
        rnaseq_df.loc[:, ['gene_name', 'gene_id']],
        left_on='Gene Name',
        right_on='gene_name')

    # Expand the semicolon-separated 'Drug IDs' column into one row per drug ID
    genes_to_drugs['Drug IDs'] = genes_to_drugs['Drug IDs'].str.split('; ')
    genes_to_drugs = genes_to_drugs.explode('Drug IDs')

    # Write the mapping to disk if it does not already exist
    file_path = os.path.join(annot_dir, 'drugbank_drug_to_gene_mappings.csv')
    if not os.path.isfile(file_path):
        genes_to_drugs.to_csv(file_path, index=False)
Example #3
import pandas as pd

def wide_to_long_county():
    df = pd.read_csv("~/Documents/GitHub/lqycovid/data-scripts/_1p3a/_working/testing_county_raw.csv")
    df.set_index(['GEOID', 'criteria'], inplace=True)
    df = df.stack(level=0).reset_index(level=1, drop=False).reset_index()
    df['level_1'] = df.level_1.astype(str)
    df.rename(columns={'level_1': 'date', 0: 'testing'}, inplace=True)
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
    df['date'] = df.date.astype(str)
    df.to_csv("testing_county_raw.csv", index=False)
    
    
    dp = pd.read_csv("~/Documents/GitHub/lqycovid/data-scripts/_1p3a/_working/county_positivity_raw.csv")
    dp.set_index(['GEOID', 'criteria'], inplace=True)
    dp = dp.stack(level=0).reset_index(level=1, drop=False).reset_index()
    dp['level_1'] = dp.level_1.astype(str)
    dp.rename(columns={'level_1': 'date', 0: 'positivity'}, inplace=True)
    dp['date'] = pd.to_datetime(dp['date'], format='%Y-%m-%d')
    dp['date'] = dp.date.astype(str)
    dp.to_csv("county_positivity_raw.csv", index=False)
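# An alternative sketch (not from the original example): the same wide-to-long
# reshape can be written with pd.melt instead of set_index/stack. "wide.csv" is a
# placeholder path assumed to have the same layout as testing_county_raw.csv;
# note that stack() drops NaN values by default while melt() keeps them.
def wide_to_long_melt(path="wide.csv"):
    raw = pd.read_csv(path)
    long_df = raw.melt(id_vars=['GEOID', 'criteria'], var_name='date', value_name='testing')
    long_df['date'] = pd.to_datetime(long_df['date'], format='%Y-%m-%d').astype(str)
    return long_df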
Example #4
import sys

import pandas as pd


class MidTerm:

    def __init__(self):

        # Form training set
        ts = pd.read_csv('training_set.csv')  # self.form_training_set()

        # Write the training set to CSV and upload it to an AWS S3 bucket
        # (self.upload is defined elsewhere in the class)
        ts.to_csv('training_set.csv', index=False)
        self.upload('training_set.csv', sys.argv[1], sys.argv[2])

        # Data cleaning (self.clean is defined elsewhere in the class)
        ts = self.clean(ts)
Example #5
import pandas as pd

def ReadSQLServertoFileStore(file, schema, tableName, engine):
    # Read the whole table from SQL Server, write it out as a CSV file, and return it
    df = pd.read_sql_table(tableName, con=engine, schema=schema)
    df.to_csv(file, index=False)
    return df
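# A minimal usage sketch (not part of the original example): the connection URL,
# schema name, table name, and output file below are placeholders. Requires
# SQLAlchemy plus an ODBC driver for SQL Server; create_engine is lazy, so no
# connection is made until the table is actually read.
from sqlalchemy import create_engine

engine = create_engine("mssql+pyodbc://user:password@my_dsn")
# df = ReadSQLServertoFileStore("sales_export.csv", "dbo", "Sales", engine)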
Example #6
# nltk.download('wordnet')
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
from nltk.stem import WordNetLemmatizer
import os
import textmining

xDIR = 'C:/Users/V574361/Documents/bigdata/files'

#def tdm():
# Initialize class to create the term-document matrix
tdm = textmining.TermDocumentMatrix()
# Add the documents
for f in os.listdir(xDIR):
    #print(f)
    with open(os.path.join(xDIR, f), encoding="utf8") as doc:
        tdm.add_doc(doc.read())
#for row in tdm.rows(cutoff=1):
#    print(row)
#print(tdm)
#    return tdmm
#x = tdm()

tdm.write_csv(filename="C:/Users/V574361/Documents/bigdata/tdm.test.csv")


import pandas as pd

# Transpose the term-document matrix and write it back out
cereal_df = pd.read_csv("C:/Users/V574361/Documents/bigdata/tdm.test.csv")
tdm = cereal_df.T
tdm.to_csv("C:/Users/V574361/Documents/bigdata/tdm.test1.csv", sep=',', encoding='utf-8')

Example #7
#coding:utf-8
#author:L.P 
import pandas as pd

file = pd.read_table("test.txt", names=["ID", "string"])
# Set the whole 'string' column to the constant 1
file["string"] = 1

file.to_csv("submit.csv", sep=",", index=False)
Example #8
import numpy as np
import pandas as pd

plantings = np.load('Plantings_NEW.npy')

ids_ordered = np.load('Ecotype_ids_NEW_ORDERED.npy')
ids_unique = np.unique(ids_ordered)
# Keep only the columns whose ecotypes are present in the dataset
logical = ibs.columns.isin(ids_unique.flatten())
ibs = ibs.loc[logical, logical]

k_matrix = pd.DataFrame(data=None, index=ids_ordered, columns=ids_ordered)

for i in ibs.columns:
    for j in ibs.index:
        a = ibs.loc[j, i]  #get the data value
        k_matrix.loc[j, i] = a

k_matrix.to_csv('k2030_emmaxibsmatrix.csv')
k_np = np.array(k_matrix)
np.save('k2030_emmaxibsmatrix.npy', k_np)

#Conforming the EMMAX aBN (Balding-Nichols) matrix
import os
os.chdir(
    "/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/EMMAX"
)
import pandas as pd
ibs = pd.read_csv('k2029_emmax.aBN.csv', sep='\t', header=None)
tfam = pd.read_csv('k2029_emmax.tfam', sep=' ', header=None)
ecotype_ids = tfam.loc[:, 0]
ibs.columns = ecotype_ids
ibs.index = ecotype_ids
Example #9
def filewrite(df, file):
    # pandas has no pd.write_csv; write the DataFrame out with DataFrame.to_csv
    # (the DataFrame to write is taken as an explicit first argument here)
    df.to_csv(file, index=False)
    return df
Example #10
def write_csv(df, csv_file):
    # pandas has no pd.write_csv; writing is done with DataFrame.to_csv
    # (the DataFrame to write is passed in explicitly here)
    df.to_csv(csv_file, index=False)
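# Usage sketch (hypothetical DataFrame and output path):
# import pandas as pd
# write_csv(pd.DataFrame({"ID": [1, 2], "value": [0.1, 0.2]}), "out.csv")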
Example #11
    if 'regeocode' in response.text:
        #address_detail0=re.search('"addressComponent":(.*?)',response.text).group(1)
        address_detail=re.search('"addressComponent":(.*),(.*),(.*),(.*)',response.text).group(1)
        address_dict['city']=json.loads(address_detail)['city']
        address_dict["province"]=json.loads(address_detail)["province"]
        address_dict["district"]=json.loads(address_detail)["district"]
        address_dict["township"]=json.loads(address_detail)["township"]
    else:
        pass
    return address_dict 

# Geocode each location once, then spread the address components into separate columns
geo = data.loncationdetail.apply(get_data)
data['city'] = geo.apply(lambda x: x['city'])
data['province'] = geo.apply(lambda x: x['province'])
data['district'] = geo.apply(lambda x: x['district'])
data['township'] = geo.apply(lambda x: x['township'])
# data.to_csv(...)  # pandas has no pd.write_csv; pass an output path to DataFrame.to_csv to save the result
data.head()
#Which columns contain nulls, and how many missing values each one has
#data[['city','province','district','township']].isnull().sum() reports no null values
#What we actually need is the count of empty lists; data['city'] gives a Series, data[['city']] gives a DataFrame
#apply on a DataFrame returns one aggregated value per column or row, while apply on a Series returns a result per element
#(data[['city']].apply(len)==0).sum();Out[256]: 0
#(data['city'].apply(len)==0).sum();Out[257]: 294

np.isnan(data['city']).sum()#returns a single count, not the length of each element
for i in ['city','province','district','township']:#check which columns contain empty lists
    print(str(i),':',(data[i].apply(lambda x:len(x)==0)).sum())

#Row indices where the city column is an empty list; the gaps come from direct-administered municipalities
pd.Series((data[data.province.str.contains('市')].index)==data[data['city'].apply(lambda x:len(x)==0)].index).value_counts()
#Fill in the rows whose city is an empty list
Example #12
    else:
        0.0
prices_shrt['return'] + 1 
prices_shrt.groupby(by='ticker')['return'].prod()

#too long
#EXPLORE OPTION 2, FOR LOOPING EVERY TICKER AND USING SHIFT(1) METHOD!!!!!!!

prices_shrt['return'] = 1.0
%%time
for i in range(1, len(prices_shrt)):
    # Rows where the ticker changes keep the default return of 1.0 set above
    if prices_shrt['ticker'][i-1] == prices_shrt['ticker'][i]:
        prices_shrt.loc[i, 'return'] = prices_shrt["adj_close"][i] / prices_shrt["adj_close"][i-1]
prices_shrt.to_csv("returns.csv", index=False)
fif.groupby(by='ticker')['return'].prod()


#http://stackoverflow.com/questions/40273251/pandas-groupby-with-pct-change
fif['return'] = fif.groupby(['ticker', 'year']).pct_change()
prices_shrt['return'] = prices_shrt.groupby(['ticker', 'year']).pct_change()


fuit = df.groupby(['ticker','year','month'],as_index=False).count()
fuit['attributes'] = 'price'
fuit = pd.merge(fuit,avgP, on = ['ticker','year','month'], how='left')
fuit = fuit.drop(['value'],axis=1)
fuit.columns = ['ticker','year','month','attributes','value']
df = pd.concat([df, fuit])  # DataFrame.append was removed in pandas 2.0