Example #1
import os
import tarfile
import zipfile

import pandas as pd


def read_archive(input_archive, archive_type, mode, sample2cat, input_dir):
    """Extract sample files from a zip or tar.gz archive and write a sample table."""
    if not os.path.exists(input_dir):
        os.mkdir(input_dir)
    if archive_type == "zip":
        archive = zipfile.open(input_archive)
        namelist = archive.namelist()
    if archive_type == "tar.gz":
        archive = tarfile.open(input_archive, "r:gz")
        namelist = archive.getnames()
    sample_file_names, sample_names = get_sample_names(namelist)  # helper defined elsewhere in the module
    for tf, sfn in zip(namelist, sample_file_names):
        extracted = extract_member(tf)
        # archive members yield bytes, so write in binary mode
        with open("%s/%s" % (input_dir, sfn), 'wb') as sample_file_out:
            for line in extracted:
                sample_file_out.write(line)
        extracted.close()

    #create sample table
    if sample2cat is not None:
        sample_cat = pd.read_csv(sample2cat, index_col=0, sep="\t")
        # replace index with cleaned file names
        sample_cat = sample_cat.rename(
            index=dict(zip(namelist, sample_file_names)))
        sample_table = pd.DataFrame(
            list(zip(sample_file_names,
                     sample_cat.loc[sample_file_names].iloc[:, 0])))
        sample_table.columns = ["sample_file_name", "category"]
    else:
        sample_table = pd.DataFrame(sample_file_names)
        sample_table.columns = ["sample_file_name"]
    sample_table.index = sample_names
    sample_table.index.name = "sample_name"
    sample_table.to_csv("%s/sample_table.txt" % input_dir, sep="\t")
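A minimal usage sketch (not from the original project; the file names are hypothetical, and get_sample_names must be importable from the surrounding module):

# unpack each sample into ./input and write input/sample_table.txt
read_archive("samples.tar.gz", "tar.gz", None, "sample_categories.tsv", "input")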
Example #2
def readEdgeList(filename):
	edgelist = pd.read_csv(filename)
	if len(edgelist.columns) != 2:
		# an edge list should have 2 columns; keep only the first two
		edgelist = pd.read_csv(filename, usecols=[0, 1])
	return pd.DataFrame(edgelist)
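A quick usage sketch, with a hypothetical file name:

edges = readEdgeList("edges.csv")  # expects a header row and at least two columns
print(edges.head())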
Example #3
import pandas as pd

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("")
print("=======================")
print("== Running importcsv ==")
print("=======================")
print("")

fields=['ID','Name','Payment Type','Amount','Pay To','Date','Details']

print("---- Reading from Payments.cvs ----")
paymentsdf=pd.read_cvs('Payments.cvs',skipinitialspace=True,usecols=fields,encoding="ISO-8859-1")

print(paymentsdf)
Example #4
import pandas as pd
import pandas_datareader.data as web
import datetime as dt

start = dt.datetime(2017, 1, 1)
end = dt.date.today()

apple_stock = web.DataReader('AAPL', 'yahoo', start, end)

# Download the S&P 500 stocks listed in SP500.csv
ticker = []
data = pd.read_csv('SP500.csv', header=None)

for item in data[0]:  # column 0 of data
    ticker.append(item)

for name in ticker:
    # create a module-level variable per ticker symbol
    vars()[name] = web.DataReader(name, 'yahoo', start, end)
Example #5
import pandas as pd
from Bio.Blast.Applications import NcbiblastxCommandline
seq_desconhecida = input("unknown sequence:")
seq_proteinas = input("Trypanosoma cruzi protein:")
blast_x = "/Users/Juliana/Documents/Juliana/programacao/blastx.exe"
arquivo_blast = r"/Users/Juliana/Documents/Juliana/programacao/Arquivo Blasta.TAC3.txt"
comparacao = NcbiblastxCommandline(cmd=blast_x, query=seq_desconhecida,
                                   subject=seq_proteinas, evalue=0.05,
                                   outfmt=6, out=arquivo_blast)
stdout, stderr = comparacao()
# outfmt 6 is tab-separated with no header; name the 12 standard columns
blast_resultado = pd.read_csv(
    "/Users/Juliana/Documents/Juliana/programacao/Arquivo Blasta.TAC3.txt",
    sep="\t", header=None,
    names=["Query", "Subject", "Identity", "Length", "Mismatches", "Gaps",
           "QStart", "QEnd", "SStart", "SEnd", "Evalue", "Bitscore"])
maximo = blast_resultado.sort_values("Bitscore")
print(maximo.iloc[[-1]])

Example #6
#################################################################
#IO_ext Ej_08;
print("Ingreso lectura de bloque de datos con metodo WITH  desde otro archivo JavaScript_Object_Notation read - rw");
with open("JavaScript_Object_Notation_with.json","r") as read_file:# abre el archivo JavaScript_Object_Notation para lectura en bloque
	json_en_memoria2 = json.load(read_file);
print(json_en_memoria2);
nuevo(13);
#################################################################

#################################################################
#IO_ext Ej_08;
####################                  CSV (comma-separated text)
print("""
╔═════════════════════════════════════════════════════════════════════════════╗
║                                                                             ║
║                                       CSV                                   ║
║                                                                             ║
╚═════════════════════════════════════════════════════════════════════════════╝
""")
import pandas as pd
# data file: ejemplo_desde_plan_calculo.csv


#################################################################
#IO_ext Ej_08;
print("Ingreso lectura de bloque de datos desde otro archivo CSV read - rw");
archivo_de_csv=pd.read_cvs("ejemplo_desde_plan_calculo.csv",header=0)#		abre el archivo JavaScript_Object_Notation para lectura en bloque
print(archivo_de_csv);
print(Dato_3);
nuevo(14,"fin");
Example #7
File: my_nlp.py Project: Black751/nlp
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, KFold
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
import matplotlib
from matplotlib import pyplot as plt 

%matplotlib inline
%config InlineBackend.figure_format = 'retina'


data = pd.read_csv("../input/train.csv")
Example #8
def append_stock_csv(file_name, dataframe):
	df = pd.read_csv(file_name)
	# DataFrame.append is not in-place; concatenate and reassign instead
	df = pd.concat([df, dataframe], ignore_index=True)
	df.to_csv('stock_info.csv', index=False)
Example #9
import pandas
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

url = "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pandas.read_csv(url, names=names)

print(dataset.shape)
print("\n")
print(dataset.describe())
Example #10
    header_dic = {}
    for header in unique_header:
        r = re.compile(header + ".+")
        header_match = list(filter(r.match, table_headers))
        header_dic[header] = header_match
    data_frame_dic = {}
    for header in header_dic:
        data_frame_list = []
        for file in header_dic[header]:
            data_frame_list.append(master_index[file])
        data_frame_dic[header] = data_frame_list
    for header in data_frame_dic:
        combine_frame = pd.concat(data_frame_dic[header])
        print(combine_frame)
        #combine_frame= combine_frame.fillna(0)
        combine_frame.to_csv("combined_data/" + str(header) + ".csv",
                             index=False)


combiner()
now = time.time()  # requires `import os, time` at the top of the script
path = os.path.expanduser("~/Documents/senior_project/providers.txt")
for f in os.listdir(path):
    full_path = os.path.join(path, f)
    if os.stat(full_path).st_mtime < now - 7 * 86400:
        if os.path.isfile(full_path):
            os.remove(full_path)
down_detector = pd.read_csv("downdetector.csv")
internet_traffic_report = pd.read_csv("internettrafficreport.csv")
is_it_down_right_now = pd.read_csv("isitdownrightnow.csv")
is_the_service_down = pd.read_csv("istheservicedown.csv")
outage_report = pd.read_csv("outage.csv")
Example #11
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score  # splits into train/test folds and produces an accuracy score
from keras.models import Sequential  # needed to build the neural network
from keras.layers import Dense, Input, Dropout, Activation  # building blocks for the layers
import keras
from keras.optimizers import SGD
from sklearn.impute import SimpleImputer

import pandas as pd
import numpy as np

veri = pd.read_csv("kanserTespiti.data")
veri.replace('?', -9999, inplace=True)
veriYeni = veri.drop(["1000025"], axis=1)
imp = SimpleImputer(missing_values=-9999, strategy="mean")
veriYeni = imp.fit_transform(veriYeni)

giris = veriYeni[:, 0:9]  # nine feature columns
cikis = veriYeni[:, 9]  # class label
model = Sequential()
model.add(Dense(64, input_dim=9))
model.add(Activation("relu"))
model.add(Dense(64))
model.add(Activation("relu"))
# binary output: a single sigmoid unit to match binary_crossentropy
model.add(Dense(1))
model.add(Activation("sigmoid"))

model.compile(optimizer="adam",
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(giris, cikis, epochs=5, batch_size=32, validation_split=0.13)
Example #12
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv('Realestate.csv')
df.head()



# train data distribution (the original's train/test split is not shown; use the full frame as train)
train = df
plt.scatter(train['X1 transaction date'], train['Y house price of unit area'], color='blue')
plt.xlabel("X1 transaction date")
plt.ylabel("Y house price of unit area")
plt.show()

#Modeling
#using sklearn package to model data
from sklearn import linear_model
regr = linear_model.LinearRegression()
train_x = np.asanyarray(train[['X1 transaction date']])
train_y = np.asanyarray(train[['Y house price of unit area']])
regr.fit(train_x,train_y)

#Plot output
plt.scatter(train['X1 transaction date'], train['Y house price of unit area'], color='blue')
plt.xlabel("X1 transaction date")
plt.ylabel("Y house price of unit area")
plt.show()
Example #13
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go

# Load CSV file from the Datasets folder
df = pd.read_csv('../Datasets/CoronaTimeSeries.csv')
df['Date'] = pd.to_datetime(df['Date'])

# Preparing data
data = [go.Scatter(x=df['Date'], y=df['Confirmed'], mode='lines', name='Confirmed')]

# Preparing layout
layout = go.Layout(title='Corona Virus Confirmed Cases From 2020-01-22 to 2020-03-17',
                   xaxis_title="Date", yaxis_title="Number of cases")

# Plot the figure and save it to an HTML file
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='linechart.html')
Example #14
# install tensorflow first (shell command):
#   pip install tensorflow

#import required libraries 

import tensorflow as tf
from tensorflow import keras 
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn import preprocessing 

# Obtain the Pokemon dataset from Kaggle for our model's predictions
# read the csv file using pandas (raw string so the backslashes aren't treated as escapes)
df = pd.read_csv(r'C:\Users\Owe\Desktop\tf\pokemon')
# cast the boolean legendary flag to 0/1 integers
df['isLegendary'] = df['isLegendary'].astype(int)

# create dummy variables for categorical attributes such as the Water and Grass types
# pd.get_dummies creates a new DataFrame from the categories
# dummy variables avoid assigning arbitrary integers to categorical properties
def dummy_creation(df, dummy_categories):
    for i in dummy_categories:
        df_dummy = pd.get_dummies(df[i])
        df = pd.concat([df,df_dummy],axis=1)
        df = df.drop(i, axis=1)
    return(df)
df = dummy_creation(df, ['Egg_Group_1', 'Body_Style', 'Color','Type_1', 'Type_2'])

# Up next: splitting and normalizing the data
# we will split our data into training and testing sets by Pokemon generation
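A minimal sketch of that next step, reusing the preprocessing import above; the split-by-generation rule and the stat column names are assumptions, not the original author's code:

# hypothetical hold-out: generation 1 becomes the test set
train = df[df['Generation'] != 1].copy()
test = df[df['Generation'] == 1].copy()

stats = ['HP', 'Attack', 'Defense', 'Sp_Atk', 'Sp_Def', 'Speed']  # hypothetical column names
scaler = preprocessing.MinMaxScaler()
train[stats] = scaler.fit_transform(train[stats])
test[stats] = scaler.transform(test[stats])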
Example #15
from flask import Flask, render_template
import pandas as pd

app = Flask(__name__)
dataset = pd.read_csv('cardio_train.csv')


@app.route("/", methods=['POST'])
def predictRisks():
    return render_template('insurer_profile.html', predictiontext="hello{}")


if __name__ == "__main__":
    app.run()
Example #16
File: PCA.py Project: hweyhsic/CS-199
import pandas as pd
url = "https://archive.ics.uci.edu/ml/machine-learning-database/iris/iris.data"

#load dataset into Pandas DataFrame
df = pd.read_csv(url,
                 names=[
                     'sepal length', 'sepal width', 'petal length',
                     'petal width', 'target'
                 ])

#Standardize the Data
from sklearn.preprocessing import StandardScaler

features = ['sepal length', 'sepal width', 'petal length', 'petal width']

#Separating out features
x = df.loc[:, features].values
#Separating out the target
y = df.loc[:, ['target']].values
#Standardizing the features
x = StandardScaler().fit_transform(x)

#See Jupyter notebook
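The PCA step itself lives in the notebook; a minimal sketch of what it presumably does, projecting the standardized features onto two components:

from sklearn.decomposition import PCA

pca = PCA(n_components=2)
principal_components = pca.fit_transform(x)
principal_df = pd.DataFrame(principal_components, columns=['PC1', 'PC2'])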
Example #17
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 10 14:02:57 2018

@author: 김아람
"""

import pandas as pd

train = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv')

train.head()
train.info()
Example #18
import pandas as pd

#read csv file
df = pd.read_csv('example.csv')

df.to_csv('My_output', index=False)
pd.read_csv('My_output')

#read excel
pd.read_excel('Excel_Sample.xlsx', sheet_name='Sheet1')

#save excel file
df.to_excel('Excel_Sample2.xlsx', sheet_name='NewSheet')

#read html
data = pd.read_html('http://www.fdic.gov/bank/individual/failed/banklist.html')

#work with SQL
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
df.to_sql('my_table', engine)
sqldf = pd.read_sql('my_table', con=engine)
Example #19
# Import data from .csv file
import os
import pandas as pd

fname = os.path.join("earthpy-downloads",
                     "avg-precip-months-seasons.csv")

avg_monthly_precip = pd.read_csv(fname)

avg_monthly_precip
#%%
filename = 'streamflow_week4.txt'
filepath = os.path.join('../data',filename)


print(os.getcwd())
print(filename)

avg_monthly_precip = pd.read_csv(filepath)
# %%
data.columns
# All of the data. Not the most helpful view: it only shows 10 rows (first 5, last 5),
# but it does include the number of rows and columns
print(data)
#The first 5 rows
data.head()
#Last five rows
data.tail()
#Super helpful! Tells us what each column is made up of

data.info()

# tells us the dimensions of the DataFrame
data.shape