Exemplo n.º 1
0
import os
import gc
import myfunc
import re
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from fuzzywuzzy import process

###################################### Import the source data
# NOTE(review): `pd` is not among the imports visible above this block --
# confirm that pandas is imported as `pd` before this point.
# Read the Excel attachment and write it back out as CSV.
data = pd.read_excel('e:\\jm\\附件1.xlsx')
data.to_csv('e:\\jm\\data1.csv')

# Re-read the CSV that was just written, and load a separate SAS dataset.
data1 = pd.read_csv('e:\\jm\\data1.csv')
data2 = pd.read_sas('e:\\jm\\data.sas7bdat')

# Call the project-local helper `myfunc.search` (its behavior is not shown here).
myfunc.search('map')
####################################### Import the data dictionary
# Read the dictionary file and keep every line with its surrounding
# whitespace stripped.  A single comprehension replaces the original
# `lineData = lineData + [...]` loop, which copied the whole list for
# every line read.
with open('e:\\jm\\数据字典.txt') as txtData:
    lines = txtData.readlines()
lineData = [line.strip() for line in lines]


def var_dic(n):
    var_lx = lineData[n].replace(')', ')').replace('(',
                                                   '(').replace(' ',
                                                                '').split('、')
    t = [re.split('[()]', x) for x in var_lx]
    var_lx = dict()
    for x in t:
Exemplo n.º 2
0
from queue import Queue
import threading
import os
import datetime
import pandas as pd
import tushare as ts
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy import types
import myfunc
from imp import reload

# Re-import the project-local helper module so that edits to it take effect.
reload(myfunc)

myfunc.search('strp')

# # Fetch all stock data; the stock codes are used to fetch adjusted prices
# stock_basics = ts.get_stock_basics()
# cal_dates = ts.trade_cal()  # returns the exchange calendar as a DataFrame: calendarDate, isOpen

# stock_basics.to_csv('e:\\data\\stock_basics.csv',encoding='GBK')
# cal_dates.to_csv('e:\\data\\cal_dates.csv',encoding='GBK')
# Load the locally cached copies (GBK-encoded CSV) instead of calling the API.
stock_basics = pd.read_csv('e:\\data\\stock_basics.csv', encoding='GBK')
# Index the table by the stock code as a zero-padded 6-character string.
stock_basics.index = stock_basics.code.astype('str').str.zfill(6)
# Keep only the last two columns of the calendar file.
cal_dates = pd.read_csv('e:\\data\\cal_dates.csv', encoding='GBK').iloc[:, -2:]


# 本地实现判断市场开市函数
# date: str类型日期eg.'2017-11-23'
def get_date_list(begin_date, end_date):
    date_list = []
Exemplo n.º 3
0
import tushare as ts
import numpy as np
import pandas as pd
import os
import gc
import myfunc
# Scratch / REPL-style statements; most results are evaluated and discarded.
np.nan<1

os.chdir("e:\\stock")  # change the current working directory
from datetime import datetime, timedelta
import time
# Print the built-in help for two DataFrame methods.
help(pd.DataFrame.isnull)
help(pd.DataFrame.count)

myfunc.search('corr')


# Decode UTF-8 bytes to text (these bytes decode to '汽车').
b'\xe6\xb1\xbd\xe8\xbd\xa6'.decode('utf8')
1319-1491
myfunc.search('eval')
a='search'
b='myfunc'
# eval() resolves the name held in `b` to the imported module object.
eval(b).search('s')
a=['a','f']
# append() adds the whole list as one nested element.
a.append(['d','d'])
list(['d','f'])
list('f')
# NOTE(review): `a` is a list at this point, so indexing it with a string key
# raises TypeError -- confirm whether a dict or DataFrame was intended.
a['g']=[3,3]

a=1
# 保存
Exemplo n.º 4
0
import numpy as np
from time import time
import pandas as pd
import math
import datetime
import tushare as ts
from arch import arch_model
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from matplotlib.font_manager import FontProperties
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12)
from urllib.request import urlretrieve
import myfunc
import os
myfunc.search('pytorch')
# Download the China Everbright Bank price history up to 2017-04-05


# import tushare as ts
# df = ts.lpr_data()  # data for the current year
# #df = ts.lpr_data(2014)  # data for 2014
# df.sort('date', ascending=False).head(10)

# Import the convertible-bond data and fill missing values.
file = pd.read_excel(r'e:\bond\2018年上市转债数据.xlsx')
file = file[:-2]  # drop the last two rows of the sheet
# Assign the filled column back explicitly: calling fillna(inplace=True) on
# the Series returned by iloc is chained assignment and is not guaranteed to
# modify `file` itself.
file.iloc[:, 8] = file.iloc[:, 8].fillna(70)

###### Risk-free rate
shibor = pd.read_csv(r'e:\bond\Shibor1Y.csv', encoding='GBK')
Exemplo n.º 5
0
                nums[i][j] += nums[i-1][j]
            else:
                nums[i][j] += min(nums[i-1][j],nums[i][j-1])
    nums[i][j]

class ListNode:
    """Node that stores a value and a reference to the following node."""

    def __init__(self, x):
        """Store ``x`` as the node value; the node starts with no successor."""
        self.val, self.next = x, None

# Scratch statements: build a node and inspect its attributes.
a=ListNode([1,2,3,54])
a.val
a.next

import myfunc
myfunc.search('sort_values')
import pandas as pd
# Two-row frame with columns 'a' and 'b'.
a=pd.DataFrame([[1,2],[1,3]],columns=list('ab'))

# Sort by column 'a', then keep the first row for each distinct 'a' value.
a.sort_values('a').drop_duplicates('a')
# Count how many times 5 occurs in the list.
[1,2,3,1,2,3,5].count(5)

import numpy as np
a=np.matrix([[1,2],[3,4]])
# For np.matrix, `*` is matrix multiplication (here: a times its transpose).
a*a.T

# Reshape both 1-D arrays into single-column 2-D arrays.
X=np.array([1,2,3,4,5])
X=X.reshape(-1, 1)
Y=np.array([0,2,3,4,6])
Y=Y.reshape(-1, 1)
from sklearn.linear_model import LinearRegression
Exemplo n.º 6
0
# Take the 1000 rows with the largest 'nkill' values and convert them to a
# NumPy matrix.
# NOTE(review): np.mat is a legacy alias of np.asmatrix -- confirm it is
# available in the NumPy version in use.
d = a.sort_values('nkill').tail(1000)
d = np.mat(d.values)
N = len(d)

# Min-max normalization
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
d2 = min_max_scaler.fit_transform(d)
# Standardization
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(d)
X = scaler.transform(d)

X.shape
myfunc.search('font')
################ Draw the frequency histogram
# Use the last column of the standardized data, restricted to values < 0.3.
t = X[:, -1]
t = t[t < 0.3]
from matplotlib.font_manager import FontProperties
# A font that contains CJK glyphs is needed to render the Chinese title.
font_set = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12)
fig, (ax0, ax1) = plt.subplots(nrows=2, figsize=(9, 6))
# `density=True` replaces the removed `normed=1` keyword: bar heights are
# normalized so that the histogram integrates to 1.
ax0.hist(t, 80, density=True, histtype='bar', facecolor='yellowgreen', alpha=0.75)
## PDF plot: the share of the samples that falls into each bin
ax0.set_title('恐怖袭击死亡人数pdf', fontproperties=font_set)
ax1.hist(t,
         20,
         normed=1,
         histtype='bar',
         facecolor='pink',
         alpha=0.75,
Exemplo n.º 7
0
def prim(G):
    """Grow a tree over points on a line, Prim-style.

    The original body ignored ``G`` and read a module-level ``x``; the
    argument is now the data source.

    Args:
        G: Sequence of numeric point values.  Vertex ``i`` is ``G[i]`` and
            the weight of edge ``(i, j)`` is ``abs(G[i] - G[j])``.

    Returns:
        A list of ``((p, q), w)`` tuples in the order the edges were chosen,
        where ``p`` is the vertex the edge was pushed from, ``q`` is the
        newly added vertex and ``w`` is the edge weight.
    """
    # NOTE(review): the loop below runs ``len(G) - 2`` times, so at most
    # ``len(G) - 2`` edges are returned -- one fewer than a spanning tree over
    # all ``len(G)`` points.  The count is kept from the original; confirm
    # whether it is intended.
    n = len(G) - 1
    v = 0  # start from the first vertex
    s = {v}  # vertices already in the tree; prevents revisiting them
    edges = []  # min-heap of (weight, from_vertex, to_vertex)
    res = []
    for _ in range(n - 1):
        # Offer every edge from the most recently added vertex.  Edges to
        # vertices already in the tree would be discarded on pop anyway, so
        # they are not pushed at all.
        for u, value in enumerate(G):
            if u not in s:
                heapq.heappush(edges, (abs(value - G[v]), v, u))
        # Pop the cheapest edge that reaches a vertex not yet in the tree.
        while edges:
            w, p, q = heapq.heappop(edges)
            if q not in s:
                s.add(q)
                res.append(((p, q), w))
                v = q
                break
    return res


import queue
import stack
import myfunc

myfunc.search('ShowProcess')
Exemplo n.º 8
0

# Load the holdings table from a SAS dataset.
b=pd.read_sas('e:\\wind\\hold.sas7bdat')
# b.drop('VAR3',axis=1,inplace=True)

# VAR2 holds bytes values; decode them to text as UTF-8.
b.VAR2 = b.VAR2.str.decode('utf8')
#str(b.VAR2, encoding = "utf8")
# Convert VAR4 to int and then to str so it can be parsed and matched as text.
b.VAR4=b.VAR4.astype('int').astype(str)

# Select the rows dated 06-30 and 12-31
b['date'] = pd.to_datetime(b.VAR4,format='%Y%m%d')

# Keep rows whose VAR4 text contains '0630' or '1231'.
b1=b[b.VAR4.str.contains('0630') | b.VAR4.str.contains('1231')]
# Keep only the first three columns and write them out without the index.
b1 = b1.iloc[:,:3]
b1.to_csv('e:/wind/manager.csv',index=False)
myfunc.search('to_csv')

help(pd.DataFrame.to_csv)
len(b)
len(b1)


t=['000650.XSHE', '002014.XSHE', '002088.XSHE', '002107.XSHE', '002117.XSHE', '002136.XSHE', '002148.XSHE', '002270.XSHE', '002365.XSHE', '002561.XSHE', '002553.XSHE', '002706.XSHE', '002718.XSHE', '002732.XSHE', '002749.XSHE', '002763.XSHE', '002793.XSHE', '002790.XSHE', '002836.XSHE', '002853.XSHE', '002849.XSHE', '002900.XSHE', '002880.XSHE', '002887.XSHE', '002896.XSHE', '002913.XSHE', '002868.XSHE', '002882.XSHE', '300075.XSHE', '300081.XSHE', '300231.XSHE', '300258.XSHE', '300286.XSHE', '300295.XSHE', '300341.XSHE', '300371.XSHE', '300396.XSHE', '300445.XSHE', '300410.XSHE', '300428.XSHE', '300446.XSHE', '300427.XSHE', '300394.XSHE', '300547.XSHE', '300519.XSHE', '300548.XSHE', '300561.XSHE', '300600.XSHE', '300637.XSHE', '300635.XSHE', '300648.XSHE', '300656.XSHE', '300687.XSHE', '300590.XSHE', '300599.XSHE', '300605.XSHE', '300695.XSHE', '300701.XSHE', '300696.XSHE', '300709.XSHE', '300727.XSHE', '300735.XSHE', '300723.XSHE', '300707.XSHE', '300739.XSHE', '300711.XSHE', '300738.XSHE', '300732.XSHE', '600305.XSHG', '600345.XSHG', '600419.XSHG', '600668.XSHG', '600732.XSHG', '600844.XSHG', '600892.XSHG', '600995.XSHG', '603023.XSHG', '603037.XSHG', '603158.XSHG', '603266.XSHG', '603086.XSHG', '603079.XSHG', '603110.XSHG', '603226.XSHG', '603106.XSHG', '603136.XSHG', '603283.XSHG', '603058.XSHG', '603599.XSHG', '603339.XSHG', '603306.XSHG', '603585.XSHG', '603535.XSHG', '603326.XSHG', '603500.XSHG', '603496.XSHG', '603608.XSHG', '603639.XSHG', '603808.XSHG', '603669.XSHG', '603809.XSHG', '603829.XSHG', '603722.XSHG', '603607.XSHG', '603617.XSHG', '603856.XSHG', '603889.XSHG', '603987.XSHG', '603968.XSHG', '603928.XSHG', '603978.XSHG', '603938.XSHG', '603960.XSHG']
len(t)

############################## 吉姆.史莱特 (Jim Slater) 祖鲁原则投资法 ##############################
# 选股标准:
# 1.	总市值 < 市场平均总市值*1.0。
# 2.	过去五年税后净利皆为正值。
# 3.	过去三年税后净利成长率皆 >= 15%。
# 4.	预估税后净利成长率 >= 15%。
Exemplo n.º 9
0
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import datetime
import tushare as ts
import os
import gc
import myfunc
os.chdir("e:\\stock\\siyinzi")
myfunc.search('sas', 1)

# Import the forward-adjusted closing prices
close = pd.read_csv('closeADJ.csv')
close = close.iloc[:, 1:]  # drop the first column of the file
# Store tickers as zero-padded 6-character strings.
close.ticker = close.ticker.astype(str)
close.ticker = close.ticker.str.zfill(6)
# Wide table: one row per trade date, one column per ticker.
close1 = pd.pivot_table(close,
                        columns='ticker',
                        values='closePrice',
                        index='tradeDate')


def mad(x):
    """Return the simple return between the two values in window ``x``."""
    return x[1] / x[0] - 1


# raw=True hands each window to `mad` as a plain ndarray, so x[0] / x[1] are
# positional.  Without it the window is a Series indexed by trade date and
# integer indexing relies on deprecated positional fallback.
close2 = close1.rolling(window=2).apply(mad, raw=True)
# Back to long format: one row per (tradeDate, ticker) with column 'rate'.
close = close2.stack()
close.name = 'rate'
close = close.reset_index()

# Import market capitalization / PB
# NOTE(review): the comment says PB but the file and variable are named
# `shizhi_PE` -- confirm which ratio the file actually holds.
shizhi_PE = pd.read_csv('shizhi_PE.csv')
shizhi_PE = shizhi_PE.iloc[:, 1:]  # drop the first column of the file
# Store tickers as zero-padded 6-character strings.
shizhi_PE.ticker = shizhi_PE.ticker.astype(str)
shizhi_PE.ticker = shizhi_PE.ticker.str.zfill(6)
Exemplo n.º 10
0
    return alldata
def init():
    """Clear the module-level ``line`` and return it.

    Passed as ``init_func`` to the ``FuncAnimation`` created below.
    """
    no_x, no_y = [], []
    line.set_data(no_x, no_y)
    return line
def animate(i):
    """Redraw the module-level ``line`` for frame ``i`` and return it.

    The x-axis is widened to ``i + 10`` and the line is given x values
    ``0..i`` together with whatever ``dapan('sh000001')`` returns.
    """
    axes.set_xlim(0, i + 10)
    line.set_data(range(i + 1), dapan('sh000001'))
    return line

# Drive `animate` for 10000 frames with a 5000 ms delay between frames;
# `init` draws the initial empty line.  The result is bound to `anim` so the
# animation object stays referenced while the window is shown.
anim = animation.FuncAnimation(fig, animate, init_func=init, frames=10000, interval=5000)
plt.show()

import myfunc
myfunc.search('sys.')
##############################
ts.get_hist_data('600848',start='2018-01-05',end='2018-01-09')

from pandas import DataFrame, Series
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates as mdates
from matplotlib import ticker as mticker
from matplotlib.finance import candlestick_ohlc
from matplotlib.dates import DateFormatter, WeekdayLocator, DayLocator, MONDAY, YEARLY
from matplotlib.dates import MonthLocator, MONTHLY
import datetime as dt
import pylab

daylinefilespath = 'G:\\dayline\\'
Exemplo n.º 11
0
z.pop(1)
final = Final(z)

# Keep the rows whose 'Unnamed: 1' text matches any of the patterns below.
# The regex patterns are raw strings so that `\.` and `\d` reach the regex
# engine without triggering Python's invalid-escape-sequence warning; the
# pattern text itself is unchanged.
final1 = final[final['Unnamed: 1'].str.contains('资产总计')
               | final['Unnamed: 1'].str.contains(r'49\.')
               | final['Unnamed: 1'].str.contains(r'59\.')
               | final['Unnamed: 1'].str.contains(r'6\d\.')]
t = pivot(final1, [4], col='Unnamed: 1')
add_sheet(t, 'temp', '16')
################################################
################################################
################################################
# NOTE(review): `pivot` returns the result of pd.pivot_table, which has no
# `.str` accessor, and the result of this expression is discarded -- confirm
# what this line was meant to operate on.
t.str.replace(r'\d\.\d*', '')

import myfunc
myfunc.search('正则', dirFlag=1)


def pivot(final1, x, col='name', ind='year'):
    """Cast the selected columns to float and pivot them.

    Args:
        final1: DataFrame to pivot.  It is modified in place: every column
            position listed in ``x`` is overwritten with its float cast.
        x: List of integer column positions to use as pivot values.
        col: Column whose values become the pivot's columns.
        ind: Column whose values become the pivot's index.

    Returns:
        The result of ``pd.pivot_table`` over the selected value columns.
    """
    for position in x:
        as_float = final1.iloc[:, position].astype(float)
        final1.iloc[:, position] = as_float
    value_columns = final1.columns[x]
    return pd.pivot_table(final1,
                          index=ind,
                          columns=col,
                          values=value_columns)


def Final(z):
    df = pd.DataFrame({
        'num': get_num(z),
Exemplo n.º 12
0
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import datetime
import tushare as ts
import os
import gc
os.chdir("e:\\temp")

import myfunc
myfunc.search('filter', )

# Example: read a file into a DataFrame, one row per line.
# The file is opened in text mode (the original comment said "binary"), and
# the `with` block closes the handle, which the original never did.
with open("shiti.txt") as f:
    lines = f.readlines()
a = list(lines)

b = pd.DataFrame({'a': a})

# Flag the lines containing '考试'.  The running sum of the flags numbers the
# sections those lines start; t1 holds the positions of the flagged lines.
c = b.a.str.contains('考试')
c1 = np.cumsum(c)
t1 = np.where(c)

b['c'] = c1

# NOTE(review): the parentheses in this pattern are regex groups, so it
# matches '参考答案' or '真题卷二答案' rather than literal brackets, and pandas warns
# about patterns with match groups -- confirm that this is intended.
c = b.a.str.contains('(参考答案)|(真题(卷二)答案)')
t2 = np.where(c)
Exemplo n.º 13
0
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import datetime
import tushare as ts
import os
import gc
os.chdir("e:\\stock\\temp")

import myfunc
from importlib import reload
reload(myfunc)

myfunc.search('npy')
############## Stock list
# Fetch the stock table and take its index, sorted, as the list of codes.
data=ts.get_stock_basics()
code=data.index
code=code.sort_values()
# Cache the codes on disk and read them straight back.
# NOTE(review): if `code` has object dtype (e.g. string tickers), np.save
# pickles it and recent NumPy versions need np.load(..., allow_pickle=True)
# -- confirm.
np.save('code.npy',code)
code = np.load('code.npy')

# `data` is rebound to an empty frame and `bug` to an empty list before the
# download loop that follows; the progress helper is sized to the code count.
data=pd.DataFrame();bug=[]
process_bar = myfunc.ShowProcess(len(code))
for x in code:
    t=ts.get_hist_data(x,start='2018-03-01',end='2018-05-09')
    #t = ts.get_hist_data('601965') 为空
    if t is None:bug.append(x)
    else:
        t = ts.get_hist_data(x,start='2018-03-01',end='2018-05-09').iloc[:,:5]
        t['code']=x
        data = data.append(t)
Exemplo n.º 14
0
import psm
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import datetime
import tushare as ts
import os
import gc
import myfunc
os.chdir("e:\\stock\\siyinzi")
myfunc.search('gbk')

import scipy as sp
import numpy as np
# `sklearn.cross_validation` was removed from scikit-learn; the function now
# lives in `sklearn.model_selection`.  Fall back to the old module so that
# very old installations keep working.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

x = np.loadtxt("wine.data",
               delimiter=",",
               usecols=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13))  # feature columns
y = np.loadtxt("wine.data", delimiter=",", usecols=(0))  # label column
print(x)  # inspect the samples
# Split the data set: 80% for training, 20% for testing
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    test_size=0.2)  # split the data set
# Fit a logistic regression model
model = LogisticRegression()
model.fit(x_train, y_train)
print(model)  # print the fitted model
# make predictions