版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领
文档简介
1. 商业数据分析及其工具
importpandasaspd
#讀取本地?cái)?shù)據(jù)
df=pd.readcsvC,/2glkx/data/al2-l.csv*)
#讀取網(wǎng)上數(shù)據(jù)
importpandasaspd
data_url=
“https:〃raw.githubusercontent.com/alstat/Analysis-with-Programming/master/2014
/Python/Numorical-Dcscriptions-of-the-Data/data.csv”
df=pd.read_csv(data_url)
print(df.head())
print(df.tai1())
print(df.columns)
Index([u'Abra',u'Apayao',u'Benguet',u*Ifugao*,u*Kalingcf],dtype=,object*)
#Extractingrownamesortheindex
print(df.index)
Rangeindex(start=0,stop=79,step=l)
#Transposedata
print(df.T)
print(df.ix[:,0].head())現(xiàn)在改版ix用loc
print(df.ix[10:20,0:3])
print(df.drop(df.columns[[2,3]],axis=1).head())
print(df.describe0)
fromscipyimportstatsasss
#Performonesamplet-testusing1500asthetruemean
print(ss.ttest1samp(a=df.ix[:,'Abra'],popmean=15000))
TtestlsampResult(statistical.1281738488299586,pvalue=0.26270472069109496)
print(ss.ttcst_lsamp(a=df,popmean=15000))
importmatplotlib.pyplotaspit
pit.show(df.plot(kind='box'))
1
importseabornassns
#Dotheboxplot
pit.show(sns.boxplot(df))
def add_2int(x, y):
    """Return the sum of ``x`` and ``y``.

    Args:
        x: First addend.
        y: Second addend.

    Returns:
        The result of ``x + y``.
    """
    return x + y
print(add_2int(2,2))
2Python商業(yè)數(shù)據(jù)的存取
importpandasaspd
importnumpyasnp
apple*,*pear','watch*,'money,]
b=[[l,2,3,4,5],[5,7,8,9,0],[1,3,5,7,9],[2,4,6,8,0]]
d=dict(zip(a,b))
d
p=pd.DataFramc(d)
P
p.to_csv(*F:\\2glkx\\data\\IBM.csv*)
pd.readcsv('F:\\2glkx\\data\\al2-l.csv')
importpandasaspd
importnumpyasnp
df=pd.read_excel(*F:\\2glkx\\data\\al2-2.xls')
df.headO
importpandasaspd
importnumpyasnp
#讀取數(shù)據(jù)并創(chuàng)建數(shù)據(jù)表,名稱為data。
data=pd.DataFrame(pd.readexcel(JG:\\2glkx\\data\\al2-2.xls,))
#查看數(shù)據(jù)表前5行的內(nèi)容
data,head()
importtushareasts
df=ts.get_hist_data(?000875,)#從網(wǎng)上取數(shù)據(jù)
#直接保存
#df.to_csv(,:/2glkx/data/000875.csv')
#選擇數(shù)據(jù)保存
df.to_csv('F:/2glkx/data/000875.csv',columns=[,open','high','low','close*])
importpandasaspd
importnumpyasnp
df=pd.readcsv(*F:/2glkx/data/000875.csv')
2
df.head()
importtushareasts
importos
filename=*F:/2glkx/data/bigfile.csv)
forcodeinf000875,,*600848,,'000981']:
df=ts.gethistdata(code)
ifos.path,exists(filename):
itdf.to_csv(filename,mode='a',header二None)
df.to_csv(filoname,modo=,a')
else:
df.to_csv(filename)
importpandasaspd
importnumpyasnp
df=pd.rcadexccl('F:/2glkx/data/000875.xls')
df.head()
importtushareasts#需先安裝tushare程序包
#此程序包的安裝命令:pipinstalltushare
importpandasaspd
importnumpyasnp象中
data=pd.DataFrame()
datal=ts.gct_hist_data(,600000,)
并需要修改上面的時(shí)間
datal=datalfclose,]
datal=datal[::-l]#按日期從遠(yuǎn)到近結(jié)束
dataf600000']=datal
data2=ts.got_hist_data(,0009801)
data2=data2fclose*]
data2=data2[::-l]
data['000980']=data2
data3=ts.get_hist_data(,000981,)
data3=data3['close']
data3=data3[::-l]
dataf00098T]=data3
data,info0#查看數(shù)據(jù)情況
data=dala.dropna0
data,info()
data,head0
data,tail()
3
data=dataEf600000),‘000981']]
data,head()
data.ix[l:4]#現(xiàn)在ix改為loc
data.iloc[:2,:3]
importtushareasts
importpandasaspd
pd.setoption(*expand_frame_repr,,False)#顯示所有列
ts.settokenCyourtoken*)#獲取token號(hào),需要先注冊(cè)
pro=_api0
stockdata=pro.daily(ts_code=,000001.SZ*,startdate=,20100101,,
end_date=,20190101')
stockdata,head()
importpandasdatareader.dataasweb
importdatetime
start=datetime.datetime(2017,1,1)#獲取數(shù)據(jù)的時(shí)間段起始時(shí)間
end=datetime,date.today()#獲取數(shù)據(jù)的時(shí)間段-結(jié)束時(shí)間
stock=web.DataReader(,,600797.SS^,“yahoo”,start,end)
#獲取浙大網(wǎng)新2017年1月1日至今的股票數(shù)
stock.head()#打印DataFrame數(shù)據(jù)前5行
importpandasaspd
frompandasimportSeries,DataFramc
importnumpyasnp
importmatplotlib.pyplotaspit
frompandas_datareaderimportdata,wh
fromdatetimeimportdatetime
end=datetime.no\v()
start=datetime(end.year-1,end.month,end.day)
df=data.DataReadcrC*600797.SS*,'yahoo',start,end)
dffAdjClose*].plot(legend=True,figsize=(10,4))
pit.show()
importnumpyasnp
importpandasaspd
importpandasdatareader.dataasweb
importdatetime
#獲取600797.SS浙大網(wǎng)新數(shù)據(jù)
4
dfcsvsave=
web.DataReader(“600018.SS","yahoo”,datetime.datetime(2019,1,1),datetime,date.to
day())
print(df_csvsave)
dfcsvsave.tocsv(r*F:\2glkx\data\600018.csv',columns=dfcsvsave,columns,index=
True)
importtushareasts
importpandasaspd
pd.set_option(,expand_frame_repr,,False)#顯示所有列
ts.set_token(,yourtoken')#獲取token號(hào)
pro=ts.proapi()
code」ist=f000001.SZ\*600000.SH1,'()00002.ST]
stockdata=pd.DataFrameO
forcodeincode_list:
print(code)
df=pro.daily(ts_codc=codc,start_date=,2018010T,end_date=,20180104))
stock_data=stock_datci.cippend(df,ignore_index=True)
print(stockdata)
grouped=stock_data.groupby(*ts_code')
print(grouped)
3Python商業(yè)數(shù)據(jù)的圖形繪制與可視化
importmatplotlib.pyplotaspit
importpandasaspd
importnumpyasnp
df=pd.readexcel("G:/2glkx/data/al3-1.xls〃)
#或者df=pd.read_excelCG:\\2glkx\\data\\al3-l.xls')
df.headO
fig=plt.figureO
ax=fig.addsubplot(1,L1)
ax.hist(df[JAge*],bins=7)
pit.show()
fig=plt.figureO
ax=fig.addsubplot(1,L1)
ax.hist(df[*Age'],bins=7)
pit.titleCAgedistribution')
5
pit.show()
fig=plt.figure()
ax=fig.add_subplot(l,1.1)
ax.hist(df[*Age*],bins=7)
plt.titleC1Agedistribution')
pit.xlabel(,Age*)
pit.ylabel(JttEmployee*)
pit.show()
fig=plt.figure()
ax二fig.add_subplot(l,1,1)
ax.scatter(dffAge*],df['Sales'])
#Youcanalsoaddmorevariablesheretorepresentcolorandsize.
pit.title(*Age&SalesScatterofEmployee1)
^Variable
pit.xlabel(*Age*)
pit.ylabel(*Sales,)
pit.show()
fig=plt.figure()
ax=fig.addsubplot(1,1,1)
ax.scatter(df[JAge'],df[,Sales*],s=df['Income,])
#Addedthirdvariableincomeassizeofthebubble
pit.xlabel(JAge*)
pit.ylabel('Sales*)
pit.show()
importmatplotlib.pyplotaspit
importpandasaspd
fig二pit.figure()
ax=fig.addsubplot(1,L1)
^Variable
ax.boxplot(dffAge1])
pit.BoxfigureofAge')
pit.show()
vars=[*Age'Sales']
data=df[vars]
pit.show(delta,plot(kind='box'))
importmatplotlib.pyplotaspit
importpandasaspd
6
importnumpyasnp
df=pd.read_excel(^G:/2glkx/data/al3-l.xls")
df.head()
var=df.groupby(['Gender>1).sum().stack()
temp=var.unstack()
x_list=temp[,Sales*]
label_list=temp.index
pit.axis("equal")
pit.pie(xlist)
pit.titie(,zPastafatianismexpenses")
pit.show()
frompylabimport*
#makeasquarefigureandaxes
figure(1,figsize=(6,6.))
ax=axcs([0.1,0.1,0.8,0.8])
fracs=[60,40]#每一塊占得比例,總和為100
explode=(0,0.08)#離開(kāi)整體的距離,看效果
labels='Male',*Female,#對(duì)應(yīng)每一塊的標(biāo)志
pie(fracs,explode=explode,labels=labels,autopct=,%l.If%%,,shadow=True,
startangle=90,colors=("g","r"))
title(*RateofMaleandFemale*)#標(biāo)題
show()
importmatplotlib.pyplotasplot
importpandasaspd
importnumpyasnp
df=pd.readexcel("G:/2glkx/data/a13T.xls")
df.headO
var=df.groupby(*Gender').Sales.sum()
^groupedsumofsalesatGenderlevel
fig=pit.figure0
axl=fig.addsubplot(1,1,1)
axl.set_xlabelCGender")
axl.set_ylabel('SumofSales')
axl.settitie(^GenderwiseSumofSales")
var.plot(kind='bar*)
var=df.groupby(fBMT,Gender']).Sales.sum()
var.unstack().plot(kind=,bar*,stacked=True,color=[,red','blue*])
importmatplotlib.pyplotasplot
7
importpandasaspd
importnumpyasnp
df=pd.readexcel(,ZG:/2glkx/data/al3-l.xls")
df.head()
var=df.groupbyCBMT).Sales,sum()
fig=plt.figure()
axl=fig.add_subplot(1,1,1)
axl.set_xlabel(*BMT)
axl.sctylabcl(*SumofSales*)
axl.set_title(,,BMIwiseSumofSales")
var.plot(kind』line')
importpandasaspd
importnumpyasnp
data二pd.DataFrame(pd.read_excel(*G:\\2glkx\\data\\al3-3.xls'))
data,head()
t=np.array(data[year*]])
x=np.array(data[[*total*]])
y=np.array(data[[*new*]])
importpylabaspl
pl.plot(t,x)
pl.plot(t,y)
pl.show()
importpylabaspl
pl.plot(t,x)
pl.plot(t,y)
pl.titleC5populationcensus')
pl.xlabel('Time')
pl.ylabel('PopulatioiV)
pl.show()
importpandasaspd
importnumpyasnp
data=pd.DataFrame(pd.read_excel(?G:\\2glkx\\data\\al3-4.xls'))
data,head()
t=np.array(data[fyear,]])
x=np.array(data[['number,]])
importpylabaspl
pl.plot(t,x)
pl.title(>1998-2015ofAlistedcompaniesinchina))
pl.xlabelCTime*)
pl.ylabel('companiesnumbers*)
8
pl.show()
importpandasaspd
importnumpyasnp
importpylabaspl
data=pd.DataFrame(pd.read_excelG:\\2glkx\\data\\al3-4.xls'))
data,head()
t=np.array(data[['year']])
x=np.array(data[['number']])
importpylabaspl
pl.plot(t,x,'ro')
pl.title(>1998-2015ofAlistedcompaniesinchina,)
pl.xlabel('Time')
pl.ylabelCcompaniesnumbers,)
pl.show()
importrandom
importnumpyasnp
importmatplotlibasmpl
importmatplotlib.pyplotaspit
importmatplotlib.datesasmdates
frommpl_toolkits.mplol3dimportAxes3I)
mpl.rcParams[,font,size*]=10
fig=pit.figure()
ax=fig.add_subplot(111,projection='3d')
forzin[2011,2012,2013,2014]:
xs=ranged,13)
ys=1000*np.random,rand(12)
color=plt.cm.Set2(random,choice(range(pit.cm.Set2.N)))
ax.bar(xs,ys,zs=z,zdir=,y,,color=color,alpha=0.8)
ax.xaxis.setmajorlocator(mpl.ticker.FixedLocator(xs))
ax.yeixis.set_major_locator(mpl.ticker.FixedLocator(ys))
ax.setxlabel('Month')
ax.set_ylabel('Year*)
ax.setzlabel(*SalesNet[usd]*)
pit.show()
frommpltoolkits.mplot3dimportAxes3D
frommatplotlibimportcm
importmatplotlib.pyplotaspit
importnumpyasnp
nangles=36
n_radii=8
#Anarrayofradii
9
#Doosnotincluderadiusr=0,thisistoeliminateduplicatepoints
radii=np.1inspace(0.125,1.0,n_radii)
#Anarrayofangles
angles=np.1inspace(0.2*np.pi,n_cingles,endpoint=False)
#Repeatallanglesforeachradius
angles=np.repeat(angles,np.newaxis],n_radii,axis=l)
#Convertpolar(radii,angles)coordstocartesian(x,y)coords
#(0,0)isaddedhere.Therearenoduplicatepointsinthe(x,y)plane
x=np.append(0,(radii*np.cos(tingles)).flatten())
y=np.append(0,(radii*np.sin(anglcs)).flatten())
#Pringlesurface
z=np.sin(-x*y)
fig=pit.figureO
ax=fig.gca(projection^3d1)
ax.plot_trisurf(x,y,z,cmap=cm.jet,1inewidth=O.2)
pit.show()
4 Python描述性统计
# Two commonly used statistics packages
import scipy.stats as stats
import numpy as np

# Two example data sets: x2 is x1 plus one extreme value, to show how an
# outlier moves the mean but barely moves the median.
x1 = [1, 2, 2, 3, 4, 5, 5, 7]
x2 = x1 + [100]

print('x1的平均值:', sum(x1), '/', len(x1), '=', np.mean(x1))
print('x2的平均值:', sum(x2), '/', len(x2), '=', np.mean(x2))
# Printed output recorded in the source document:
#   x1的平均值: 29/8=3.625
#   x2的平均值: 129/9=14.333333333333334

print('x1的中位数:', np.median(x1))
print('x2的中位数:', np.median(x2))

# scipy.stats.mode reports a single mode only. Depending on the SciPy
# version the ``mode`` field is a length-1 array or a scalar, so normalise
# it with atleast_1d before indexing.
one_mode = np.atleast_1d(stats.mode(x1).mode)[0]
print('One mode of x1:', one_mode)
#因此我們自定義一個(gè)求眾數(shù)的函數(shù)
def mode(l):
    """Return every mode of the sequence ``l``.

    Args:
        l: A sequence (it must support ``len``) of hashable values.

    Returns:
        A list of the values that occur most often, in order of first
        appearance, when the highest count is greater than one or when
        ``l`` has exactly one element. Otherwise the string ``'No mode'``
        (this includes an empty sequence).
    """
    from collections import Counter

    # Count how many times each element occurs.
    counts = Counter(l)
    # default=0 keeps an empty input from raising in max().
    maxcount = max(counts.values(), default=0)
    if maxcount > 1 or len(l) == 1:
        # Counter preserves insertion order, so the result is deterministic.
        return [key for key, value in counts.items() if value == maxcount]
    return 'No mode'
print(*Allofthemodesofxl,mode(xl))
importscipy.statsasstats
importnumpyasnp
#獲取收益率數(shù)據(jù)并計(jì)算出mode
start=*2014-01-0r
end='2015-01-01'
pricing=D.historydata('000002.SZA',fields=[,close*],start_date=start,
end_date=end)['close*]
returns=pricing.pct_changc()[1:]
print('收益率眾數(shù):',stats,mode(returns))
#由于所有的收益率都是不同的,所以我們使用頻率分布來(lái)變相計(jì)算mode
hist,bins=np.histogram(returns,20)#將數(shù)據(jù)分成20個(gè)bin
maxfreq=max(hist)
n找出哪個(gè)bin里面出現(xiàn)的數(shù)據(jù)點(diǎn)次數(shù)最大,這個(gè)bin就當(dāng)做計(jì)算出來(lái)的mode
print(*Modeofbins:',[(bins[i],bins[i+l])fori,jinenumerate(hist)ifj==
maxfreq])
#使用Scipy包中的gmesn函數(shù)來(lái)計(jì)算幾何平均值
printCxl幾何平均值:stats,gmean(xl))
print(*x2幾何平均值:',stats,gmean(x2))
#在每個(gè)元素上增加1來(lái)計(jì)算幾何平均值
importscipy.statsasstats
importnumpyasnp
ratios=returns+np.ones(len(returns))
11
R_G=stats,gmcan(ratios)-1
print('收益率的幾何平均值:',RG)
T=len(returns)
initprice=pricingfO.
final_price=pricing[T]
print('最初價(jià)格:,,initprice)
print('最終價(jià)格:',final_price)
print('通過(guò)幾何平均收益率計(jì)算的最終價(jià)格:',init_price*(l+R_G)**T)
print(*xl的調(diào)和平均值:',stats,hmean(xl))
print(1x2的調(diào)和平均值:',stats.hmean(x2))
import numpy as np

np.random.seed(121)
# Generate 20 random integers below 100
X = np.random.randint(100, size=20)
# Sort them
X = np.sort(X)
print(f'X: {X}')

mu = np.mean(X)
print('X的平均值:', mu)
print(f'Range of X: {np.ptp(X)}')

# Mean absolute deviation: average distance of each observation from the mean.
abs_dispersion = np.abs(mu - X)
MAD = np.sum(abs_dispersion) / len(abs_dispersion)
print('X的平均绝对偏差:', MAD)

print('X的方差:', np.var(X))
print('X的标准差:', np.std(X))

# Chebyshev check: the share of observations within k standard deviations
# of the mean is compared against 1 - 1/k**2.
k = 1.25  # an arbitrary example value of k
dist = k * np.std(X)
l = [x for x in X if abs(x - mu) <= dist]
print('k值', k, '在k倍标准差距离内的样本为:', l)
print('验证', float(len(l)) / len(X), '>', 1 - 1 / k**2)

# There is no ready-made semivariance function, so compute it by hand
# from the observations that do not exceed the mean.
lows = X[X <= mu]
semivar = np.sum((lows - mu) ** 2) / len(lows)
print('X的下偏方差:', semivar)
print('X的下偏标准差:', np.sqrt(semivar))

B = 19  # target value
lows_B = X[X <= B]
# Guard against an empty selection (no observation at or below the target),
# which would otherwise divide by zero.
semivar_B = (
    np.sum((lows_B - B) ** 2) / len(lows_B) if len(lows_B) else float('nan')
)
print('X的目标下偏方差:', semivar_B)
print('X的目标下偏标准差:', np.sqrt(semivar_B))
importmatplotlib.pyplotaspit
importnumpyasnp
importscipy.statsasstats
xs=np.1inspace(-6,6,300)
normal=stats,norm,pdf(xs)
pit.plot(xs,normal);
#產(chǎn)生數(shù)據(jù)
xs2=np.1inspace(stats.lognorm,ppf(0.01,.7,loc=-.1),stats,lognorm,ppf(0.99,.7,
loc=-.1),150)
#偏度>0
lognormal=stats,lognorm,pdf(xs2,.7)
pit.plot(xs2,lognormal,label='Skew>O')
#偏度<0
pit.plot(xs2,lognormal[::-l],label=,Skew<O')
pit.legendO;
并注:本程序在Bigquanl環(huán)境中運(yùn)行。
start='2016-01-01'
end='2018-01-01'
pricing=D.historydata(1000300.SHA',startdate=start,enddate=end,)['close*]
returns=pricing.pct_chango()[1:]
print(*Skew:),stats,skew(returns))
print(,Mean:*,np.mean(returns))
print(*Median,np.median(returns))
pit.hist(returns,30);
fromscipyimportstats
frompandas,coreimportdatetools
importstatsmodels.apiassm#統(tǒng)計(jì)相關(guān)的庫(kù)
importnumpyasnp
importpandasaspd
importmatplotlib.pyplotaspit
importtushareasIs#財(cái)經(jīng)數(shù)據(jù)接口包tushare
IndexData=ts.get_k_data(code=,hs300*,start=,2016-01-0T,end=,2018-08-0T)
13
IndcxData.index=pd.to_datetimc(IndexData.date)
close=IndexDcita.close
returns=(close-close,shift(1))/close.shift(l)
returns=returns.dropnaO
print('Skew:*,stats,skew(returns))
print('Mean:,np.mean(returns))
print(*Median:),np.median(returns))
pit.hist(returns,30)
pit.plot(xs,stats.laplace,pdf(xs),label=,Leptokurtic,)
print尖峰的超額峰度:',(stats,laplace,stats(moments=,k')))
pit.plot(xs,normal,label=,Mesokurtic(normal),)
print(J正態(tài)分布超額峰度:',(stats.norm,stats(moments=,k')))
pit.plot(xs,stats,cosine.pdf(xs),label=,Platykurtic*)
print('平峰超額峰度:',(stats,cosine,stats(moments=,k*)))
pit.legend();
fromstatsmodels.stats,stattoolsimportjarque_bera
pvalue,=jarquebera(returns)
ifpvalue>0.05:
printC滬深300收益率數(shù)據(jù)服從正態(tài)分布.’)
else:
printC滬深300收益率數(shù)據(jù)并不服從正態(tài)分布.’)
fields=[,fs_roc_0,]
start_date=>2017-04-21,
enddate=J2017-04-21
instruments=I),instruments(start_date,enddate)
roe=D.features(instruments,startdate,enddate,fields二fields)['fsroe01]
1描述性統(tǒng)計(jì)
print('均值:',roe.mean())
print('標(biāo)準(zhǔn)差:',roe.std())
roe.describe()
2繪制直方圖
roe.hist(bins=100)
1固定比例法
roe=D.features(instruments,startdatc,enddate,fields=fields)[*fspoeOl]
roe[roe>=roe.quantile(0.99)]=roe.quantile(0.99)
roe[roe<=roe.quantile(0.01)]=roe.quantile(0.01)
print('均值:,roe.mean。)
print('標(biāo)準(zhǔn)差:',roe.std())
14
roe.hist(bins=100)
2均值標(biāo)準(zhǔn)差方法
通常把三倍標(biāo)準(zhǔn)差之外的值都視為異常值,然后將這些異常值重新賦值
roe=0.features(instruments,start_date,end_date,fields:fields)['fs_roe_0']
roe[roe>=roe.mean()+3*roe.std()]=roe.mean()+3*roe.std()
roe[roe<=roe.mean()-3*roe.std()]=roe.mean()-3*roe.stdO
print('均值:',roe.mean。)
print('標(biāo)準(zhǔn)差:',roe.std())
roo.hist(bins=100)
3MAI)方法
roe=D.features(instruments,startdate,enddate,fields=fields)fsroeO1]
roe=roe.dropnaO
median=np.median(list(roe))
MAD=np.mean(abs(roe)-median)
roe=roe[abs(roe-median)/MAD<=6]#剔除偏離中位數(shù)6倍以上的數(shù)據(jù)
print('均值:',roe.mean。)
print('標(biāo)準(zhǔn)差:',roe.$td())
roe.hist(bins=100)
fromstatsmodels.stats,stattoolsimportmedcouple
roe=D.features(instruments,start_date,enddate,fields=fields)fs_roe_01]
roe=roe.dropnaO
def boxplot(data):
    """Clamp values lying outside medcouple-adjusted box-plot fences.

    Relies on the module-level names ``np`` (numpy), ``pd`` (pandas) and
    ``medcouple`` (``statsmodels.stats.stattools.medcouple``).

    Args:
        data: A sequence of numbers. It is not modified.

    Returns:
        A ``pandas.Series`` of the values in ascending order, where every
        value below the lower fence is replaced by the lower fence and every
        value above the upper fence by the upper fence.
    """
    # mc is computed directly with statsmodels' medcouple function.
    mc = medcouple(data)
    # Sort a copy so the caller's list is left untouched.
    values = sorted(data)
    q1 = values[int(0.25 * len(values))]
    q3 = values[int(0.75 * len(values))]
    iqr = q3 - q1
    # The fence multipliers depend on the sign of mc, so the fences are
    # asymmetric for skewed data.
    if mc >= 0:
        lower = q1 - 1.5 * np.exp(-3.5 * mc) * iqr
        upper = q3 + 1.5 * np.exp(4 * mc) * iqr
    else:
        lower = q1 - 1.5 * np.exp(-4 * mc) * iqr
        upper = q3 + 1.5 * np.exp(3.5 * mc) * iqr
    # Build a float Series so that assigning the (float) fences never
    # conflicts with an integer dtype.
    series = pd.Series(values, dtype=float)
    series[series < lower] = lower
    series[series > upper] = upper
    return series
print('均值',boxplot(list(roe)).mean())
printC標(biāo)準(zhǔn)差',boxplot(list(roe)).std())
boxplot(1ist(roe)).hist(bins=100)
15
5 Python参数估计
import numpy as np

# 0/1 sample.
# NOTE(review): one pair of entries was illegible in the source ("LI") and is
# reconstructed here as 1, 1 — verify against the original data.
x = [1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1,
     0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1]
# Sample proportion of ones.
theta = np.mean(x)
# Plug-in estimate of theta / (1 - theta).
h = theta / (1 - theta)
print('h=', h)
5.3 Python单正态总体均值区间估计
import numpy as np
import scipy.stats as ss

# Confidence interval for the mean when the population variance is known.
x = [14.6, 15.1, 14.9, 14.8, 15.2, 15.1]
n = len(x)            # sample size (6)
p = 0.025             # tail probability on each side -> 95% interval
sigma = np.sqrt(0.6)  # known population standard deviation

xbar = np.mean(x)
# Half-width of the interval: normal quantile times the standard error.
margin = ss.norm.ppf(q=1 - p) * (sigma / np.sqrt(n))
low = xbar - margin
up = xbar + margin
print('low=', low)
print('up=', up)
2. 方差 $\sigma^2$ 未知时 $\mu$ 的置信区间
import numpy as np
import scipy.stats as ss
from scipy.stats import t

# Confidence interval for the mean when the population variance is unknown,
# using Student's t quantile with n - 1 degrees of freedom.
n = 9
p = 0.025           # tail probability on each side -> 95% interval
s = np.sqrt(1.47)   # sample standard deviation used by the example
x = [99.3, 98.7, 100.5, 101.2, 98.3, 99.7, 99.5, 102.1, 100.5]

xbar = np.mean(x)
# Half-width of the interval: t quantile times the standard error.
margin = ss.t.ppf(1 - p, n - 1) * (s / np.sqrt(n))
low = xbar - margin
up = xbar + margin
print('low=', low)
print('up=', up)
from scipy import stats
import numpy as np

x = [99.3, 98.7, 100.5, 101.2, 98.3, 99.7, 99.5, 102.1, 100.5]
# 95% t interval centred on the sample mean, scaled by the standard error
# of the mean, with len(x) - 1 degrees of freedom.
interval = stats.t.interval(0.95, len(x) - 1, np.mean(x), stats.sem(x))
print(interval)
(99.04599342616191,100.90956212939363)
5.4 Python单正态总体方差区间估计
from scipy.stats import chi2

# Confidence interval for a variance from the chi-square distribution
# with n - 1 degrees of freedom.
n = 16
sq = 0.0023   # variance estimate used by the example
p = 0.025     # tail probability on each side -> 95% interval

# The upper chi-square quantile gives the lower bound and vice versa.
low = ((n - 1) * sq) / chi2.ppf(1 - p, n - 1)
up = ((n - 1) * sq) / chi2.ppf(p, n - 1)
print('low=', low)
print('up=', up)
5.5 Python双正态总体均值差区间估计
import numpy as np
import scipy.stats as ss

# Confidence interval for the difference of two means when both population
# variances are known.
x = [628, 583, 510, 554, 612, 523, 530, 615]
y = [535, 433, 398, 470, 567, 480, 498, 560, 503, 426]
n1 = len(x)
n2 = len(y)
xbar = np.mean(x)
ybar = np.mean(y)
sigmaq1 = 2140   # known variance of the first population
sigmaq2 = 3250   # known variance of the second population
p = 0.025        # tail probability on each side -> 95% interval

# Half-width: normal quantile times the standard error of the difference.
margin = ss.norm.ppf(q=1 - p) * np.sqrt(sigmaq1 / n1 + sigmaq2 / n2)
low = xbar - ybar - margin
up = xbar - ybar + margin
print('low=', low)
print('up=', up)
2. 两方差都未知时两均值的置信区间
import numpy as np
import scipy.stats as ss

# Confidence interval for the difference of two means when both variances
# are unknown, using a pooled variance and Student's t quantile.
x = [628, 583, 510, 554, 612, 523, 530, 615]
y = [535, 433, 398, 470, 567, 480, 498, 560, 503, 426]
n1 = 1.0 * len(x)   # converted to float
n2 = 1.0 * len(y)
# NOTE(review): np.var defaults to ddof=0 (population variance); the pooled
# formula below is normally fed ddof=1 sample variances. Kept as in the
# source example — confirm which is intended.
s1 = np.var(x)
s2 = np.var(y)
xbar = np.mean(x)
ybar = np.mean(y)
p = 0.025           # tail probability on each side -> 95% interval

# Pooled variance weighted by each sample's degrees of freedom.
sq = ((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 - 1 + n2 - 1)
margin = ss.t.ppf(1 - p, n1 + n2 - 2) * np.sqrt(sq * (1 / n1 + 1 / n2))
low = xbar - ybar - margin
up = xbar - ybar + margin
print('low=', low)
print('up=', up)
5.6 Python双正态总体方差比区间估计
import numpy as np
from scipy.stats import f

# Confidence interval for the ratio of two variances from the F distribution.
x = [20.5, 19.8, 19.7, 20.4, 20.1, 20.0, 19.0, 19.9]
y = [20.7, 19.8, 19.5, 20.8, 20.4, 19.6, 20.2]
# NOTE(review): np.var defaults to ddof=0 (population variance); a variance
# ratio interval is normally built from ddof=1 sample variances. Kept as in
# the source example, whose printed lower bound this reproduces — confirm
# which is intended.
sq1 = np.var(x)
sq2 = np.var(y)
n1 = 8
n2 = 7
p = 0.025   # tail probability on each side -> 95% interval

f.ppf(0.025, n1 - 1, n2 - 1)
# The upper F quantile gives the lower bound and vice versa.
low = sq1 / sq2 * 1 / f.ppf(1 - p, n1 - 1, n2 - 1)
up = sq1 / sq2 * 1 / f.ppf(p, n1 - 1, n2 - 1)
print('low=', low)
print('up=', up)
low=0.142168867371
up=4/p>
17
6Python參數(shù)假設(shè)檢驗(yàn)
6.2Python單個(gè)樣本t檢驗(yàn)
importpandasaspd
importnumpyasnp
#讀取數(shù)據(jù)并創(chuàng)建數(shù)據(jù)表,名稱為data。
data=pd.DataFrame(pd.read_excel(*G:\\2glkx\\data\\al6-l.xls'))
#查看數(shù)據(jù)表前5行的內(nèi)容
data,head()
#取sale數(shù)據(jù)
x=np.array(data[[*sale,]])
mu=np.mean(x)
fromscipyimportstatsasss
printmu,ss.ttcst_lsamp(a=x,popmean=500)
6.3Python兩個(gè)獨(dú)立樣本t檢驗(yàn)
importnumpyasnp
#讀取數(shù)據(jù)并創(chuàng)建數(shù)據(jù)表,名稱為data。
data=pd.DataFrame(pd.readexcel(,G:\\2glkx\\data\\al6-2.xls'))
#查看數(shù)據(jù)表前5行的內(nèi)容
x=np.array(datafa*]])
y=np.array(data[['fb']])
fromscipy.statsimportttestind
t,p=ttest_ind(x,y)
print't=',t
print,p=,,p
6.4Python配對(duì)樣本t檢險(xiǎn)
importpandasaspd
importnumpyasnp
#讀取數(shù)據(jù)并創(chuàng)建數(shù)據(jù)表,名稱為data。
data二pd.DataFrame(pd.readexcel('G:\\2glkx\\data\\al6-3.xls*))
#查看數(shù)據(jù)表前5行的內(nèi)容
x=np.array(data[[*qian*]])
y=np.array(datahou,]])
fromscipy.statsimportttest_rel
18
t,p=ttest_rel(x,y)
print't=',t
print,p=,,p
6.5Python單樣本方差假設(shè)檢臉
importpandasaspd
importnumpyasnp
#讀取數(shù)據(jù)并創(chuàng)建數(shù)據(jù)表,名稱為data。
data=pd.DataFrame(pd.read_excelCG:\\2glkx\\data\\al6-4.xls'))
#查看數(shù)據(jù)表前5行的內(nèi)容
data,head0
#取收益率數(shù)據(jù)
importnumpyasnp
x=np.array(data[[,syl*]])
n=len(x)
#計(jì)算方差
s2=np.var(x)
#計(jì)算卡方值
chisquare=(n-l)*s2/0.01
printchisquare
python實(shí)現(xiàn)卡方分布
from scipy import stats

# Chi-square goodness-of-fit test: observed counts against expected counts.
obs = [102, 102, 96, 105, 95, 100]
exp = [100, 100, 100, 100, 100, 100]
# The result holds the test statistic first and the p-value second.
result = stats.chisquare(obs, f_exp=exp)
print(result)
6.6Python雙樣本方差假設(shè)檢驗(yàn)
importpandasaspd
importnumpyasnp
fromscipyimportstats
fromstatsmodels.formula.apiimportols
fromstatsmodels.stats.anovaimportanovalm
在目錄G:\2glkx\data下建立a16-5.xls數(shù)據(jù)文件后,取數(shù)的命令如下:
#讀取數(shù)據(jù)并創(chuàng)建數(shù)據(jù)表,名稱為data。
df=pd.DataFrame(pd.read_excelG:\\2glkx\\data\\al6-5.xls'))
#查看數(shù)據(jù)表前5行的內(nèi)容
df.head()
19
formula='rcturnA"returnB,《隔離因變量和自變量(左邊因變量,右邊自變展)
model=ols(formula,df).fit()#根據(jù)公式數(shù)據(jù)建模,擬合
results=anova_Im(model)#計(jì)算F和P
printresults
7Python相關(guān)分析
7.2使用模擬數(shù)據(jù)計(jì)算變量之間的相關(guān)系數(shù)和繪圖
#導(dǎo)入包
importnumpyasnp
importstatsmodels.tsa.stattoolsassts
importmatplotlib.pyplotaspit
importpandasaspd
importseabornassns
importstatsmodels.apiassm
(1)生成隨機(jī)變數(shù)并繪制圖形
X=np.random.randn(lODO)
Y=np.random.randn(lODO)
pit.scatter(X,Y)
pit.show()
print(^correlationofXandYis")
np.corrcoef(X,Y)[0,1]
X=np.random.randn(lOOO)
Y=X+np.random,normal(0,0.1,1000)
pit.scatter(X,Y)
pit.show()
print(^correlationofXandYis")
np.corrcoef(X,Y)[0,1]
7.3使用本地?cái)?shù)據(jù)計(jì)算變量之間的相關(guān)系數(shù)和繪圖
importpandasaspd
importnumpyasnp
#讀取數(shù)據(jù)并創(chuàng)建數(shù)據(jù)表,名稱為data。
data=pd.DataFrame(pd.read_excel(,G:\\2glkx\\data\\al7-l.xls,))
#查看數(shù)據(jù)表前5行的內(nèi)容
data,head0
timeadvsale
013550
20
1250100
2356120
3468180
4570175
#取adv和sale數(shù)據(jù)
x=np.array(data[fadv*]])
y=np.array(data[fsale*]])
importscipy.stats,statsasstats
r=stats.pearsonr(x.y)[0]
Print(r)
7.4使用網(wǎng)上數(shù)據(jù)計(jì)算變量之間的相關(guān)系數(shù)和繪圖
#本程序需在Bigquant平臺(tái)中運(yùn)行
#計(jì)算兩只股票的日收益率
#中國(guó)鐵建數(shù)據(jù)
Stockl=
D.history_data(["601186.SHA"],start_date=,2016-12-0T,end_dato=,2017-05-01,,fie
Ids=fclose*])Vclose1].pct_change()[1:]
#中國(guó)中鐵數(shù)據(jù)
Stock2=
D.history_data([z,60139D.SHA"],start_date=2016-12-01',end_date=,2017-05-01,,fie
Ids=fclose*])close*].pct_change()[1:]
pit.scatter(Stockl,Stock2)
pit.xlabel(z,601186.SHAdailyreturn*)
plI.ylabel(“601390.SHAdailyreturn*)
pit.show()
print(^thecorrlationfortwostocksis:")
Stock2.corr(
温馨提示
- 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
- 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
- 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
- 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
- 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
- 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
- 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。
最新文檔
- 2026年黟縣國(guó)有投資集團(tuán)有限公司公開(kāi)招聘勞務(wù)派遣人員備考題庫(kù)及參考答案詳解1套
- 2026年紹興市上虞區(qū)教育體育局公開(kāi)招聘高水平體育教練員備考題庫(kù)及完整答案詳解一套
- 中學(xué)學(xué)生宿舍用電管理制度
- 2026年江蘇省人民醫(yī)院肺癌中心科研助理招聘?jìng)淇碱}庫(kù)完整答案詳解
- 南平市教育局關(guān)于2026年南平市教育類儲(chǔ)備人才引進(jìn)備考題庫(kù)及參考答案詳解1套
- 東莞市城建工程管理局2025年公開(kāi)招聘編外聘用人員備考題庫(kù)及一套完整答案詳解
- 企業(yè)員工培訓(xùn)與職業(yè)成長(zhǎng)路徑制度
- 企業(yè)內(nèi)部資料管理制度
- 2026年泉州市醫(yī)學(xué)會(huì)招聘工作人員的備考題庫(kù)參考答案詳解
- 2026年投資入股合同協(xié)議
- 學(xué)堂在線 雨課堂 學(xué)堂云 中國(guó)建筑史-元明清與民居 期末考試答案
- GB/T 45752-2025礦用車載滅火系統(tǒng)安全技術(shù)要求
- 安置房舉行活動(dòng)方案
- 國(guó)家開(kāi)放大學(xué)《理工英語(yǔ)4》期末機(jī)考題庫(kù)
- 貨車司機(jī)外包合同協(xié)議
- 游戲推廣合作協(xié)議書范本
- 房地產(chǎn)企業(yè)分紅權(quán)激勵(lì)方案
- 車輛維修安全培訓(xùn)
- 2025版國(guó)家開(kāi)放大學(xué)法學(xué)本科《知識(shí)產(chǎn)權(quán)法》期末紙質(zhì)考試總題庫(kù)
- 九宮數(shù)獨(dú)200題(附答案全)
- 部編版八年級(jí)上冊(cè)語(yǔ)文《期末考試卷》及答案
評(píng)論
0/150
提交評(píng)論