Python Implementations of Common Data Mining Algorithms
✧Kmeans
import random

def km(d, k, e=1e-5):
    # 1-D k-means; k is accepted for interface symmetry, but only two clusters
    # are implemented here. Initial centers are two randomly chosen data points.
    center = [d[random.randint(0, len(d) - 1)], d[random.randint(0, len(d) - 1)]]
    label = [0] * len(d)
    for _ in range(1000):
        sum0, sum1, count = 0.0, 0.0, 0
        # assignment step: each point goes to its nearest center
        for i in range(len(d)):
            dis1 = abs(d[i] - center[0])
            dis2 = abs(d[i] - center[1])
            if dis1 < dis2:
                label[i] = 0
                sum0 += d[i]
                count += 1
            else:
                label[i] = 1
                sum1 += d[i]
        # re-pick centers if either cluster ended up empty
        if count == 0 or count == len(d):
            center = [d[random.randint(0, len(d) - 1)], d[random.randint(0, len(d) - 1)]]
            continue
        # update step: each center moves to the mean of its assigned points
        center_new = [sum0 / count, sum1 / (len(d) - count)]
        # stop once both centers have effectively stopped moving
        if abs(center_new[0] - center[0]) + abs(center_new[1] - center[1]) < e:
            break
        center = center_new[:]
    return label

d = [0, 1, 1, 2, 3, 2, 4, 20, 21, 27, 25]
print(km(d, 2))
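The routine above is hard-coded for two clusters. As a rough sketch of how the same idea extends to an arbitrary k on 1-D data (the function name km_general and the iteration cap max_iter are illustrative choices, not part of the original listing), one could write:

import random

def km_general(d, k, e=1e-5, max_iter=1000):
    # Illustrative sketch: 1-D k-means for an arbitrary number of clusters.
    centers = random.sample(list(d), k)   # start from k randomly chosen data points
    labels = [0] * len(d)
    for _ in range(max_iter):
        # assignment step: each point goes to its nearest center
        for i, v in enumerate(d):
            labels[i] = min(range(k), key=lambda c: abs(v - centers[c]))
        # update step: each center moves to the mean of its points,
        # or stays put if its cluster is empty
        new_centers = []
        for c in range(k):
            members = [d[i] for i in range(len(d)) if labels[i] == c]
            new_centers.append(sum(members) / len(members) if members else centers[c])
        if sum(abs(a - b) for a, b in zip(new_centers, centers)) < e:
            break
        centers = new_centers
    return labels

print(km_general([0, 1, 1, 2, 3, 2, 4, 20, 21, 27, 25], 2))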
✧NaiveBayesian
import numpy as np
from collections import defaultdict

def nbayesianTrain(x, y):
    dim = x.shape
    model = []
    # build one conditional-probability table per feature column
    for i in range(dim[1]):
        m = defaultdict(dict)          # m[class][feature value] -> count, then probability
        classcount = defaultdict(int)  # number of samples seen per class
        for j in range(dim[0]):
            c = y[j]
            d = x[j, i]
            if d in m[c]:
                m[c][d] += 1
            else:
                m[c][d] = 1
            classcount[c] += 1
        # turn counts into conditional probabilities P(value | class)
        for c in m:
            for d in m[c]:
                m[c][d] /= float(classcount[c])
        model.append(m)
    return model

x = np.array([['a', '1'], ['a', '2'], ['a', '2'], ['b', '1']])
y = np.array([1, 1, 1, 0])
m = nbayesianTrain(x, y)
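The training routine only produces the per-feature conditional tables; there is no prediction step in the original listing. A minimal sketch of one (the function name nbayesianPredict, the uniform class prior, and the eps fallback for unseen feature values are all assumptions added here) could look like this:

def nbayesianPredict(model, classes, sample, eps=1e-6):
    # Illustrative sketch: score each class by multiplying P(value | class)
    # over all features; unseen values fall back to a small constant eps.
    # Class priors are assumed uniform.
    best_c, best_p = None, -1.0
    for c in classes:
        p = 1.0
        for i, table in enumerate(model):
            p *= table[c].get(sample[i], eps)
        if p > best_p:
            best_c, best_p = c, p
    return best_c

print(nbayesianPredict(m, [0, 1], ['a', '2']))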