常用数据挖掘算法python实现

合集下载
  1. 1、下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
  2. 2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
  3. 3、如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。

✧Kmeans

import random

def km(d, k, e=1e-5, max_iter=1000):
    """Cluster the 1-D values in ``d`` into ``k`` groups (Lloyd's k-means).

    Parameters
    ----------
    d : sequence of numbers
        The points to cluster.
    k : int
        Number of clusters (the original hard-coded 2; any k >= 1 works now).
    e : float
        Convergence threshold on the total movement of all centers.
    max_iter : int
        Safety cap on the number of Lloyd iterations.

    Returns
    -------
    list of int
        Cluster index in ``0..k-1`` for each element of ``d`` (empty list
        for empty input).

    Fixes over the original listing:
    * centers are initialized from the *data values* (the median of each
      quantile chunk of the sorted input), not from random indices, so the
      result is deterministic and centers lie in the data's value range;
    * the center update averages the member values -- the original averaged
      the point-to-center *distances*, which is not the k-means update;
    * an empty cluster keeps its previous center instead of dividing by zero.
    """
    n = len(d)
    if n == 0:
        return []
    ordered = sorted(d)
    # One initial center per quantile chunk (its median); min() guards the
    # index when k > n.
    centers = [ordered[min(n - 1, (2 * j + 1) * n // (2 * k))]
               for j in range(k)]
    labels = [0] * n
    for _ in range(max_iter):
        sums = [0.0] * k
        counts = [0] * k
        # Assignment step: each point goes to its nearest center.
        for i, x in enumerate(d):
            j = min(range(k), key=lambda c: abs(x - centers[c]))
            labels[i] = j
            sums[j] += x
            counts[j] += 1
        # Update step: new center = mean of member values; an empty
        # cluster keeps its old center (avoids ZeroDivisionError).
        new_centers = [sums[j] / counts[j] if counts[j] else centers[j]
                       for j in range(k)]
        moved = sum(abs(a - b) for a, b in zip(new_centers, centers))
        centers = new_centers
        if moved < e:
            break
    return labels

# Demo: 7 small values and 4 large values -> two clear clusters.
d = [0, 1, 1, 2, 3, 2, 4, 20, 21, 27, 25]

# print() call form works on both Python 2 and 3; the original
# Python-2-only statement form (`print km(d,2)`) is a SyntaxError on 3.x.
print(km(d, 2))

✧NaiveBayesian

import numpy as np

from collections import defaultdict

def nbayesianTrain(x, y):
    """Estimate naive-Bayes conditional probabilities P(value | class).

    Parameters
    ----------
    x : numpy.ndarray, shape (n_samples, n_features)
        Feature matrix; values are treated as opaque categories.
    y : numpy.ndarray, shape (n_samples,)
        Class label for each sample.

    Returns
    -------
    list
        One ``defaultdict(dict)`` per feature: ``model[i][c][v]`` is the
        empirical probability of observing value ``v`` in feature ``i``
        given class ``c``.  (Same structure as the original; class priors
        are not stored and no smoothing is applied.)

    Improvement over the original listing: the per-class sample counts are
    identical for every feature, so they are computed once up front instead
    of being re-tallied inside the feature loop.
    """
    n_samples, n_features = x.shape
    # Per-class sample counts -- feature-independent, so hoisted out of
    # the feature loop (the original recomputed them for every feature).
    classcount = defaultdict(int)
    for c in y:
        classcount[c] += 1
    model = []
    for i in range(n_features):
        m = defaultdict(dict)
        for j in range(n_samples):
            c = y[j]
            v = x[j, i]
            # .get avoids the membership-test-then-index double lookup.
            m[c][v] = m[c].get(v, 0) + 1
        # Normalize raw counts into conditional probabilities.
        for c in m:
            for v in m[c]:
                m[c][v] /= float(classcount[c])
        model.append(m)
    return model

# Demo: two categorical features, binary class labels.  (The scrape had
# fused the x and y assignments onto one line; they are split here.)
x = np.array([['a', '1'], ['a', '2'], ['a', '2'], ['b', '1']])
y = np.array([1, 1, 1, 0])

m = nbayesianTrain(x, y)

相关文档
最新文档