knn算法

原理:返回欧式距离最近的k个点中,分类数目最多的那个label

#定义一个knn分类器类:
class Knnclassify:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    For each query point, the ``k`` training points with the smallest
    Euclidean distance are selected and the label that occurs most often
    among them is returned.

    Requires ``numpy`` to be importable as ``np`` in the enclosing module.
    """

    def __init__(self, k):
        """Create a classifier that votes among the ``k`` nearest neighbours.

        Raises:
            AssertionError: if ``k`` is not positive.
        """
        assert k > 0, 'error'
        self.k = k
        # Training data is only stored by fit(); None means "not fitted yet".
        self._xtrain = None
        self._ytrain = None

    def fit(self, xdata, ydata):
        """Store the training samples and their labels.

        Args:
            xdata: 2-D array, one row per training sample.
            ydata: 1-D array of labels, one per row of ``xdata``.

        Returns:
            The classifier itself, so calls can be chained.

        Raises:
            AssertionError: if the number of samples and labels differ, or
                if there are fewer training samples than ``k``.
        """
        assert xdata.shape[0] == ydata.shape[0], 'error'
        assert self.k <= xdata.shape[0], 'error'

        self._xtrain = xdata
        self._ytrain = ydata

        return self

    def predict(self, target):
        """Predict a label for every row of ``target``.

        Args:
            target: 2-D array, one row per query sample, with the same
                number of columns as the training data.

        Returns:
            A numpy array holding one predicted label per query row.

        Raises:
            AssertionError: if the classifier has not been fitted or the
                feature count does not match the training data.
        """
        assert self._xtrain is not None and self._ytrain is not None, 'error'
        assert target.shape[1] == self._xtrain.shape[1], 'error'

        return np.array([self._predict(row) for row in target])

    def _predict(self, x):
        """Predict the label of a single 1-D sample ``x``."""
        # Imported locally so the class does not depend on a module-level
        # import that the surrounding file may not provide.
        from collections import Counter

        assert x.shape[0] == self._xtrain.shape[1], 'error'
        # Distance from x to every training row in one vectorised step.
        distances = np.sqrt(np.sum((self._xtrain - x) ** 2, axis=1))
        nearest = np.argsort(distances)
        # Use the fitted state: labels of the k closest training samples.
        votes = [self._ytrain[j] for j in nearest[:self.k]]
        return Counter(votes).most_common(1)[0][0]

    def __repr__(self):
        """Return a short description including the configured ``k``."""
        return f'knn(k={self.k})'

调用sklearn中封装好的knn:

from sklearn.neighbors import KNeighborsClassifier
# Configure the classifier with the number of neighbours (k)
knn_classifier = KNeighborsClassifier(n_neighbors=6)
# Fit it on the training samples and their labels
knn_classifier.fit(x_train, y_train)

# The sample to classify; predict() needs a 2-D matrix, so the 1-D vector
# is reshaped into a single row before being passed in
x = np.array([])
x_matrix = x.reshape(1, -1)
knn_classifier.predict(x_matrix)