Policy Information
ML之LoR:基于线性决策边界的LoR二分类算法实现——根据两门课的成绩预测期末考试是否通过(合格还是不合格)
目录
LoR之二分类算法实现预测期末考试成绩合格还是不合格
LoR回归函数
- import pandas as pd
- import numpy as np
- import matplotlib as mpl
- import matplotlib.pyplot as plt
-
- from scipy.optimize import minimize
-
- from sklearn.preprocessing import PolynomialFeatures
-
- pd.set_option('display.notebook_repr_html', False)
- pd.set_option('display.max_columns', None)
- pd.set_option('display.max_rows', 150)
- pd.set_option('display.max_seq_items', None)
-
-
- import seaborn as sns
- sns.set_context('notebook')
- sns.set_style('white')
-
def loaddata(file, delimeter):
    """Load a delimited numeric text file and print a short preview of it.

    Args:
        file: Path (or file-like object) handed to ``np.loadtxt``.
        delimeter: Column separator, forwarded as ``delimiter=``.

    Returns:
        The array produced by ``np.loadtxt``.
    """
    table = np.loadtxt(file, delimiter=delimeter)
    # Preview: overall shape, then rows 1..5 (row 0 is skipped by the slice).
    print('Dimensions: ', table.shape)
    print(table[1:6, :])
    return table
-
def plotData(data, label_x, label_y, label_pos, label_neg, axes=None):
    """Scatter-plot a two-feature binary-labelled data set.

    Columns 0 and 1 of ``data`` are used as the x / y coordinates and
    column 2 as the class label (1 = positive, 0 = negative).

    Args:
        data: 2-D array indexed as ``data[:, 0]``, ``data[:, 1]``, ``data[:, 2]``.
        label_x: Text for the x-axis label.
        label_y: Text for the y-axis label.
        label_pos: Legend entry for the rows whose label equals 1.
        label_neg: Legend entry for the rows whose label equals 0.
        axes: Axes-like object to draw on; when ``None`` the current
            pyplot axes (``plt.gca()``) are used.
    """
    # Boolean row masks selecting the negative and positive samples.
    neg = data[:, 2] == 0
    pos = data[:, 2] == 1

    # Identity check: `== None` would invoke the object's own __eq__, which
    # is ambiguous for array-like containers of axes.
    if axes is None:
        axes = plt.gca()
    axes.scatter(data[pos][:, 0], data[pos][:, 1], marker='^', c='b', s=60,
                 linewidth=2, label=label_pos)
    axes.scatter(data[neg][:, 0], data[neg][:, 1], c='y', s=60, label=label_neg)
    axes.set_xlabel(label_x)
    axes.set_ylabel(label_y)
    axes.legend(frameon=True, fancybox=True)
-
# Load the comma-separated exam data (two score columns, then a 0/1 label).
data = loaddata('data1.txt', ',')

# Design matrix: a leading column of ones (intercept term) followed by the
# two score columns.
X = np.hstack((np.ones((data.shape[0], 1)), data[:, 0:2]))
# Labels kept as an (m, 1) column.
y = data[:, [2]]

# Draw the labelled training points.
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Pass', 'Fail')
-
-
-
-
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The value is computed from ``exp(-|z|)``, which never exceeds 1, so very
    negative inputs no longer trigger an overflow warning in ``np.exp``.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- same value, no overflow.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return result[()]
-
def costFunction(theta, X, y):
    """Return the logistic-regression cost (mean cross-entropy) as a scalar.

    ``theta`` and ``y`` are flattened to 1-D first, so the labels may be
    passed either as a flat vector or as an (m, 1) column; the result is a
    scalar in both cases.

    Args:
        theta: Parameter vector, one entry per column of ``X``.
        X: Design matrix with one row per sample.
        y: 0/1 labels, one per row of ``X``.

    Returns:
        The scalar cost, or ``np.inf`` when the cost evaluates to NaN
        (e.g. a ``0 * log(0)`` term from a saturated prediction).
    """
    theta = np.asarray(theta, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    m = y.size
    h = sigmoid(X.dot(theta))

    # log(0) is expected for saturated predictions and is handled by the NaN
    # check below, so the associated floating-point warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        J = -1 * (1 / m) * (np.log(h).dot(y) + np.log(1 - h).dot(1 - y))

    if np.isnan(J):
        return np.inf
    return J
-
def gradient(theta, X, y):
    """Return the gradient of the logistic-regression cost w.r.t. ``theta``.

    ``theta`` and ``y`` are flattened to 1-D before use. Without this, a flat
    label vector subtracted from a column of predictions would broadcast to an
    (m, m) matrix and produce a gradient of the wrong length.

    Args:
        theta: Parameter vector, one entry per column of ``X``.
        X: Design matrix with one row per sample.
        y: 0/1 labels, one per row of ``X`` (flat vector or (m, 1) column).

    Returns:
        1-D array with one partial derivative per parameter.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    m = y.size
    h = sigmoid(X.dot(theta))

    # (1/m) * X^T (h - y), with h and y both 1-D so the residual stays length m.
    grad = (1 / m) * X.T.dot(h - y)

    return grad
-
# Sanity check: evaluate the cost and its gradient at the all-zero start point.
initial_theta = np.zeros(X.shape[1])
cost = costFunction(initial_theta, X, y)
grad = gradient(initial_theta, X, y)

print('Cost: \n', cost)
print('Grad: \n', grad)
-
# Minimise the cost with SciPy's general-purpose optimiser, supplying the
# analytic gradient via `jac`. With method=None and no bounds or constraints
# SciPy selects BFGS (a quasi-Newton method, not plain gradient descent).
res = minimize(
    costFunction,
    initial_theta,
    args=(X, y),
    method=None,
    jac=gradient,
    options={'maxiter': 400},
)
-
def predict(theta, X, threshold=0.5):
    """Classify each row of ``X`` as 1 or 0 under the fitted model.

    Args:
        theta: Fitted parameter vector.
        X: Design matrix with one row per sample.
        threshold: A row is labelled 1 when its sigmoid output is greater
            than or equal to this value.

    Returns:
        Integer array of 0/1 predictions.
    """
    probabilities = sigmoid(X.dot(theta.T))
    is_positive = probabilities >= threshold
    return is_positive.astype('int')
-
# Probability of passing for a student who scored 45 on exam 1 and 85 on
# exam 2 (the leading 1 is the intercept term). The value is bound and
# printed so that it is not silently discarded when run as a script.
pass_probability = sigmoid(np.array([1, 45, 85]).dot(res.x.T))
print('Pass probability for scores (45, 85): ', pass_probability)

# Training-set accuracy: percentage of rows whose prediction equals the label.
p = predict(res.x, X)
print('Train accuracy {}%'.format(100 * np.count_nonzero(p == y.ravel()) / p.size))
-
# Plot the binary decision boundary.
# Mark the (45, 85) query student, then overlay the labelled training data.
plt.scatter(45, 85, s=60, c='r', marker='v', label='(45, 85)')
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Pass', 'Failed')

# Evaluate the model on a grid spanning the observed score ranges.
x1_min, x1_max = X[:, 1].min(), X[:, 1].max()
x2_min, x2_max = X[:, 2].min(), X[:, 2].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
grid_features = np.column_stack((np.ones(xx1.size), xx1.ravel(), xx2.ravel()))
h = sigmoid(grid_features.dot(res.x)).reshape(xx1.shape)

# The 0.5 contour of the predicted probability is the decision boundary.
plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='b')
-
评论