y = b0 + b1 * x1 + b2 * x2 + ...

b = b - learning_rate * error * x

7,0.27,0.36,20.7,0.045,45,170,1.001,3,0.45,8.8,6

6.3,0.3,0.34,1.6,0.049,14,132,0.994,3.3,0.49,9.5,6

8.1,0.28,0.4,6.9,0.05,30,97,0.9951,3.26,0.44,10.1,6

7.2,0.23,0.32,8.5,0.058,47,186,0.9956,3.19,0.4,9.9,6

7.2,0.23,0.32,8.5,0.058,47,186,0.9956,3.19,0.4,9.9,6

1. 预测

2. 估计系数

3. 葡萄酒品质预测

1. 预测

# Make a prediction with coefficients

def predict(row, coefficients):

yhat = coefficients[0]

for i in range(len(row)-1):

yhat += coefficients[i + 1] * row[i]

return yhat

x, y

1, 1

2, 3

4, 3

3, 2

5, 5

# Make a prediction with coefficients

def predict(row, coefficients):

yhat = coefficients[0]

for i in range(len(row)-1):

yhat += coefficients[i + 1] * row[i]

return yhat

dataset = [[1, 1], [2, 3], [4, 3], [3, 2], [5, 5]] coef = [0.4, 0.8]

for row in dataset:

yhat = predict(row, coef)

print("Expected=%.3f, Predicted=%.3f" % (row[-1], yhat))

y = b0 + b1 * x

y = 0.4 + 0.8 * x

Expected=1.000, Predicted=1.200

Expected=3.000, Predicted=2.000

Expected=3.000, Predicted=3.600

Expected=2.000, Predicted=2.800

Expected=5.000, Predicted=4.400

2. 估计系数

Epochs：更新系数的同时运行训练集的次数。

1. 单次 epoch 循环

2. 单次 epoch 中训练集中的每行循环

3. 单次 epoch 中每个系数循环并为每一行更新它

error = prediction - expected

b1(t+1) = b1(t) - learning_rate * error(t) * x1(t)

b0(t+1) = b0(t) - learning_rate * error(t)

# Estimate linear regression coefficients using stochastic gradient descent

def coefficients_sgd(train, l_rate, n_epoch):

coef = [0.0 for i in range(len(train[0]))]

for epoch in range(n_epoch):

sum_error = 0

for row in train:

yhat = predict(row, coef)

error = yhat - row[-1]

sum_error += error**2

coef[0] = coef[0] - l_rate * error

for i in range(len(row)-1):

coef[i + 1] = coef[i + 1] - l_rate * error * row[i]

print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))

return coef

# Make a prediction with coefficients def predict(row, coefficients): yhat = coefficients[0] for i in range(len(row)-1): yhat += coefficients[i + 1] * row[i] return yhat # Estimate linear regression coefficients using stochastic gradient descent def coefficients_sgd(train, l_rate, n_epoch): coef = [0.0 for i in range(len(train[0]))] for epoch in range(n_epoch): sum_error = 0 for row in train: yhat = predict(row, coef) error = yhat - row[-1] sum_error += error**2 coef[0] = coef[0] - l_rate * error for i in range(len(row)-1): coef[i + 1] = coef[i + 1] - l_rate * error * row[i] print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error)) return coef # Calculate coefficients dataset = [[1, 1], [2, 3], [4, 3], [3, 2], [5, 5]] l_rate = 0.001 n_epoch = 50 coef = coefficients_sgd(dataset, l_rate, n_epoch) print(coef)

>epoch=45, lrate=0.001, error=2.650

>epoch=46, lrate=0.001, error=2.627

>epoch=47, lrate=0.001, error=2.607

>epoch=48, lrate=0.001, error=2.589

>epoch=49, lrate=0.001, error=2.573

[0.22998234937311363, 0.8017220304137576]

3. 葡萄酒品质预测

# Linear Regression With Stochastic Gradient Descent for Wine Quality

from random import seed

from random import randrange

from csv import reader

from math import sqrt

# Load a CSV file

def load_csv(filename):

dataset = list()

with open(filename, 'r') as file:

csv_reader = reader(file)

for row in csv_reader:

if not row:

continue

dataset.append(row)

return dataset

# Convert string column to float

def str_column_to_float(dataset, column):

for row in dataset:

row[column] = float(row[column].strip())

# Find the min and max values for each column

def dataset_minmax(dataset):

minmax = list()

for i in range(len(dataset[0])): col_values = [row[i] for row in dataset]

value_min = min(col_values)

value_max = max(col_values)

minmax.append([value_min, value_max])

return minmax

# Rescale dataset columns to the range 0-1

def normalize_dataset(dataset, minmax):

for row in dataset:

for i in range(len(row)):

row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

# Split a dataset into k folds

def cross_validation_split(dataset, n_folds):

dataset_split = list()

dataset_copy = list(dataset)

fold_size = len(dataset) / n_folds

for i in range(n_folds):

fold = list()

while len(fold) < fold_size:

index = randrange(len(dataset_copy))

fold.append(dataset_copy.pop(index))

dataset_split.append(fold)

return dataset_split

# Calculate root mean squared error

def rmse_metric(actual, predicted):

sum_error = 0.0

for i in range(len(actual)):

prediction_error = predicted[i] - actual[i]

sum_error += (prediction_error ** 2)

mean_error = sum_error / float(len(actual))

return sqrt(mean_error)

# Evaluate an algorithm using a cross validation split

def evaluate_algorithm(dataset, algorithm, n_folds, *args):

folds = cross_validation_split(dataset, n_folds)

scores = list()

for fold in folds:

train_set = list(folds)

train_set.remove(fold)

train_set = sum(train_set, [])

test_set = list()

for row in fold:

row_copy = list(row)

test_set.append(row_copy)

row_copy[-1] = None

predicted = algorithm(train_set, test_set, *args)

actual = [row[-1] for row in fold]

rmse = rmse_metric(actual, predicted)

scores.append(rmse)

return scores

# Make a prediction with coefficients

def predict(row, coefficients):

yhat = coefficients[0]

for i in range(len(row)-1):

yhat += coefficients[i + 1] * row[i]

return yhat

# Estimate linear regression coefficients using stochastic gradient descent

def coefficients_sgd(train, l_rate, n_epoch):

coef = [0.0 for i in range(len(train[0]))]

for epoch in range(n_epoch):

for row in train:

yhat = predict(row, coef)

error = yhat - row[-1] coef[0] = coef[0] - l_rate * error

for i in range(len(row)-1):

coef[i + 1] = coef[i + 1] - l_rate * error * row[i]

# print(l_rate, n_epoch, error)

return coef

# Linear Regression Algorithm With Stochastic Gradient Descent

def linear_regression_sgd(train, test, l_rate, n_epoch):

predictions = list()

coef = coefficients_sgd(train, l_rate, n_epoch)

for row in test:

yhat = predict(row, coef)

predictions.append(yhat)

return(predictions)

# Linear Regression on wine quality dataset

seed(1)

# load and prepare data

filename = 'winequality-white.csv'

dataset = load_csv(filename)

for i in range(len(dataset[0])):

str_column_to_float(dataset, i)

# normalize

minmax = dataset_minmax(dataset)

normalize_dataset(dataset, minmax)

# evaluate algorithm

n_folds = 5

l_rate = 0.01

n_epoch = 50

scores = evaluate_algorithm(dataset, linear_regression_sgd, n_folds, l_rate, n_epoch)

print('Scores: %s' % scores)

print('Mean RMSE: %.3f' % (sum(scores)/float(len(scores))))

Scores: [0.12259834231519767, 0.12733924130891316, 0.12610773846663892, 0.1289950071681572, 0.1272180783291014]

Mean RMSE: 0.126

tags: row,dataset,error,epoch,coefficients,coef,yhat,rate,range,len,系数,train,def

1.凡CodeSecTeam转载的文章,均出自其它媒体或其他官网介绍,目的在于传递更多的信息,并不代表本站赞同其观点和其真实性负责；
2.转载的文章仅代表原创作者观点,与本站无关。其原创性以及文中陈述文字和内容未经本站证实,本站对该文以及其中全部或者部分内容、文字的真实性、完整性、及时性，不作出任何保证或承若；
3.如本站转载稿涉及版权等问题,请作者及时联系本站,我们会及时处理。