ml_softmaxTrain.m
function [W1, W2] = ml_softmaxTrain(T, X, lambda, W1init, W2init, options)
%
% Trains the weights of a neural network with one hidden layer and a
% softmax output layer using backpropagation and gradient ascent.
%
% Inputs:
%   T:       N x K binary output data matrix indicating the classes
%   X:       N x (D+1) input data matrix with ones already added in the first column
%   lambda:  the positive regularization parameter
%   W1init:  M x (D+1) matrix of initial values for the hidden-layer weights
%   W2init:  K x (M+1) matrix of initial values for the output-layer weights
%   options: options(1) is the maximum number of iterations
%            options(2) is the tolerance
%            options(3) is the learning rate eta
% Outputs:
%   W1, W2: the trained weight matrices
%
% George Kyziridis & Ioannis Chios (2016)
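%
% Example usage (a minimal sketch: the data sizes, random initialization,
% and option values below are hypothetical, and costgrad_softmax is assumed
% to be on the path):
%   N = 100; D = 4; M = 10; K = 3;
%   X = [ones(N,1) randn(N,D)];                      % add the bias column
%   T = full(sparse((1:N)', randi(K,N,1), 1, N, K)); % 1-of-K target matrix
%   W1init = 0.1*randn(M, D+1);
%   W2init = 0.1*randn(K, M+1);
%   [W1, W2] = ml_softmaxTrain(T, X, 0.01, W1init, W2init, [500 1e-6 0.001]);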
W1 = W1init; W2 = W2init;
%K = size(W2,1);
%M = size(W1,1);
% Maximum number of iterations of gradient ascent
iter = options(1);
% Tolerance
tol = options(2);
% Learning rate
eta = options(3);
% Initialize the previous cost value for the convergence check
Ewold = -Inf;
for it = 1:iter
    % Forward and backward pass: cost and the gradients with respect to
    % both weight matrices
    [E, gradEw2, gradientW1] = costgrad_softmax(X, T, W1, W2, lambda);
    fprintf('Iteration: %d, Cost: %f\n', it, E);
    % Stop when the change in the cost falls below the tolerance
    if abs(E - Ewold) < tol
        break;
    end
    % Update the parameters with a gradient ascent step
    W1 = W1 + eta*gradientW1;
    W2 = W2 + eta*gradEw2;
    % Keep the old value of the cost for the next iteration
    Ewold = E;
end