-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprincipal_component_analysis.m
43 lines (36 loc) · 1.53 KB
/
principal_component_analysis.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
function [ pca_data ] = principal_component_analysis( data )
%PRINCIPAL_COMPONENT_ANALYSIS Project data onto its leading principal components.
%   pca_data = PRINCIPAL_COMPONENT_ANALYSIS(data) fits a PCA model to data.X
%   with pca(), projects data.X through that model with linproj(), and keeps
%   only the first new_dim rows of the projection.
%
%   Input:
%     data     - struct. The fields read here are:
%                  X   : matrix handed to pca() and linproj()
%                  dim : number of rows kept when no criterion is enabled
%   Output:
%     pca_data - copy of data with X replaced by the truncated projection
%                and dim set to the number of retained rows.
%
%   The number of retained dimensions is chosen from global settings:
%     KAISER_CRITERIA_FLAG / KAISER_CRITERIA_THRESHOLD
%     SCREE_TEST_FLAG      / SCREE_TEST_THRESHOLD
%   The Kaiser branch takes precedence when both flags are set. An unset
%   (empty) flag is treated as false by the IF statements below.
%
%   NOTE(review): pca() is called with one argument and must return a model
%   struct with an eigval field, and linproj(X, model) must apply it. That is
%   not the signature of the MathWorks Statistics Toolbox pca(); presumably
%   these are the STPRtool functions -- confirm which toolbox is on the path.
%   NOTE(review): the selection below assumes model.eigval is sorted in
%   descending order and that the rows of the projection follow the same
%   order -- confirm against the pca() implementation in use.
%
%   Background:
%   http://www.mathworks.com/help/stats/principal-component-analysis-pca.html
%   https://documents.software.dell.com/statistics/textbook/principal-components-factor-analysis

    global KAISER_CRITERIA_FLAG KAISER_CRITERIA_THRESHOLD
    global SCREE_TEST_FLAG SCREE_TEST_THRESHOLD

    model = pca(data.X);
    data_projection = linproj(data.X, model);

    % Choose how many components to keep.
    % The Kaiser criterion sometimes retains too many factors, while the
    % cumulative-variance ("scree") rule sometimes retains too few.
    if KAISER_CRITERIA_FLAG
        % Kaiser criterion: keep every component whose eigenvalue is at least
        % KAISER_CRITERIA_THRESHOLD (classically 1, i.e. the component must
        % extract at least as much as one original variable).
        new_dim = nnz(model.eigval >= KAISER_CRITERIA_THRESHOLD);
    elseif SCREE_TEST_FLAG
        % Cumulative-variance rule: keep the smallest number of leading
        % components whose eigenvalues sum to at least SCREE_TEST_THRESHOLD
        % of the total. The cumulative proportion is non-decreasing, so the
        % answer is the FIRST index that reaches the threshold. Counting
        % every entry at or above it would return the size of the tail,
        % which shrinks as the threshold grows.
        proportion_of_variance = cumsum(model.eigval) ./ sum(model.eigval);
        new_dim = find(proportion_of_variance >= SCREE_TEST_THRESHOLD, 1, 'first');
        if isempty(new_dim)
            % Threshold never reached (threshold above 1, round-off in the
            % last cumulative value, or an all-zero spectrum giving NaN):
            % every component is needed.
            new_dim = numel(model.eigval);
        end
    else
        % No criterion enabled: keep the original number of dimensions.
        new_dim = data.dim;
    end

    % Always keep at least one dimension.
    if new_dim < 1
        new_dim = 1;
    end

    pca_data = data;
    pca_data.X = data_projection(1:new_dim, :);
    pca_data.dim = new_dim;
end
%EOF