src/math/tools/DataAnalysis/PrincipalComponentAnalysis.js
/**
* The script is part of konpeito.
*
* AUTHOR:
* natade (http://twitter.com/natadea)
*
* LICENSE:
* The MIT license https://opensource.org/licenses/MIT
*/
import Matrix from "../../core/Matrix.js";
import Complex from "../../core/Complex.js";
/**
* Settings for principal component analysis.
* @typedef {Object} KPrincipalComponentAnalysisSettings
* @property {import("../../core/Matrix.js").KMatrixInputData} samples Explanatory variables. (Each column is a parameter and each row is a sample.)
* @property {boolean} [is_unbiased=true] Use unbiased variance when calculating variance from samples.
* @property {boolean} [is_standardised=false] Use standardized explanatory variables. Use the correlation matrix instead of the covariance matrix.
*/
/**
* @typedef {Object} KPrincipalComponent
* @property {number} eigen_value Contribution. Eigen value. Variance of principal components.
* @property {number[]} factor_loading Factor loading. Eigen vector. Principal component coefficients.
* @property {number[]} factor_loading_contribution_rate Factor loading contribution rate.
* @property {number} cumulative_contribution_ratio Cumulative contribution ratio.
* @property {number} contribution_ratio Contribution ratio.
* @property {number[]} score Principal component score.
*/
/**
* Output for principal component analysis.
* @typedef {Object} KPrincipalComponentAnalysisOutput
* @property {KPrincipalComponent[]} principal_component Principal component.
*/
/**
 * Principal component analysis (PCA).
 */
export default class PrincipalComponentAnalysis {

	/**
	 * Runs a principal component analysis on the given samples.
	 *
	 * The covariance matrix of the samples is decomposed with SVD. When
	 * `settings.is_standardised` is truthy the samples are standardized first,
	 * which is equivalent to analysing the correlation matrix instead of the
	 * covariance matrix.
	 *
	 * `settings.is_unbiased` defaults to `true` when omitted, so the unbiased
	 * variance (`correction : 1`) is used unless the caller explicitly passes `false`.
	 *
	 * Reference:
	 * [1] Yoshiyuki Wakui, Sadami Wakui, "Zukai de Wakaru Tahenryo Kaiseki"
	 *     (illustrated introduction to multivariate analysis),
	 *     Nippon Jitsugyo Publishing (2001/01)
	 *
	 * @param {KPrincipalComponentAnalysisSettings} settings - input data
	 * @returns {KPrincipalComponentAnalysisOutput} analyzed data
	 */
	static runPrincipalComponentAnalysis(settings) {
		// samples: explanatory variables. Rows are samples, columns are the individual parameters.
		let samples = Matrix.create(settings.samples);

		// Fall back to the documented default (unbiased variance) only when the option is absent.
		const is_unbiased = settings.is_unbiased === undefined ? true : settings.is_unbiased;
		const correction = {correction : is_unbiased ? 1 : 0};

		// Standardize when requested.
		// This is equivalent to running the analysis on the correlation matrix
		// rather than on the covariance matrix.
		if(settings.is_standardised) {
			samples = samples.standardization(correction);
		}

		// Covariance matrix (or correlation matrix when the samples were standardized).
		const r = samples.cov(correction);

		// Eigenvalues (singular values) and eigenvectors via SVD.
		const svd = r.svd();

		// Eigenvalue = variance of the principal component = contribution.
		const eigen_value = svd.S.diag();

		// Eigenvectors = factor loadings.
		// Each row corresponds to one principal component, each column holds the
		// coefficient of one parameter. The negated transpose of U is used here.
		// NOTE(review): the sign flip is presumably a sign convention - confirm.
		const factor_loading = svd.U.T().negate();

		// Contribution rate of each factor loading (element-wise square).
		// Same layout as factor_loading: rows are principal components, columns are parameters.
		const factor_loading_contribution_rate = factor_loading.dotpow(2);

		// Contribution ratio: each eigenvalue divided by the sum of all eigenvalues.
		const eigen_sum = eigen_value.sum();
		const contribution_ratio = eigen_value.dotdiv(eigen_sum);

		// Cumulative contribution ratio.
		// `x` is a running total updated on every callback invocation, so the result
		// relies on the callback being invoked in element order.
		let x = Complex.ZERO;
		const cumulative_contribution_ratio = Matrix.createMatrixDoEachCalculation(function(row, col) {
			x = x.add(contribution_ratio.getComplex(row + col));
			return x;
		}, contribution_ratio.column_length, contribution_ratio.row_length);

		// Principal component scores.
		// Each row corresponds to one principal component, each column holds the score of one record.
		samples = samples.sub(samples.mean({dimension : "column"})); // shift the mean to 0
		// NOTE(review): samples.size(1) is assumed to yield the number of sample rows - confirm.
		const principal_component_score = Matrix.createMatrixDoEachCalculation(function(row, col) {
			// Sum of the element-wise product of record `col` and the loadings of component `row`.
			return samples.getMatrix(col, ":").dotmul(factor_loading.getMatrix(row, ":")).sum();
		}, eigen_value.length, samples.size(1));

		// Convert every result matrix into plain number arrays for the output records.
		const array_eigen_value = eigen_value.T().getNumberMatrixArray()[0];
		const array_factor_loading = factor_loading.getNumberMatrixArray();
		const array_factor_loading_contribution_rate = factor_loading_contribution_rate.getNumberMatrixArray();
		const array_cumulative_contribution_ratio = cumulative_contribution_ratio.getNumberMatrixArray()[0];
		const array_contribution_ratio = contribution_ratio.T().getNumberMatrixArray()[0];
		const array_score = principal_component_score.getNumberMatrixArray();

		/**
		 * One entry per principal component, in the order returned by the SVD.
		 * @type {KPrincipalComponent[]}
		 */
		const principal_component = [];
		for(let i = 0; i < eigen_value.length; i++) {
			principal_component.push({
				eigen_value : array_eigen_value[i],
				factor_loading : array_factor_loading[i],
				factor_loading_contribution_rate : array_factor_loading_contribution_rate[i],
				cumulative_contribution_ratio : array_cumulative_contribution_ratio[i],
				contribution_ratio : array_contribution_ratio[i],
				score : array_score[i]
			});
		}

		return {
			principal_component : principal_component
		};
	}

}