bpp-core3  3.0.0
PrincipalComponentAnalysis.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <cmath>
6 
7 #include "../../Matrix/Matrix.h"
8 #include "../../Matrix/MatrixTools.h"
9 #include "../../VectorTools.h"
10 #include "DualityDiagram.h"
12 
13 using namespace bpp;
14 using namespace std;
15 
17  const Matrix<double>& data,
18  unsigned int nbAxes,
19  const vector<double>& rowW,
20  const vector<double>& colW,
21  bool centered,
22  bool scaled,
23  double tol,
24  bool verbose) :
26  columnMeans_(),
27  columnSd_()
28 {
29  RowMatrix<double> tmpData = data;
30 
31  // Centering of data?
32  if (centered)
33  {
34  center(tmpData, rowW);
35  }
36 
37  // Scaling of data?
38  if (scaled)
39  {
40  scale(tmpData, rowW);
41  }
42 
43  setData(tmpData, rowW, colW, nbAxes, tol, verbose);
44 }
45 
46 /******************************************************************************/
47 
49  const Matrix<double>& data,
50  unsigned int nbAxes,
51  bool centered,
52  bool scaled,
53  double tol,
54  bool verbose) :
56  columnMeans_(),
57  columnSd_()
58 {
59  size_t nRow = data.getNumberOfRows();
60  size_t nCol = data.getNumberOfColumns();
61 
62  vector<double> rowW(nRow);
63  vector<double> colW(nCol);
64  VectorTools::fill(rowW, 1. / static_cast<double>(nRow));
65  VectorTools::fill(colW, 1.);
66 
67  RowMatrix<double> tmpData = data;
68 
69  // Centering of data?
70  if (centered)
71  {
72  center(tmpData, rowW);
73  }
74 
75  // Scaling of data?
76  if (scaled)
77  {
78  scale(tmpData, rowW);
79  }
80 
81  setData(tmpData, rowW, colW, nbAxes, tol, verbose);
82 }
83 
84 /******************************************************************************/
85 
86 void PrincipalComponentAnalysis::center(Matrix<double>& matrix, const vector<double>& rowW)
87 {
88  size_t nRow = matrix.getNumberOfRows();
89  size_t nCol = matrix.getNumberOfColumns();
90  if (nRow != rowW.size())
91  throw Exception("PrincipalComponentAnalysis::center. The number of row weigths have to be equal to the number of rows!");
92 
93  double sumRowWeights = VectorTools::sum(rowW);
94 
95  vector<double> columnMeans(nCol);
96  for (unsigned int i = 0; i < nCol; i++)
97  {
98  double tmp = 0.;
99  for (unsigned int j = 0; j < nRow; j++)
100  {
101  tmp += matrix(j, i) * rowW[j];
102  }
103  columnMeans[i] = tmp / sumRowWeights;
104  }
105 
106  for (unsigned int i = 0; i < nCol; i++)
107  {
108  for (unsigned int j = 0; j < nRow; j++)
109  {
110  matrix(j, i) -= columnMeans[i];
111  }
112  }
113 }
114 
115 /******************************************************************************/
116 
117 void PrincipalComponentAnalysis::scale(Matrix<double>& matrix, const vector<double>& rowW)
118 {
119  size_t nRow = matrix.getNumberOfRows();
120  size_t nCol = matrix.getNumberOfColumns();
121  if (nRow != rowW.size())
122  throw Exception("PrincipalComponentAnalysis::scale. The number of row weigths have to be equal to the number of rows!");
123 
124  double sumRowWeights = VectorTools::sum(rowW);
125 
126  vector<double> columnSd(nCol);
127  for (size_t i = 0; i < nCol; i++)
128  {
129  double tmp = 0.;
130  for (unsigned int j = 0; j < nRow; j++)
131  {
132  tmp += pow(matrix(j, i), 2) * rowW[j];
133  }
134  columnSd[i] = sqrt(tmp / sumRowWeights);
135  }
136 
137  for (size_t i = 0; i < nCol; i++)
138  {
139  for (unsigned int j = 0; j < nRow; j++)
140  {
141  if (columnSd[i] == 0.)
142  matrix(j, i) = 0.;
143  else
144  matrix(j, i) /= columnSd[i];
145  }
146  }
147 }
The matrix template interface.
Definition: Matrix.h:22
void setData(const Matrix< double > &matrix, const std::vector< double > &rowWeights, const std::vector< double > &colWeights, unsigned int nbAxes, double tol=0.0000001, bool verbose=true)
Set the data and perform computations.
PrincipalComponentAnalysis(const Matrix< double > &data, unsigned int nbAxes, const std::vector< double > &rowW, const std::vector< double > &colW, bool centered=true, bool scaled=true, double tol=0.0000001, bool verbose=true)
Build a new PrincipalComponentAnalysis object.
static T sum(const std::vector< T > &v1)
Definition: VectorTools.h:587
STL namespace.
static void scale(Matrix< double > &matrix, const std::vector< double > &rowW)
This function allows to center an input matrix from its column means.
static void center(Matrix< double > &matrix, const std::vector< double > &rowW)
This function allows to center an input matrix from its column means.
virtual size_t getNumberOfColumns() const =0
static void fill(std::vector< T > &v, T value)
Definition: VectorTools.h:359
Exception base class. Overload exception constructor (to control the exceptions mechanism). Destructor is already virtual (from std::exception)
Definition: Exceptions.h:20
virtual size_t getNumberOfRows() const =0
The core class of a multivariate analysis.