bpp-core3  3.0.0
PrincipalComponentAnalysis.cpp
Go to the documentation of this file.
1 //
2 // File: PrincipalComponentAnalysis.cpp
3 // Authors:
4 // Mathieu Groussin
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #include <cmath>
41 
42 #include "../../Matrix/Matrix.h"
43 #include "../../Matrix/MatrixTools.h"
44 #include "../../VectorTools.h"
45 #include "DualityDiagram.h"
47 
48 using namespace bpp;
49 using namespace std;
50 
52  const Matrix<double>& data,
53  unsigned int nbAxes,
54  const vector<double>& rowW,
55  const vector<double>& colW,
56  bool centered,
57  bool scaled,
58  double tol,
59  bool verbose) :
61  columnMeans_(),
62  columnSd_()
63 {
64  RowMatrix<double> tmpData = data;
65 
66  // Centering of data?
67  if (centered)
68  {
69  center(tmpData, rowW);
70  }
71 
72  // Scaling of data?
73  if (scaled)
74  {
75  scale(tmpData, rowW);
76  }
77 
78  setData(tmpData, rowW, colW, nbAxes, tol, verbose);
79 }
80 
81 /******************************************************************************/
82 
84  const Matrix<double>& data,
85  unsigned int nbAxes,
86  bool centered,
87  bool scaled,
88  double tol,
89  bool verbose) :
91  columnMeans_(),
92  columnSd_()
93 {
94  size_t nRow = data.getNumberOfRows();
95  size_t nCol = data.getNumberOfColumns();
96 
97  vector<double> rowW(nRow);
98  vector<double> colW(nCol);
99  VectorTools::fill(rowW, 1. / static_cast<double>(nRow));
100  VectorTools::fill(colW, 1.);
101 
102  RowMatrix<double> tmpData = data;
103 
104  // Centering of data?
105  if (centered)
106  {
107  center(tmpData, rowW);
108  }
109 
110  // Scaling of data?
111  if (scaled)
112  {
113  scale(tmpData, rowW);
114  }
115 
116  setData(tmpData, rowW, colW, nbAxes, tol, verbose);
117 }
118 
119 /******************************************************************************/
120 
121 void PrincipalComponentAnalysis::center(Matrix<double>& matrix, const vector<double>& rowW)
122 {
123  size_t nRow = matrix.getNumberOfRows();
124  size_t nCol = matrix.getNumberOfColumns();
125  if (nRow != rowW.size())
126  throw Exception("PrincipalComponentAnalysis::center. The number of row weigths have to be equal to the number of rows!");
127 
128  double sumRowWeights = VectorTools::sum(rowW);
129 
130  vector<double> columnMeans(nCol);
131  for (unsigned int i = 0; i < nCol; i++)
132  {
133  double tmp = 0.;
134  for (unsigned int j = 0; j < nRow; j++)
135  {
136  tmp += matrix(j, i) * rowW[j];
137  }
138  columnMeans[i] = tmp / sumRowWeights;
139  }
140 
141  for (unsigned int i = 0; i < nCol; i++)
142  {
143  for (unsigned int j = 0; j < nRow; j++)
144  {
145  matrix(j, i) -= columnMeans[i];
146  }
147  }
148 }
149 
150 /******************************************************************************/
151 
152 void PrincipalComponentAnalysis::scale(Matrix<double>& matrix, const vector<double>& rowW)
153 {
154  size_t nRow = matrix.getNumberOfRows();
155  size_t nCol = matrix.getNumberOfColumns();
156  if (nRow != rowW.size())
157  throw Exception("PrincipalComponentAnalysis::scale. The number of row weigths have to be equal to the number of rows!");
158 
159  double sumRowWeights = VectorTools::sum(rowW);
160 
161  vector<double> columnSd(nCol);
162  for (size_t i = 0; i < nCol; i++)
163  {
164  double tmp = 0.;
165  for (unsigned int j = 0; j < nRow; j++)
166  {
167  tmp += pow(matrix(j, i), 2) * rowW[j];
168  }
169  columnSd[i] = sqrt(tmp / sumRowWeights);
170  }
171 
172  for (size_t i = 0; i < nCol; i++)
173  {
174  for (unsigned int j = 0; j < nRow; j++)
175  {
176  if (columnSd[i] == 0.)
177  matrix(j, i) = 0.;
178  else
179  matrix(j, i) /= columnSd[i];
180  }
181  }
182 }
The core class of a multivariate analysis.
void setData(const Matrix< double > &matrix, const std::vector< double > &rowWeights, const std::vector< double > &colWeights, unsigned int nbAxes, double tol=0.0000001, bool verbose=true)
Set the data and perform computations.
Exception base class. Overload exception constructor (to control the exceptions mechanism)....
Definition: Exceptions.h:59
The matrix template interface.
Definition: Matrix.h:61
virtual size_t getNumberOfColumns() const =0
virtual size_t getNumberOfRows() const =0
static void scale(Matrix< double > &matrix, const std::vector< double > &rowW)
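This function allows to scale an input matrix by the weighted root mean square of each of its columns (the column standard deviation when the matrix has been centered first).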
static void center(Matrix< double > &matrix, const std::vector< double > &rowW)
This function allows to center an input matrix from its column means.
PrincipalComponentAnalysis(const Matrix< double > &data, unsigned int nbAxes, const std::vector< double > &rowW, const std::vector< double > &colW, bool centered=true, bool scaled=true, double tol=0.0000001, bool verbose=true)
Build a new PrincipalComponentAnalysis object.
static T sum(const std::vector< T > &v1)
Definition: VectorTools.h:624
static void fill(std::vector< T > &v, T value)
Definition: VectorTools.h:396