Summary
When we use RSpectra::svds()
with opts = list(center = TRUE, scale = TRUE)
, then
the eigenvalues do not match the values returned by base::svd()
or irlba::irlba()
.
Please see the full example below for details.
library(Matrix)
library(RSpectra)
library(irlba)
For the sake of reproducibility, let’s use the iris data.
mat <- as.matrix(iris[,1:4])
n_pcs <- 3
Columns are 4 features and rows are 150 flowers.
dim(mat)
#> [1] 150 4
head(mat)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> [1,] 5.1 3.5 1.4 0.2
#> [2,] 4.9 3.0 1.4 0.2
#> [3,] 4.7 3.2 1.3 0.2
#> [4,] 4.6 3.1 1.5 0.2
#> [5,] 5.0 3.6 1.4 0.2
#> [6,] 5.4 3.9 1.7 0.4
Example 1
Let’s run SVD without scaling the features.
mat_svd <- base::svd(x = mat)
mat_svds <- RSpectra::svds(
A = mat,
k = n_pcs,
opts = list(center = FALSE, scale = FALSE)
)
mat_irlba <- irlba::irlba(
A = mat,
nv = n_pcs,
center = FALSE,
scale = FALSE
)
#> Warning in irlba::irlba(A = mat, nv = n_pcs, center = FALSE, scale = FALSE):
#> You're computing too large a percentage of total singular values, use a standard
#> svd instead.
All methods return the same values for u
mat_svd$u[1:5,1:n_pcs]
#> [,1] [,2] [,3]
#> [1,] -0.06161685 0.1296114 0.002138597
#> [2,] -0.05807094 0.1110198 0.070672387
#> [3,] -0.05676305 0.1179665 0.004342549
#> [4,] -0.05665344 0.1053081 0.005924672
#> [5,] -0.06123020 0.1310898 -0.031881095
mat_svds$u[1:5,]
#> [,1] [,2] [,3]
#> [1,] -0.06161685 0.1296114 0.002138597
#> [2,] -0.05807094 0.1110198 0.070672387
#> [3,] -0.05676305 0.1179665 0.004342549
#> [4,] -0.05665344 0.1053081 0.005924672
#> [5,] -0.06123020 0.1310898 -0.031881095
mat_irlba$u[1:5,]
#> [,1] [,2] [,3]
#> [1,] -0.06161685 0.1296114 0.002138597
#> [2,] -0.05807094 0.1110198 0.070672387
#> [3,] -0.05676305 0.1179665 0.004342549
#> [4,] -0.05665344 0.1053081 0.005924672
#> [5,] -0.06123020 0.1310898 -0.031881095
All methods return the same values for v
mat_svd$v[,1:3]
#> [,1] [,2] [,3]
#> [1,] -0.7511082 0.2841749 0.50215472
#> [2,] -0.3800862 0.5467445 -0.67524332
#> [3,] -0.5130089 -0.7086646 -0.05916621
#> [4,] -0.1679075 -0.3436708 -0.53701625
mat_svds$v
#> [,1] [,2] [,3]
#> [1,] -0.7511082 0.2841749 0.50215472
#> [2,] -0.3800862 0.5467445 -0.67524332
#> [3,] -0.5130089 -0.7086646 -0.05916621
#> [4,] -0.1679075 -0.3436708 -0.53701625
mat_irlba$v
#> [,1] [,2] [,3]
#> [1,] -0.7511082 0.2841749 0.50215472
#> [2,] -0.3800862 0.5467445 -0.67524332
#> [3,] -0.5130089 -0.7086646 -0.05916621
#> [4,] -0.1679075 -0.3436708 -0.53701625
All methods return the same values for d
mat_svd$d[1:n_pcs]
#> [1] 95.959914 17.761034 3.460931
mat_svds$d
#> [1] 95.959914 17.761034 3.460931
mat_irlba$d
#> [1] 95.959914 17.761034 3.460931
Example 2
Let’s try again, but this time let’s center and scale each feature.
mat_scaled <- scale(mat)
head(mat_scaled)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> [1,] -0.8976739 1.01560199 -1.335752 -1.311052
#> [2,] -1.1392005 -0.13153881 -1.335752 -1.311052
#> [3,] -1.3807271 0.32731751 -1.392399 -1.311052
#> [4,] -1.5014904 0.09788935 -1.279104 -1.311052
#> [5,] -1.0184372 1.24503015 -1.335752 -1.311052
#> [6,] -0.5353840 1.93331463 -1.165809 -1.048667
After scaling, the means should be equal to zero:
colMeans(mat)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 5.843333 3.057333 3.758000 1.199333
colMeans(mat_scaled)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> -4.480675e-16 2.035409e-16 -2.844947e-17 -3.714621e-17
And the standard deviation should be equal to one:
apply(mat, 2, sd)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 0.8280661 0.4358663 1.7652982 0.7622377
apply(mat_scaled, 2, sd)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 1 1 1 1
Ok, let’s try SVD again:
mat_svd_scaled <- base::svd(x = mat_scaled)
mat_svds_scaled <- RSpectra::svds(
A = mat,
k = n_pcs,
opts = list(center = TRUE, scale = TRUE)
)
mat_irlba_scaled <- irlba::irlba(
A = mat,
nv = n_pcs,
center = colMeans(mat),
scale = apply(mat, 2, sd)
)
#> Warning in irlba::irlba(A = mat, nv = n_pcs, center = colMeans(mat), scale =
#> apply(mat, : You're computing too large a percentage of total singular values,
#> use a standard svd instead.
All methods return the same values for u
mat_svd_scaled$u[1:5,1:n_pcs]
#> [,1] [,2] [,3]
#> [1,] -0.10823953 -0.04099580 0.027218646
#> [2,] -0.09945776 0.05757315 0.050003401
#> [3,] -0.11299630 0.02920003 -0.009420891
#> [4,] -0.10989710 0.05101939 -0.019457133
#> [5,] -0.11422046 -0.05524180 -0.003354363
mat_svds_scaled$u[1:5,]
#> [,1] [,2] [,3]
#> [1,] 0.10823953 0.04099580 -0.027218646
#> [2,] 0.09945776 -0.05757315 -0.050003401
#> [3,] 0.11299630 -0.02920003 0.009420891
#> [4,] 0.10989710 -0.05101939 0.019457133
#> [5,] 0.11422046 0.05524180 0.003354363
mat_irlba_scaled$u[1:5,]
#> [,1] [,2] [,3]
#> [1,] -0.10823953 -0.04099580 0.027218646
#> [2,] -0.09945776 0.05757315 0.050003401
#> [3,] -0.11299630 0.02920003 -0.009420891
#> [4,] -0.10989710 0.05101939 -0.019457133
#> [5,] -0.11422046 -0.05524180 -0.003354363
All methods return the same values for v
mat_svd_scaled$v[,1:3]
#> [,1] [,2] [,3]
#> [1,] 0.5210659 -0.37741762 0.7195664
#> [2,] -0.2693474 -0.92329566 -0.2443818
#> [3,] 0.5804131 -0.02449161 -0.1421264
#> [4,] 0.5648565 -0.06694199 -0.6342727
mat_svds_scaled$v
#> [,1] [,2] [,3]
#> [1,] -0.5210659 0.37741762 -0.7195664
#> [2,] 0.2693474 0.92329566 0.2443818
#> [3,] -0.5804131 0.02449161 0.1421264
#> [4,] -0.5648565 0.06694199 0.6342727
mat_irlba_scaled$v
#> [,1] [,2] [,3]
#> [1,] 0.5210659 -0.37741762 0.7195664
#> [2,] -0.2693474 -0.92329566 -0.2443818
#> [3,] 0.5804131 -0.02449161 -0.1421264
#> [4,] 0.5648565 -0.06694199 -0.6342727
Uh oh! It looks like svds()
is not returning the correct values for d
mat_svd_scaled$d[1:n_pcs]
#> [1] 20.853205 11.670070 4.676192
mat_svds_scaled$d
#> [1] 1.7083611 0.9560494 0.3830886
mat_irlba_scaled$d
#> [1] 20.853205 11.670070 4.676192
Example 3
Let’s try RSpectra again, but we’ll do the scaling manually.
mat_svds_man_scaled <- RSpectra::svds(
A = mat_scaled,
k = n_pcs,
opts = list(center = FALSE, scale = FALSE)
)
mat_svds_man_scaled$d
#> [1] 20.853205 11.670070 4.676192
Conclusions
Manual scaling eliminates the error in the svds()
output. This
implies that there might be a bug inside the svds()
function.
The wrong values are off by a multiplicative constant factor,
but the value of the factor is dependent on the input data.
For the iris data, the factor seems to be 12.20656, which is close
to the number of matrix multiplication operations (11)
mat_svds_man_scaled$d / mat_svds_scaled$d
#> [1] 12.20656 12.20656 12.20656
mat_svds_scaled$nops
#> [1] 11
Created on 2022-01-25 by the reprex package (v2.0.1)
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.3 (2020-10-10)
#> os macOS Catalina 10.15.7
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/New_York
#> date 2022-01-25
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.2)
#> cli 3.0.1 2021-07-17 [1] CRAN (R 4.0.2)
#> crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.2)
#> digest 0.6.28 2021-09-23 [1] CRAN (R 4.0.2)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.2)
#> evaluate 0.14 2019-05-28 [2] CRAN (R 4.0.1)
#> fansi 0.5.0 2021-05-25 [1] CRAN (R 4.0.2)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.0.2)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> glue 1.4.2 2020-08-27 [2] CRAN (R 4.0.2)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.0.2)
#> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.0.2)
#> irlba * 2.3.3 2019-02-05 [2] CRAN (R 4.0.2)
#> knitr 1.36 2021-09-29 [1] CRAN (R 4.0.2)
#> lattice 0.20-45 2021-09-22 [1] CRAN (R 4.0.2)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.0.2)
#> magrittr 2.0.1.9000 2020-12-15 [1] Github (tidyverse/magrittr@bb1c86a)
#> Matrix * 1.3-4 2021-06-01 [1] CRAN (R 4.0.2)
#> pillar 1.6.3 2021-09-26 [1] CRAN (R 4.0.2)
#> pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.0.2)
#> purrr 0.3.4 2020-04-17 [2] CRAN (R 4.0.2)
#> R.cache 0.15.0 2021-04-30 [1] CRAN (R 4.0.2)
#> R.methodsS3 1.8.1 2020-08-26 [1] CRAN (R 4.0.2)
#> R.oo 1.24.0 2020-08-26 [1] CRAN (R 4.0.2)
#> R.utils 2.11.0 2021-09-26 [1] CRAN (R 4.0.2)
#> Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.0.2)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.0.2)
#> rlang 0.4.11 2021-04-30 [1] CRAN (R 4.0.2)
#> rmarkdown 2.11 2021-09-14 [1] CRAN (R 4.0.2)
#> RSpectra * 0.16-0 2019-12-01 [2] CRAN (R 4.0.2)
#> rstudioapi 0.13 2020-11-12 [2] CRAN (R 4.0.2)
#> sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 4.0.2)
#> stringi 1.7.5 2021-10-04 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [2] CRAN (R 4.0.2)
#> styler 1.6.2 2021-09-23 [1] CRAN (R 4.0.2)
#> tibble 3.1.5 2021-09-30 [1] CRAN (R 4.0.2)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.0.2)
#> vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.2)
#> withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.2)
#> xfun 0.26 2021-09-14 [1] CRAN (R 4.0.2)
#> yaml 2.2.1 2020-02-01 [2] CRAN (R 4.0.2)
#>
#> [1] /Users/kamil/Library/R/4.0/library
#> [2] /Library/Frameworks/R.framework/Versions/4.0/Resources/library