21) Low Rank#

Last time#

  • Condition number and SVD

  • SVD for solving systems

  • Costs of decompositions

Today#

  1. Reflection on algorithm choices

  2. Low-rank structure

  3. Primer on interpolation

using LinearAlgebra
using Plots
default(linewidth=4, legendfontsize=12)

function vander(x, k=nothing)
    if isnothing(k)
        k = length(x)
    end
    m = length(x)
    V = ones(m, k)
    for j in 2:k
        V[:, j] = V[:, j-1] .* x
    end
    V
end

function gram_schmidt_classical(A)
    m, n = size(A)
    Q = zeros(m, n)
    R = zeros(n, n)
    for j in 1:n
        v = A[:,j]
        R[1:j-1,j] = Q[:,1:j-1]' * v
        v -= Q[:,1:j-1] * R[1:j-1,j]
        R[j,j] = norm(v)
        Q[:,j] = v / R[j,j]
    end
    Q, R
end

function qr_householder(A)
    m, n = size(A)
    R = copy(A)
    V = [] # list of reflectors
    for j in 1:n
        v = copy(R[j:end, j])
        v[1] += sign(v[1]) * norm(v) # <--- here we pick the sign of v so that moves it the largest distance
        v = normalize(v)
        R[j:end,j:end] -= 2 * v * v' * R[j:end,j:end]
        push!(V, v)
    end
    V, R
end

function qr_chol(A)
    R = cholesky(A' * A).U
    Q = A / R
    Q, R
end

function qr_chol2(A)
    Q, R = qr_chol(A)
    Q, R1 = qr_chol(Q)
    Q, R1 * R
end

function peanut()
    theta = LinRange(0, 2*pi, 50)
    r = 1 .+ .4*sin.(3*theta) + .6*sin.(2*theta)
    r' .* [cos.(theta) sin.(theta)]'
end

function circle()
    theta = LinRange(0, 2*pi, 50)
    [cos.(theta) sin.(theta)]'
end

function Aplot(A)
    "Plot a transformation from X to Y"
    X = peanut()
    Y = A * X
    p = scatter(X[1,:], X[2,:], label="in")
    scatter!(p, Y[1,:], Y[2,:], label="out")
    X = circle()
    Y = A * X
    q = scatter(X[1,:], X[2,:], label="in")
    scatter!(q, Y[1,:], Y[2,:], label="out")
    plot(p, q, layout=2, aspect_ratio=:equal)
end
Aplot (generic function with 1 method)

1. Reflection on algorithm choices#

Recap on Condition number via SVD#

\[\begin{split} U \overbrace{\begin{bmatrix} \sigma_{\max} && \\ & \ddots & \\ && \sigma_{\min} \end{bmatrix}}^{\Sigma} V^T = A \end{split}\]
(8)#\[\begin{align} \lVert A \rVert &= \sigma_{\max} & \textrm{, and } \; \kappa(A) &= \frac{\sigma_{\max}}{\sigma_{\min}} = \texttt{cond}(A) \end{align}\]
A = randn(2,2) # nonsymmetric
A = A + A' # make it symmetric
2×2 Matrix{Float64}:
 -3.33879  -2.41028
 -2.41028   1.19463
@show svdvals(A) # Julia built-in
U, S, V = svd(A) # Julia built-in
@show U - U' # U is symmetric
Aplot(A)
svdvals(A) = [4.380764426241067, 2.236607842772889]
U - U' = 
[0.0 1.1102230246251565e-16; -1.1102230246251565e-16 0.0]

Real-world example: autonomous vehicles#

  • Need to solve least squares problems in real time

  • Weight/cost/size increase with compute

  • What algorithm to choose?

  • What precision to use?

Factors to consider:

  • How many objects?

  • Speed (of robot and objects)

  • Aerial, wheeled, walking

  • Fog, light – longer memory?

  • Tolerences (how accurate does the solution need to be?)

  • Consequences of being wrong, who bears those consequences?

A = rand(5000, 500)
A_32 = Float32.(A)
@show cond(A)
@time qr(A);       # Householder; backward stable
@time qr_chol(A);  # Unstable
@time qr(A_32);    # Julia built-in; best in terms of memory allocations; Run twice!
cond(A) = 56.492237636147166
  0.056697 seconds (20.78 k allocations: 20.649 MiB, 24.56% compilation time)
  0.242713 seconds (748.90 k allocations: 73.596 MiB, 5.18% gc time, 87.68% compilation time)
  0.081420 seconds (56.96 k allocations: 13.407 MiB, 38.53% compilation time)
V = vander(LinRange(-1, 1, 20))
@show cond(V)
Q, R = qr(Float32.(V)) # Julia built-in, but with single-precision Floats
@show norm(Q' * Q - I)
Q, R = qr_chol(V) # Unstable; really not orthogonal
@show norm(Q' * Q - I)
cond(V) = 2.7224082312417406e8
norm(Q' * Q - I) = 
1.6641898f-6
norm(Q' * Q - I) = 0.1749736012761826
0.1749736012761826

2. Low-rank structure#

Best low rank approximation#

The SVD can be truncated to yield the best rank-\(k\) approximation of a matrix.

n, k = 2, 1
A = randn(n, n)
Aplot(A)
@show U, S, V = svd(A)
(U, S, V) = svd(A) = SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}}([-0.02693607540013121 -0.9996371580938949; -0.999637158093895 0.0269360754001311], [1.6646404744723, 0.5780471693697978], [-0.9974523574605526 -0.07133578762715097; 0.07133578762715097 -0.9974523574605526])
SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}}
U factor:
2×2 Matrix{Float64}:
 -0.0269361  -0.999637
 -0.999637    0.0269361
singular values:
2-element Vector{Float64}:
 1.6646404744723
 0.5780471693697978
Vt factor:
2×2 Matrix{Float64}:
 -0.997452   -0.0713358
  0.0713358  -0.997452
@show Uhat = U[:, 1:k] # Uhat is now 2x1
@show Shat = S[1:k] # truncate to first k singular values, in this case 1
@show Vhat = V[:, 1:k] # Vhat is now 2x1
@show Ahat = Uhat * diagm(Shat) * Vhat'
@show norm(Ahat)
Aplot(Ahat - A) # we have squished every point onto a line
Uhat = U[:, 1:k] = [-0.02693607540013121; -0.999637158093895;;]
Shat = S[1:k] = [1.6646404744723]
Vhat = V[:, 1:k] = [-0.9974523574605526; -0.07133578762715097;;]
Ahat = Uhat * diagm(Shat) * Vhat' = [0.04472464789298707 0.003198616916316635; 1.6597971030433751 0.11870535245243052]
norm(Ahat) = 1.6646404744723002

Example: Galaxies#

Suppose we have two galaxies of size \(n_1 = 100\) and \(n_2 = 200\), each randomly distributed around their respective centers.

galaxy(center, sigma, n) = reshape(center, 1, 3) .+ sigma*randn(n, 3)
g1 = galaxy([0 0 0], 1, 100)
g2 = galaxy([10 0 0], 1, 100)

scatter(g1[:,1], g1[:,2], aspect_ratio=:equal)
scatter!(g2[:,1], g2[:,2])

Forces between stars#

Consider Newton’s law of universal gravitation between two bodies with spatial extent (i.e., not point masses), then we can write the gravitational force from a star at position \(x_2\) acting on a star at position \(x_1\),

\[ F_{1,2} = G \frac{m_1 m_2}{\lVert \mathbf{x}_2 - \mathbf{x}_1 \rVert^3} (\mathbf{x}_2 - \mathbf{x}_1) \]
where \(m_1\) and \(m_2\) are the masses of each star, respectively.

function gravitational_force(g1, g2)
    m = size(g1, 1)
    n = size(g2, 1)
    F = zeros(3*m, n)
    for i in 0:m-1
        for j in 1:n
            r = g2[j,:] - g1[1+i,:]
            F[1+3*i:3*(i+1),j] = r / norm(r)^3
        end
    end
    F
end

# Let's apply it to our two galaxies
gravitational_force(g1, g2)
300×100 Matrix{Float64}:
  0.00636116    0.00732331    0.00638666   …   0.00834808    0.0090871
 -0.000107148  -0.00109593   -0.000727072     -0.00146346   -0.00045082
  0.000101704   0.00033505    0.000479355     -8.39935e-5   -0.000157112
  0.00699787    0.0079215     0.00694592       0.00895428    0.0100512
 -0.000517151  -0.00174277   -0.00122987      -0.00226276   -0.00121364
 -0.000145166   5.67896e-5    0.000284138  …  -0.000493583  -0.000641941
  0.0116318     0.0156273     0.0125142        0.0194043     0.0194575
  0.0022455     0.000459183   0.000774671      0.000204247   0.00400904
  0.00041212    0.001265      0.00148283       3.05712e-5   -0.000173751
  0.00809633    0.0099153     0.00843545       0.0114881     0.012165
  0.00057469   -0.000714714  -0.000317219  …  -0.00109481    0.000644822
 -0.000294629  -7.71966e-5    0.000250984     -0.00088473   -0.00105998
  0.00757179    0.00938838    0.00800692       0.0108182     0.0111934
  ⋮                                        ⋱                
  0.00592634    0.00648113    0.00573242       0.0073371     0.00827879
 -0.0006913    -0.00165499   -0.00122139      -0.00211206   -0.00138889
  0.000443875   0.00070822    0.00076576   …   0.000435445   0.0004453
  0.0068008     0.00772338    0.00688091       0.00848643    0.00948211
 -0.000553698  -0.00176388   -0.00127149      -0.00222136   -0.00123158
 -0.000954124  -0.000957924  -0.00055428      -0.00166771   -0.00197771
  0.00699787    0.00880175    0.00746167       0.0102544     0.0102667
  0.00121368    0.000431678   0.000541042  …   0.00037799    0.00183373
 -1.67157e-5    0.00024559    0.000452894     -0.000349507  -0.000436511
  0.00672655    0.00807559    0.0070033        0.00915918    0.00966031
  0.000379042  -0.000600526  -0.000299698     -0.000873352   0.000360064
 -0.000443743  -0.000346765  -4.33875e-5      -0.000986135  -0.00113428

Spectrum#

g1 = galaxy([0 0 0], 1, 500)
g2 = galaxy([10 0 0], 1, 500)
F = gravitational_force(g1, g2)
@show size(F)
U, S, V = svd(F) # U is 1500 x 500, S is 500 long, and V is 500 x 500
scatter(S, yscale=:log10, ylims=(1e-10, 10), xlims=(0, 200))
size(F) = (1500, 500)
k = 10 # let's truncate at the first 10 singular values
Uhat = U[:,1:k]   # Uhat is now 1500 x 10
Shat = S[1:k]     # Shat is now 10 long
Vhat = V[:,1:k]   # Vhat is now 500 x 10
Fhat = Uhat * diagm(Shat) * Vhat' # Fhat is still 1500 x 500
@show norm(F)
@show norm(F - Fhat) # Fhat is the best rank-10 approximation of F and it is not too far off from F indeed
norm(F) = 5.342969915234898
norm(F - Fhat) = 0.00413746972004963
0.00413746972004963