24) Low Rank#
Last time#
Condition number and SVD
SVD for solving systems
Costs of decompositions
Today#
Reflection on algorithm choices
Low-rank structure
Primer on interpolation
using LinearAlgebra
using Plots
default(linewidth=4, legendfontsize=12)
function vander(x, k=nothing)
if isnothing(k)
k = length(x)
end
m = length(x)
V = ones(m, k)
for j in 2:k
V[:, j] = V[:, j-1] .* x
end
V
end
function gram_schmidt_classical(A)
m, n = size(A)
Q = zeros(m, n)
R = zeros(n, n)
for j in 1:n
v = A[:,j]
R[1:j-1,j] = Q[:,1:j-1]' * v
v -= Q[:,1:j-1] * R[1:j-1,j]
R[j,j] = norm(v)
Q[:,j] = v / R[j,j]
end
Q, R
end
function qr_householder(A)
m, n = size(A)
R = copy(A)
V = [] # list of reflectors
for j in 1:n
v = copy(R[j:end, j])
v[1] += sign(v[1]) * norm(v) # <--- here we pick the sign of v so that moves it the largest distance
v = normalize(v)
R[j:end,j:end] -= 2 * v * v' * R[j:end,j:end]
push!(V, v)
end
V, R
end
function qr_chol(A)
R = cholesky(A' * A).U
Q = A / R
Q, R
end
function qr_chol2(A)
Q, R = qr_chol(A)
Q, R1 = qr_chol(Q)
Q, R1 * R
end
function peanut()
theta = LinRange(0, 2*pi, 50)
r = 1 .+ .4*sin.(3*theta) + .6*sin.(2*theta)
r' .* [cos.(theta) sin.(theta)]'
end
function circle()
theta = LinRange(0, 2*pi, 50)
[cos.(theta) sin.(theta)]'
end
function Aplot(A)
"Plot a transformation from X to Y"
X = peanut()
Y = A * X
p = scatter(X[1,:], X[2,:], label="in")
scatter!(p, Y[1,:], Y[2,:], label="out")
X = circle()
Y = A * X
q = scatter(X[1,:], X[2,:], label="in")
scatter!(q, Y[1,:], Y[2,:], label="out")
plot(p, q, layout=2, aspect_ratio=:equal)
end
Aplot (generic function with 1 method)
1. Reflection on algorithm choices#
Recap on Condition number via SVD#
Recall the SVD decomposition
and the matrix norm:
That is, the matrix norm induced by the Euclidean norm or \(l_2\)-norm for vectors is the spectral norm, and:
A = randn(2,2) # nonsymmetric
A = A + A' # make it symmetric
2×2 Matrix{Float64}:
-1.99903 0.501034
0.501034 -2.65658
@show svdvals(A) # Julia built-in
U, S, V = svd(A) # Julia built-in
@show U - U' # U is symmetric
Aplot(A)
svdvals(A) = [2.927076206865345, 1.7285284483672967]
U - U' =
[0.0 0.0; 0.0 0.0]
Real-world example: autonomous vehicles#
Need to solve least squares problems in real time
Weight/cost/size increase with compute
What algorithm to choose?
What precision to use?
Factors to consider:
How many objects?
Speed (of robot and objects)
Aerial, wheeled, walking
Fog, light – longer memory?
Tolerences (how accurate does the solution need to be?)
Consequences of being wrong, who bears those consequences?
A = rand(5000, 500)
A_32 = Float32.(A)
@show cond(A)
@time qr(A); # Householder; backward stable
@time qr_chol(A); # Unstable
@time qr(A_32); # Julia built-in; best in terms of memory allocations; Run twice!
cond(A) = 56.67333832546338
0.082765 seconds (20.78 k allocations: 20.649 MiB, 21.72% gc time, 16.85% compilation time)
0.331964 seconds (748.90 k allocations: 73.596 MiB, 31.15% gc time, 93.20% compilation time)
0.156953 seconds (56.97 k allocations: 13.391 MiB, 57.00% gc time, 19.70% compilation time)
V = vander(LinRange(-1, 1, 20))
@show cond(V)
Q, R = qr(Float32.(V)) # Julia built-in, but with single-precision Floats
@show norm(Q' * Q - I)
Q, R = qr_chol(V) # Unstable; really not orthogonal
@show norm(Q' * Q - I)
cond(V) = 2.7224082312417406e8
norm(Q' * Q - I) =
1.6641898f-6
norm(Q' * Q - I) = 0.1749736012761826
0.1749736012761826
2. Low-rank structure#
Best low rank approximation#
The SVD can be truncated to yield the best rank-\(k\) approximation of a matrix.
n, k = 2, 1
A = randn(n, n)
Aplot(A)
@show U, S, V = svd(A)
(U, S, V) = svd(A) = SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}}([0.8362147881639908 0.548402067880722; 0.5484020678807219 -0.836214788163991], [1.8815983557272586, 1.6507100882644747], [-0.240754733182974 0.9705859871489981; 0.9705859871489981 0.240754733182974])
SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}}
U factor:
2×2 Matrix{Float64}:
0.836215 0.548402
0.548402 -0.836215
singular values:
2-element Vector{Float64}:
1.8815983557272586
1.6507100882644747
Vt factor:
2×2 Matrix{Float64}:
-0.240755 0.970586
0.970586 0.240755
@show Uhat = U[:, 1:k] # Uhat is now 2x1
@show Shat = S[1:k] # truncate to first k singular values, in this case 1
@show Vhat = V[:, 1:k] # Vhat is now 2x1
@show Ahat = Uhat * diagm(Shat) * Vhat'
@show norm(Ahat)
Aplot(Ahat - A) # we have squished every point onto a line
Uhat = U[:, 1:k] = [0.8362147881639908; 0.5484020678807219;;]
Shat = S[1:k] = [1.8815983557272586]
Vhat = V[:, 1:k] = [-0.240754733182974; 0.9705859871489981;;]
Ahat = Uhat * diagm(Shat) * Vhat' = [-0.37880840147094536 1.5271397634479096; -0.2484281713713453 1.0015209203086586]
norm(Ahat) = 1.8815983557272586
Example: Galaxies#
Suppose we have two galaxies of size \(n_1 = 100\) and \(n_2 = 200\), each randomly distributed around their respective centers.
galaxy(center, sigma, n) = reshape(center, 1, 3) .+ sigma*randn(n, 3)
g1 = galaxy([0 0 0], 1, 100)
g2 = galaxy([10 0 0], 1, 100)
scatter(g1[:,1], g1[:,2], aspect_ratio=:equal)
scatter!(g2[:,1], g2[:,2])
Forces between stars#
Consider Newton’s law of universal gravitation between two bodies with spatial extent (i.e., not point masses), then we can write the gravitational force from a star at position \(x_2\) acting on a star at position \(x_1\),
function gravitational_force(g1, g2)
m = size(g1, 1)
n = size(g2, 1)
F = zeros(3*m, n)
for i in 0:m-1
for j in 1:n
r = g2[j,:] - g1[1+i,:]
F[1+3*i:3*(i+1),j] = r / norm(r)^3
end
end
F
end
# Let's apply it to our two galaxies
gravitational_force(g1, g2)
300×100 Matrix{Float64}:
0.0105178 0.0137443 0.0114621 … 0.0100266 0.00998725
0.00229505 -0.000472935 0.000830519 -9.47275e-5 0.000394458
0.000371329 -0.00199326 -0.00124265 -0.00284472 -0.00235557
0.00940712 0.0104222 0.00936229 0.00793922 0.00808866
0.000391556 -0.00214897 -0.000885298 -0.00131933 -0.000960651
-0.000112797 -0.00187864 -0.00134776 … -0.00240936 -0.00209771
0.00991286 0.0128703 0.0108928 0.00986976 0.00978675
0.00155 -0.00119966 0.000161357 -0.000621255 -0.000144893
0.00117935 -0.000570315 -0.000194024 -0.00186153 -0.00141053
0.00945868 0.0123506 0.0102591 0.00893229 0.00888452
0.0026457 0.000532271 0.00143413 … 0.000527487 0.000935457
-0.000290981 -0.00262506 -0.00175631 -0.00303473 -0.00259752
0.00941691 0.0100051 0.00897386 0.00709791 0.00732324
⋮ ⋱
0.00755282 0.00888995 0.00788382 0.0071546 0.00715236
0.000467191 -0.00139954 -0.000476868 -0.000899491 -0.000597002
0.000658102 -0.000474697 -0.000237383 … -0.00125975 -0.000987715
0.0117416 0.0173023 0.0137948 0.0126184 0.0123447
0.00324854 9.21753e-5 0.0016332 0.000351336 0.000993578
0.00167744 -0.000726876 -0.000167081 -0.00259774 -0.0019161
0.00893346 0.00952388 0.00873038 0.00741564 0.00758666
-0.000149479 -0.00250906 -0.00131182 … -0.00162344 -0.0013041
-3.0924e-5 -0.00157957 -0.00114974 -0.00212977 -0.00185481
0.0134855 0.0163423 0.0140375 0.0115804 0.0117931
0.00170994 -0.00275726 -0.000516683 -0.00144564 -0.000807667
0.000342618 -0.00290932 -0.00188293 -0.00379167 -0.00322791
Spectrum#
# A new pair of galaxies
g1 = galaxy([0 0 0], 1, 500)
g2 = galaxy([10 0 0], 1, 500)
F = gravitational_force(g1, g2)
@show size(F)
# Let's plot the singular values
U, S, V = svd(F) # U is 1500 x 500, S is 500 long, and V is 500 x 500
scatter(S, yscale=:log10, ylims=(1e-10, 10), xlims=(0, 200), label="singular values")
size(F) = (1500, 500)
# Let's make a reduced order model
k = 10 # let's truncate at the first 10 singular values
Uhat = U[:,1:k] # Uhat is now 1500 x 10
Shat = S[1:k] # Shat is now 10 long
Vhat = V[:,1:k] # Vhat is now 500 x 10
Fhat = Uhat * diagm(Shat) * Vhat' # Fhat is still 1500 x 500
@show norm(F)
# And how good does it do?
@show norm(F - Fhat) # Fhat is the best rank-10 approximation of F and it is not too far off from F indeed
norm(F) = 5.547943424194474
norm(F - Fhat) = 0.004941979403326817
0.004941979403326817
# what if included more?
n = 200
norm_err = zeros(n)
for k in 1:n
Û = U[:, 1:k]
Ŝ = S[1:k]
Ṽ = V[:, 1:k]
F́ = Û * diagm(Ŝ) * Ṽ'
norm_err[k] = norm(F - F́)
end
scatter(norm_err, yscale=:log10, ylims=(1e-15, 1e0), xlims=(1, n), label="\$\\vert \\vert F - F_k \\vert \\vert\$", xlabel="\$k\$")