Linear Models
This file is best viewed in a Pluto notebook. To run it this way, from the MagNav.jl directory, do:
julia> using Pluto
julia> Pluto.run() # select & open notebook
This is a reactive notebook, so feel free to change any parameters of interest, like adding/removing features to the model, switching between linear models (:TL, :TL_mod, :elasticnet, :plsr), or different training & testing lines.
Import packages and DataFrames
The DataFrames listed below provide useful information about the flight data collected by Sander Geophysics Ltd. (SGL) & magnetic anomaly maps.
| DataFrame | Description |
|---|---|
| df_map | map files relevant for SGL flights |
| df_cal | SGL calibration flight lines |
| df_flight | SGL flight files |
| df_all | all flight lines |
| df_nav | all navigation-capable flight lines |
| df_event | pilot-recorded in-flight events |
begin
cd(@__DIR__)
# uncomment line below to use local MagNav.jl (downloaded folder)
# using Pkg; Pkg.activate("../"); Pkg.instantiate()
using MagNav
using CSV, DataFrames
using Plots: plot, plot!
using Random: seed!
using Statistics: mean, median, std
seed!(33); # for reproducibility
include("dataframes_setup.jl"); # setup DataFrames
end;
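As a quick sanity check (a sketch, not part of the original notebook), the DataFrames created above can be inspected directly; first & names are standard DataFrames.jl calls:
begin
first(df_map,3)    # first few map files relevant for SGL flights
first(df_flight,3) # first few SGL flight files
names(df_all)      # columns available for the flight lines
end;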
begin
using MagNav: elasticnet_fit, plsr_fit, linear_test
using LinearAlgebra
using Statistics: cov
end;
Select magnetometers & parameters for compensation.
begin # try modifying these parameters
features = [:mag_4_uc,:mag_4_uc_dot,:mag_4_uc_dot4,:TL_A_flux_d]
use_mag = :mag_4_uc
use_vec = :flux_d
terms = [:permanent,:induced,:fdm]
terms_A = [:permanent,:induced,:eddy,:bias]
k_plsr = 10
end;
comp_params_1_init = LinCompParams(features_setup = features,
model_type = :plsr,
y_type = :d,
use_mag = use_mag,
terms = terms,
k_plsr = k_plsr);
Select training & testing flights from Flight 1006 (see readme).
begin
lines_train = [1006.03, 1006.04, 1006.05, 1006.06]
lines_test = [1006.08]
end;
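To confirm that the selected lines exist in the flight data, one option is to filter df_all (a sketch; this assumes df_all has a line column, consistent with its use in comp_train below):
begin
# rows of df_all corresponding to the training & testing lines
df_lines_train = df_all[in.(df_all.line,(lines_train,)),:]
df_lines_test  = df_all[in.(df_all.line,(lines_test ,)),:]
end;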
PLSR-based calibration and compensation
Perform PLSR-based calibration using training data. The full list of SGL flights is in df_flight, the full list of maps is in df_map, & the full list of flight lines is in df_all.
(comp_params_1,_,_,_,feats) =
comp_train(comp_params_1_init,lines_train,df_all,df_flight,df_map);
Perform PLSR-based compensation on testing data. The full list of navigation-capable flight lines is in df_nav.
comp_test(comp_params_1,lines_test,df_nav,df_flight,df_map);
Set up the data for Tolles-Lawson. TL_ind holds the Boolean indices (mask) for just the selected flight data. The full list of calibration flight line options is in df_cal.
begin
TL_i = 6 # select first calibration box of 1006.04
TL_xyz = get_XYZ(df_cal.flight[TL_i],df_flight;silent=true) # load flight data
TL_ind = get_ind(TL_xyz;tt_lim=[df_cal.t_start[TL_i],df_cal.t_end[TL_i]])
end;
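As a quick check (illustrative), since TL_ind is a Boolean mask over the flight data samples, summing it gives the number of samples inside the selected calibration box:
sum(TL_ind) # number of flight data samples in the calibration box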
comp_params_2_init = LinCompParams(features_setup = features,
model_type = :TL,
y_type = :d,
use_mag = use_mag,
use_vec = use_vec,
terms_A = terms_A);
Perform Tolles-Lawson calibration using training data.
(comp_params_2,_,_,_,_) =
comp_train(comp_params_2_init,TL_xyz,TL_ind);
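For intuition, the Tolles-Lawson model expresses the aircraft's magnetic interference as permanent, induced, & eddy-current terms built from the vector magnetometer's direction cosines. Below is a minimal sketch of the permanent & induced columns of the A matrix on placeholder data (illustrative only; MagNav builds the full matrix internally, including the eddy-current terms from derivatives of the direction cosines):
begin
# illustrative only: Bx_, By_, Bz_ are placeholder values, not real data
Bx_ = randn(100); By_ = randn(100); Bz_ = randn(100)
Bt_ = sqrt.(Bx_.^2 + By_.^2 + Bz_.^2) # total field magnitude
(cX,cY,cZ) = (Bx_./Bt_, By_./Bt_, Bz_./Bt_) # direction cosines
A_perm = [cX cY cZ] # 3 permanent terms
A_ind  = Bt_ .* [cX.*cX cX.*cY cX.*cZ cY.*cY cY.*cZ cZ.*cZ] # 6 induced terms
end;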
Perform Tolles-Lawson compensation on testing data.
comp_test(comp_params_2,lines_test,df_all,df_flight,df_map);
Get the training & testing data & normalize each feature (column). Typically this is done internally, but it is shown here to better explain PLSR.
begin
(_,x_train,y_train,_,_,l_segs_train) =
get_Axy(lines_train,df_all,df_flight,df_map,features;
use_mag=use_mag,use_vec=use_vec,terms=terms)
(_,x_test,y_test,_,_,l_segs_test) =
get_Axy(lines_test,df_nav,df_flight,df_map,features;
use_mag=use_mag,use_vec=use_vec,terms=terms)
(x_bias,x_scale,x_train_norm,x_test_norm) = norm_sets(x_train,x_test)
(y_bias,y_scale,y_train_norm,y_test_norm) = norm_sets(y_train,y_test)
x = x_train_norm # for conciseness
y = y_train_norm # for conciseness
x_t = x_test_norm # for conciseness
y_t = y_test_norm # for conciseness
end;
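For reference, here is a minimal sketch of what such a per-feature (column-wise) normalization amounts to, assuming a standard bias/scale (e.g., mean/standard deviation) convention; the exact convention used by norm_sets may differ:
begin
# illustrative only: column-wise bias/scale normalization
bias_  = mean(x_train,dims=1) # per-feature bias
scale_ = std( x_train,dims=1) # per-feature scale
x_train_norm_ = (x_train .- bias_) ./ scale_ # zero mean, unit std
x_test_norm_  = (x_test  .- bias_) ./ scale_ # same transform on test data
end;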
Training data matrix decomposition & reconstruction with SVD.
begin
(U,S,V) = svd(x)
Vt = V'
x_err = [std(U[:,1:i]*Diagonal(S[1:i])*Vt[1:i,:]-x) for i in eachindex(S)]
x_var = [sum(S[1:i])/sum(S) for i in eachindex(S)]
end;
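Using all singular values reconstructs x up to floating-point error, so x_err should approach zero & x_var should reach one at i = length(S). A quick check (sketch):
begin
# full-rank reconstruction recovers the original matrix
x_recon = U*Diagonal(S)*Vt
maximum(abs.(x_recon - x)) # ≈ 0 (floating-point error)
end;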
Plot PLSR error & variance retained.
begin
p1 = plot(xlab="k (number of components)",ylim=(0,1.01),dpi=200)
plot!(p1,eachindex(S),x_err,lab="error",lc=:blue)
plot!(p1,eachindex(S),x_var,lab="variance retained",lc=:red)
end
Determine training & testing error with different numbers of components.
begin
k_max = size(x,2) # maximum number of components (features)
err_train = zeros(k_max)
err_test = zeros(k_max)
coef_set = plsr_fit(x,y,k_max;return_set=true)
for k = 1:k_max
y_train_hat_norm = vec(x *coef_set[:,:,k])
y_test_hat_norm = vec(x_t*coef_set[:,:,k])
(y_train_hat,y_test_hat) = denorm_sets(y_bias,y_scale,
y_train_hat_norm,
y_test_hat_norm)
err_train[k] = std(err_segs(y_train_hat,y_train,l_segs_train))
err_test[k] = std(err_segs(y_test_hat ,y_test ,l_segs_test ))
end
end;
Plot PLSR-based training & testing error with different numbers of components.
begin
p2 = plot(xlab="k (number of components)",ylab="PLSR error [nT]",ylim=(0,150))
plot!(p2,1:k_max,err_train,lab="train")
plot!(p2,1:k_max,err_test ,lab="test")
end
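A common way to choose k is the value that minimizes the testing error curve above (a sketch using argmin over err_test):
begin
k_best = argmin(err_test) # k with the lowest testing error
println("best k = $k_best, test error = $(round(err_test[k_best],digits=2)) nT")
end;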
Elastic net-based calibration and compensation
begin
α = 0.99 # tradeoff between ridge regression (0) & Lasso (1)
(model,data_norms,y_train_hat,_) =
elasticnet_fit(x_train,y_train,α;l_segs=l_segs_train)
(y_test_hat,_) = linear_test(x_test,y_test,data_norms,model;l_segs=l_segs_test)
(coef,bias) = model
end;
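For intuition, α blends the ridge (L2) & Lasso (L1) penalties. Below is a minimal sketch of the elastic net objective being minimized (illustrative only; the exact convention used by elasticnet_fit, e.g., the λ weighting, may differ, & the λ value here is arbitrary):
begin
# illustrative elastic net loss for coefficients β & bias b
elasticnet_loss(β,b,λ) = sum(abs2, y_train - x_train*β .- b) +
                         λ*( α *sum(abs ,β) +     # Lasso (L1) part
                            (1-α)*sum(abs2,β)/2 ) # ridge (L2) part
elasticnet_loss(coef,bias,0.001) # evaluate at the fitted model (λ illustrative)
end;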
Features with largest elastic net coefficients.
feats[sortperm(abs.(coef),rev=true)]
15-element Vector{Symbol}:
 :mag_4_uc
 :TL_A_flux_d_4
 :TL_A_flux_d_1
 :TL_A_flux_d_9
 :TL_A_flux_d_6
 :TL_A_flux_d_7
 :TL_A_flux_d_2
 ⋮
 :TL_A_flux_d_3
 :TL_A_flux_d_11
 :TL_A_flux_d_10
 :TL_A_flux_d_12
 :mag_4_uc_dot4
 :mag_4_uc_dot
Principal component analysis (PCA)
Look at how k_pca is used in neural network-based calibration & compensation.
Note that the reduced-dimension data satisfies: x * V = U * Diagonal(S)
begin
k_pca = 3 # select k (number of principal components to retain)
println("std dev error with k = $k_pca: ",round(x_err[k_pca],digits=2))
println("var retained with k = $k_pca: ",round(x_var[k_pca],digits=2))
(_,S_pca,V_pca) = svd(cov(x))
x_new = x*V_pca[:,1:k_pca]
x_t_new = x_t*V_pca[:,1:k_pca]
v_scale = V_pca[:,1:k_pca]*inv(Diagonal(sqrt.(S_pca[1:k_pca])))
x_use = x*v_scale #* this could be trained on
x_t_use = x_t*v_scale #* this could be tested on
end;
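Because v_scale divides each principal direction by the square root of its variance, the transformed features are (approximately) whitened. A quick check (sketch) that their covariance is near the identity:
begin
# x_use should have ~identity covariance (whitened features)
maximum(abs.(cov(x_use) - I)) # ≈ 0
end;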