Version()

最新バージョン 2024-10-29 14:12

using MultivariateStats
using Plots
default(
    titlefont         = (12, "times"),
    guidefont         = (10, "times"),
    tickfont          = (7, "times"),
    label             = "",
    foreground_color_legend = nothing,
    markerstrokewidth = 0,
    grid              = false,
    tick_direction    = :out,
    alpha             = 0.4
    )

using MultivariateStats

using RCall
iris = rcopy(R"iris")
first(iris)

X = iris.Sepal_Length[1:50];
y = iris.Sepal_Width[1:50];

a, b = llsq(X, y) # 戻り値は a, b の順

2-element Vector{Float64}:
  0.7985283006471573
 -0.569432673039669

minx, maxx = extrema(X)
w = 0.1(maxx - minx)
minx -= w
maxx += w

using Plots
scatter(X, y, smooth=true,
    xlabel="Sepal Length", ylabel="Sepal Width")
plot!([minx, maxx], [minx, maxx] .* a .+ b)

predicted_y = a .* X .+ b;

minx, maxx = extrema(predicted_y)
w = 0.5(maxx - minx)
minx -= w
maxx += w
scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Sepal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

a = llsq(X, y, bias=false)

1-element Vector{Float64}:
 0.6853282926558071

minx, maxx = extrema(X)
w = 0.1(maxx - minx)
minx -= w
maxx += w

using Plots
scatter(X, y, smooth=true,
    xlabel="Sepal Length", ylabel="Sepal Width")
plot!([minx, maxx], [minx, maxx] .* a)

predicted_y = a .* X;

minx, maxx = extrema(predicted_y)
w = 0.5(maxx - minx)
minx -= w
maxx += w
scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Sepal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

using RCall
iris = rcopy(R"iris");
first(iris)

X =　Matrix(iris[:, [:Sepal_Length, :Sepal_Width, :Petal_Length]]);
y = iris.Petal_Width;

par = llsq(X, y) # 戻り値は a1, a2, ..., ap, b の順序

4-element Vector{Float64}:
 -0.20726607375749423
  0.2228285438610108
  0.5240831147784665
 -0.2403073891122502

predicted_y = X * par[1:end-1] .+ par[end];

minx, maxx = extrema(predicted_y)
w = 0.1(maxx - minx)
minx -= w
maxx += w
scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Petal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

par = llsq(X, y, bias=false) # 戻り値は a1, a2, ..., ap の順序

3-element Vector{Float64}:
 -0.24560512728636977
  0.20405076926806906
  0.5355216479007071

predicted_y = X * par;

minx, maxx = extrema(predicted_y)
w = 0.1(maxx - minx)
minx -= w
maxx += w
scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Petal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

using RCall
iris = rcopy(R"iris");
first(iris)

X = iris.Sepal_Length[1:50];
y = iris.Sepal_Width[1:50];

a, b = ridge(X, y, 0.5) # r = 0.5　戻り値は a, b の順序

2-element Vector{Float64}:
  0.7379253817431333
 -0.2660544610061244

minx, maxx = extrema(X)
w = 0.1(maxx - minx)
minx -= w
maxx += w

using Plots
scatter(X, y, smooth=true,
    xlabel="Sepal Length", ylabel="Sepal Width")
plot!([minx, maxx], [minx, maxx] .* a .+ b)

predicted_y = X .* a .+ b;

minx, maxx = extrema(predicted_y)
w = 0.5(maxx - minx)
minx -= w
maxx += w
scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Sepal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

a = ridge(X, y, 0.5, bias=false)

1-element Vector{Float64}:
 0.6850562484618012

minx, maxx = extrema(X)
w = 0.1(maxx - minx)
minx -= w
maxx += w

using Plots
scatter(X, y, smooth=true,
    xlabel="Sepal Length", ylabel="Sepal Width")
plot!([minx, maxx], [minx, maxx] .* a)

predicted_y = X .* a;

minx, maxx = extrema(predicted_y)
w = 0.5(maxx - minx)
minx -= w
maxx += w
scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Sepal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

using RCall
iris = rcopy(R"iris");
first(iris)

X =　Matrix(iris[:, [:Sepal_Length, :Sepal_Width, :Petal_Length]]);
y = iris.Petal_Width;

a = ridge(X, y, 0.5) # 戻り値は a1, a2, ..., ap, b の順序

4-element Vector{Float64}:
 -0.19022561531733848
  0.2070232201907222
  0.5148884729729777
 -0.25700486112457116

predicted_y = X * a[1:end-1] .+ a[end];
minx, maxx = extrema(predicted_y)
w = 0.1(maxx - minx)
minx -= w
maxx += w

scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Petal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

a = ridge(X, y, 0.5, bias=false) # 戻り値は a1, a2, ..., ap の順序

3-element Vector{Float64}:
 -0.23078471296513325
  0.18644776803795668
  0.5268402699019675

predicted_y = X * a;
minx, maxx = extrema(predicted_y)
w = 0.1(maxx - minx)
minx -= w
maxx += w

scatter(predicted_y, y,
    xlabel="predicted values", ylabel="Petal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

using RCall
iris = rcopy(R"iris");
first(iris)

# import Pkg; Pkg.add("GLM")

using GLM
result = lm(@formula(Petal_Width ~ Sepal_Length + Sepal_Width + Petal_Length), iris);

println(result)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

Petal_Width ~ 1 + Sepal_Length + Sepal_Width + Petal_Length

Coefficients:
──────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────────────
(Intercept)   -0.240307   0.17837    -1.35    0.1800  -0.592828   0.112213
Sepal_Length  -0.207266   0.0475062  -4.36    <1e-04  -0.301155  -0.113377
Sepal_Width    0.222829   0.048938    4.55    <1e-04   0.12611    0.319547
Petal_Length   0.524083   0.0244913  21.40    <1e-46   0.47568    0.572486
──────────────────────────────────────────────────────────────────────────

println(coeftable(result))
# coeftable(result) |> println

──────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────────────
(Intercept)   -0.240307   0.17837    -1.35    0.1800  -0.592828   0.112213
Sepal_Length  -0.207266   0.0475062  -4.36    <1e-04  -0.301155  -0.113377
Sepal_Width    0.222829   0.048938    4.55    <1e-04   0.12611    0.319547
Petal_Length   0.524083   0.0244913  21.40    <1e-46   0.47568    0.572486
──────────────────────────────────────────────────────────────────────────

coefnames(result) |> println

["(Intercept)", "Sepal_Length", "Sepal_Width", "Petal_Length"]

coef(result) |> println

[-0.24030738911227026, -0.20726607375748946, 0.2228285438610107, 0.5240831147784644]

stderror(result) |> println

[0.17836974630353397, 0.04750617515116344, 0.04893803982536723, 0.024491344016769598]

confint(result, level=0.99)

4×2 Matrix{Float64}:
 -0.705839   0.225224
 -0.331254  -0.0832786
  0.095104   0.350553
  0.460163   0.588004

r2(result), adjr2(result)

(0.9378502736046809, 0.9365732244321743)

using StatsBase
aic(result), aicc(result), bic(result)

(-63.50217888684172, -63.085512220175055, -48.44900241636044)

loglikelihood(result), nullloglikelihood(result)

(36.75108944342086, -171.614575308807)

deviance(result), nulldeviance(result)

(5.380297670727681, 86.56993333333332)

using Statistics
sum((iris.Petal_Width .- mean(iris.Petal_Width)) .^ 2) |> println
var(iris.Petal_Width, corrected=false) * nobs(result) |> println

86.56993333333335
86.56993333333335

nobs(result), dof_residual(result)

(150.0, 146.0)

vcov(result)

4×4 Matrix{Float64}:
  0.0318158   -0.00507594  -0.0024861     0.00151442
 -0.00507594   0.00225684  -0.001344     -0.00106505
 -0.0024861   -0.001344     0.00239493    0.000802941
  0.00151442  -0.00106505   0.000802941   0.000599826

response(result)[1:10] |> println

[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1]

fitted(result)[1:10] |> println

[0.21625189892792102, 0.14629084174891355, 0.17990145379476727, 0.28316182974010806, 0.2592613606897712, 0.4004284287786176, 0.2976020814205647, 0.2671039633954154, 0.22764102424155702, 0.22098200761286113]

pred = fitted(result);
# pred = GLM.predict(result); # GLM.predict() としているのは，他のパッケージの predict() と区別するため
pred[1:10] |> println # 先頭の 10 個

[0.21625189892792102, 0.14629084174891355, 0.17990145379476727, 0.28316182974010806, 0.2592613606897712, 0.4004284287786176, 0.2976020814205647, 0.2671039633954154, 0.22764102424155702, 0.22098200761286113]

minx, maxx = extrema(pred)
w = 0.1(maxx - minx)
minx -= w
maxx += w

2.7719531985997383

using Plots
scatter(pred, response(result),
    xlabel="predicted values", ylabel="Petal Width",
    size=(400, 400), aspectratio=1)
plot!([minx, maxx], [minx, maxx])

first(iris, 2)

using DataFrames

newx = DataFrame([5.1 3.5 1.4
                  4.3 3.2 1.6], [:Sepal_Length, :Sepal_Width, :Petal_Length])
GLM.predict(result, newx)

2-element Vector{Union{Missing, Float64}}:
 0.21625189892792102
 0.4200328177313023

result.model

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────
        Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────
x1  -0.240307   0.17837    -1.35    0.1800  -0.592828   0.112213
x2  -0.207266   0.0475062  -4.36    <1e-04  -0.301155  -0.113377
x3   0.222829   0.048938    4.55    <1e-04   0.12611    0.319547
x4   0.524083   0.0244913  21.40    <1e-46   0.47568    0.572486
────────────────────────────────────────────────────────────────

println(coeftable(result))

──────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────────────
(Intercept)   -0.240307   0.17837    -1.35    0.1800  -0.592828   0.112213
Sepal_Length  -0.207266   0.0475062  -4.36    <1e-04  -0.301155  -0.113377
Sepal_Width    0.222829   0.048938    4.55    <1e-04   0.12611    0.319547
Petal_Length   0.524083   0.0244913  21.40    <1e-46   0.47568    0.572486
──────────────────────────────────────────────────────────────────────────

ftest(result.model)

F-test against the null model:
F-statistic: 734.39 on 150 observations and 3 degrees of freedom, p-value: <1e-87

result2 = lm(@formula(Petal_Width ~ Sepal_Length + Sepal_Width), iris);
result1 = lm(@formula(Petal_Width ~ Sepal_Length), iris);
ftest(result1.model, result2.model, result.model)

F-test: 3 models fitted on 150 observations
───────────────────────────────────────────────────────────────────
     DOF  ΔDOF      SSR      ΔSSR      R²     ΔR²        F*   p(>F)
───────────────────────────────────────────────────────────────────
[1]    3        28.6523            0.6690                          
[2]    4     1  22.2547   -6.3975  0.7429  0.0739   42.2580  <1e-08
[3]    5     1   5.3803  -16.8744  0.9379  0.1949  457.9047  <1e-46
───────────────────────────────────────────────────────────────────

dof(result), dof(result2), dof(result1)

(5, 4, 3)

x = [61.5  50.6  53.4
     52.5  57.2  44.9
     48.1  51.8  54.2
     45.6  35.3  45.4
     39.5  32.0  35.3]
X = hcat(ones(size(x, 1)), x)

5×4 Matrix{Float64}:
 1.0  61.5  50.6  53.4
 1.0  52.5  57.2  44.9
 1.0  48.1  51.8  54.2
 1.0  45.6  35.3  45.4
 1.0  39.5  32.0  35.3

y = [115.1, 87.2, 93.4, 92.3, 73.5]

5-element Vector{Float64}:
 115.1
  87.2
  93.4
  92.3
  73.5

result2 = fit(LinearModel, X, y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}:

Coefficients:
──────────────────────────────────────────────────────────────────
        Coef.  Std. Error       t  Pr(>|t|)   Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────
x1  -4.70461    1.7307      -2.72    0.2244  -26.6952    17.286
x2   1.61927    0.0518213   31.25    0.0204    0.960815   2.27772
x3  -0.710114   0.0356268  -19.93    0.0319   -1.1628    -0.257432
x4   1.05431    0.0516889   20.40    0.0312    0.397541   1.71108
──────────────────────────────────────────────────────────────────

using RCall
infert = rcopy(R"infert");
first(infert, 5)

using DataFrames, CategoricalArrays, GLM, Plots
probit = glm(@formula(case ~ education + age + parity + induced + spontaneous), infert, Binomial(), ProbitLink())

StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

case ~ 1 + education + age + parity + induced + spontaneous

Coefficients:
───────────────────────────────────────────────────────────────────────────────────
                         Coef.  Std. Error      z  Pr(>|z|)   Lower 95%   Upper 95%
───────────────────────────────────────────────────────────────────────────────────
(Intercept)         -0.641326     0.826036  -0.78    0.4375  -2.26033     0.977675
education: 6-11yrs  -0.570558     0.462537  -1.23    0.2174  -1.47711     0.335997
education: 12+ yrs  -0.79871      0.485883  -1.64    0.1002  -1.75102     0.153602
age                  0.0205609    0.018276   1.13    0.2606  -0.0152595   0.0563813
parity              -0.454428     0.109973  -4.13    <1e-04  -0.66997    -0.238886
induced              0.721496     0.169951   4.25    <1e-04   0.388398    1.05459
spontaneous          1.17378      0.169791   6.91    <1e-11   0.840996    1.50656
───────────────────────────────────────────────────────────────────────────────────

coeftable(probit) |> println

───────────────────────────────────────────────────────────────────────────────────
                         Coef.  Std. Error      z  Pr(>|z|)   Lower 95%   Upper 95%
───────────────────────────────────────────────────────────────────────────────────
(Intercept)         -0.641326     0.826036  -0.78    0.4375  -2.26033     0.977675
education: 6-11yrs  -0.570558     0.462537  -1.23    0.2174  -1.47711     0.335997
education: 12+ yrs  -0.79871      0.485883  -1.64    0.1002  -1.75102     0.153602
age                  0.0205609    0.018276   1.13    0.2606  -0.0152595   0.0563813
parity              -0.454428     0.109973  -4.13    <1e-04  -0.66997    -0.238886
induced              0.721496     0.169951   4.25    <1e-04   0.388398    1.05459
spontaneous          1.17378      0.169791   6.91    <1e-11   0.840996    1.50656
───────────────────────────────────────────────────────────────────────────────────

coefnames(probit) |> println

["(Intercept)", "education: 6-11yrs", "education: 12+ yrs", "age", "parity", "induced", "spontaneous"]

coef(probit) |> println

[-0.641326177377741, -0.5705584682163576, -0.7987104417029449, 0.02056089588624572, -0.4544282806759491, 0.7214959235620557, 1.1737802352425202]

stderror(probit) |>  println

[0.826036440470636, 0.462536619626669, 0.4858825716948646, 0.018276028711565157, 0.1099725106776763, 0.16995126103459288, 0.16979106512772785]

confint(probit; level=0.95)

7×2 Matrix{Float64}:
 -2.26033     0.977675
 -1.47711     0.335997
 -1.75102     0.153602
 -0.0152595   0.0563813
 -0.66997    -0.238886
  0.388398    1.05459
  0.840996    1.50656

using StatsBase
aic(probit), aicc(probit), bic(probit)

(273.22520833410056, 273.6918750007672, 297.8192095572554)

deviance(probit), loglikelihood(probit)

(259.2252083341008, -129.61260416705028)

nobs(probit), dof_residual(probit)

(248.0, 242.0)

pred = fitted(probit) # = GLM.predict(probit)
pred[1:10] |> println

[0.5931842699010288, 0.6876850183502383, 0.13071257320292726, 0.37563052991672463, 0.5158490078573164, 0.6283046745030185, 0.11635396599829256, 0.07176207768915337, 0.47577761145781267, 0.06116876315256316]

GLM.predict(probit, infert)[1:10] |> println

Union{Missing, Float64}[0.5931842699010288, 0.6876850183502383, 0.13071257320292728, 0.37563052991672463, 0.5158490078573164, 0.6283046745030185, 0.11635396599829259, 0.07176207768915337, 0.4757776114578127, 0.06116876315256316]

probit.model.rr.y[1:10] |> println

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

vcov(probit)

7×7 Matrix{Float64}:
  0.682336    -0.243392     -0.283966    …  -0.00987559   -0.00464972
 -0.243392     0.21394       0.205956       -0.00949874   -0.0153412
 -0.283966     0.205956      0.236082       -0.0145149    -0.0206264
 -0.0118327    0.000775951   0.00165048      0.000541801   0.000525446
 -0.0204892    0.0169133     0.0212583      -0.0121734    -0.0119004
 -0.00987559  -0.00949874   -0.0145149   …   0.0288834     0.0179336
 -0.00464972  -0.0153412    -0.0206264       0.0179336     0.028829

linearpredictor = probit.model.rr.eta
linearpredictor[1:10] |> println

[0.23574382565604912, 0.4892990927306858, -1.123029074745741, -0.3169769928250714, 0.03973802720123021, 0.3273665659735825, -1.193412320886396, -1.4627925385861338, -0.0607538774163674, -1.5450361221311166]

scatter(linearpredictor, pred,
    xlab="Linear Predictor", ylab="Probablity")

y = probit.model.rr.y
y[1:10] |> println

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

using FreqTables
freqtable(y, pred .> 0.5)

2×2 Named Matrix{Int64}
Dim1 ╲ Dim2 │ false   true
────────────┼─────────────
0.0         │   149     16
1.0         │    44     39

using RCall
iris = rcopy(R"infert");
first(infert, 5)

using DataFrames, CategoricalArrays, GLM, Plots
logit = glm(@formula(case ~ education + age + parity + induced + spontaneous), infert, Binomial(), LogitLink())

StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, LogitLink}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

case ~ 1 + education + age + parity + induced + spontaneous

Coefficients:
─────────────────────────────────────────────────────────────────────────────────
                        Coef.  Std. Error      z  Pr(>|z|)   Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────────────────────
(Intercept)         -1.14924    1.4122     -0.81    0.4158  -3.91709     1.61862
education: 6-11yrs  -1.04424    0.79255    -1.32    0.1876  -2.59761     0.509125
education: 12+ yrs  -1.40321    0.834156   -1.68    0.0925  -3.03812     0.231711
age                  0.039582   0.0312025   1.27    0.2046  -0.0215738   0.100738
parity              -0.828277   0.196489   -4.22    <1e-04  -1.21339    -0.443165
induced              1.28876    0.30146     4.28    <1e-04   0.697906    1.87961
spontaneous          2.04591    0.310157    6.60    <1e-10   1.43801     2.6538
─────────────────────────────────────────────────────────────────────────────────

coeftable(logit) |> println

─────────────────────────────────────────────────────────────────────────────────
                        Coef.  Std. Error      z  Pr(>|z|)   Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────────────────────
(Intercept)         -1.14924    1.4122     -0.81    0.4158  -3.91709     1.61862
education: 6-11yrs  -1.04424    0.79255    -1.32    0.1876  -2.59761     0.509125
education: 12+ yrs  -1.40321    0.834156   -1.68    0.0925  -3.03812     0.231711
age                  0.039582   0.0312025   1.27    0.2046  -0.0215738   0.100738
parity              -0.828277   0.196489   -4.22    <1e-04  -1.21339    -0.443165
induced              1.28876    0.30146     4.28    <1e-04   0.697906    1.87961
spontaneous          2.04591    0.310157    6.60    <1e-10   1.43801     2.6538
─────────────────────────────────────────────────────────────────────────────────

coefnames(logit) |> println

["(Intercept)", "education: 6-11yrs", "education: 12+ yrs", "age", "parity", "induced", "spontaneous"]

coef(logit) |> println

[-1.1492365368637094, -1.0442435817853968, -1.4032050872379318, 0.0395820016989101, -0.8282773808099847, 1.2887573786822106, 2.0459050192518973]

stderror(logit) |> println

[1.4121953744517868, 0.7925497626709377, 0.8341561759690906, 0.031202533064173518, 0.1964894921449399, 0.30146019277606595, 0.3101565211574761]

confint(logit; level=0.95)

7×2 Matrix{Float64}:
 -3.91709     1.61862
 -2.59761     0.509125
 -3.03812     0.231711
 -0.0215738   0.100738
 -1.21339    -0.443165
  0.697906    1.87961
  1.43801     2.6538

using StatsBase
aic(logit), aicc(logit), bic(logit)

(271.79769020552897, 272.2643568721956, 296.3916914286838)

deviance(logit), loglikelihood(logit)

(257.79769020552897, -128.89884510276448)

nobs(logit), dof_residual(logit)

(248.0, 242.0)

nobs(logit), dof_residual(logit)

(248.0, 242.0)

vcov(logit)

7×7 Matrix{Float64}:
  1.9943     -0.707598    -0.827706    …  -0.0248236   -0.0203873
 -0.707598    0.628135     0.606138       -0.0376174   -0.0505133
 -0.827706    0.606138     0.695817       -0.0524051   -0.0676415
 -0.0347921   0.00241636   0.00493288      0.00158694   0.00174851
 -0.0573387   0.0500454    0.0642437      -0.0403615   -0.041082
 -0.0248236  -0.0376174   -0.0524051   …   0.0908782    0.0625116
 -0.0203873  -0.0505133   -0.0676415       0.0625116    0.0961971

pred = fitted(logit) # = GLM.predict(logit)
pred[1:10] |> println

[0.5721916418849309, 0.7258538907308169, 0.11944588118476783, 0.3684101759215754, 0.5104285364858039, 0.6322269044425685, 0.10799647849488257, 0.07021373265533512, 0.4639052859413472, 0.06055490896146651]

linearpredictor = logit.model.rr.eta
linearpredictor[1:10] |> println

[0.2907986396340503, 0.9736875323627409, -1.9976879981017022, -0.5390432449762836, 0.04172019631690116, 0.5417821958880373, -2.1113714603841585, -2.583410825903952, -0.14463044453008145, -2.7417388326995926]

scatter(linearpredictor, pred,
    xlab="Linear Predictor", ylab="Probablity")

y = logit.model.rr.y
y[1:10] |> println

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

using FreqTables
freqtable(y, pred .> 0.5)

2×2 Named Matrix{Int64}
Dim1 ╲ Dim2 │ false   true
────────────┼─────────────
0.0         │   148     17
1.0         │    42     41

x = [-2.1, -1.5, -0.7, 0.5, 1.4,  2.3,  3.5, 4.2]
y = [-7.5, -2.0,  3.8, 5.6, 3.7, -1.7, -2.6, 2.3]
scatter(x, y)

using DataFrames, GLM
df = DataFrame(
        x=x,
        x2=x.^2,
        x3=x.^3,
        x4=x.^4,
        y=y)
a3 = lm(@formula(y ~ x + x2 + x3), df)
a4 = lm(@formula(y ~ x + x2 + x3 + x4), df)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

y ~ 1 + x + x2 + x3 + x4

Coefficients:
────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error       t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   5.84207     0.366164    15.95    0.0005   4.67677     7.00736
x             1.26578     0.362788     3.49    0.0398   0.111223    2.42033
x2           -2.6176      0.140454   -18.64    0.0003  -3.06459    -2.17061
x3            0.111094    0.0855336    1.30    0.2848  -0.161112    0.3833
x4            0.0938633   0.0195116    4.81    0.0171   0.0317687   0.155958
────────────────────────────────────────────────────────────────────────────

minx, maxx = extrema(x)
newx = minx:0.1:maxx
df2 = DataFrame(x=newx, x2=newx.^2, x3=newx.^3, x4=newx.^4)
pred3 = GLM.predict(a3, df2);
pred4 = GLM.predict(a4, df2);

plot!(newx, [pred3, pred4], label=["order 3" "order 4"])

# import Pkg; Pkg.add("Polynomials")

using Polynomials;

WARNING: using Polynomials.fit in module Main conflicts with an existing identifier.

f2 = Polynomials.fit(x, y, 2); # 2次式で予測
# 0.9313226293358072 - 0.5231411796668045∙x + 0.06976795086638117∙x2

f3 = Polynomials.fit(x, y, 3); # 3次式で予測
# 0.9917995957122127 - 0.7993193261190567∙x + 0.22096036680739517∙x2 - 0.02015898879213521∙x3

f4 = Polynomials.fit(x, y, 4); # 4次式で予測
# 0.9995995032131947 - 0.9293177844687606∙x + 0.360708709533328∙x2 - 0.06565844921453219∙x3 + 0.004549946042239714∙x4

f5 = Polynomials.fit(x, y); # 最高次数 = length(x) - 1 = 5次式で予測
# 1.0 - 0.9762026083107394∙x + 0.4413086878778398∙x2 - 0.11144858183923873∙x3 + 0.015062986695871163∙x4 - 0.0008410432522905108∙x5

scatter(x, y, grid=false, tick_direction=:out,
        markerstrokewidth=0, size=(400, 300),
        xlabel="\$x\$", ylabel="\$y\$", label="Data", legend=:bottom)
plot!(f2, extrema(x)..., label="Order2")
plot!(f3, extrema(x)..., label="Order3")
plot!(f4, extrema(x)..., label="Order4")
plot!(f5, extrema(x)..., label="Order5")

scatter(x, y, grid=false, tick_direction=:out,
        markerstrokewidth=0, size=(400, 300),
        xlims=(-3,5),
        xlabel="\$x\$", ylabel="\$y\$", label="Data")
plot!(f2, [-3,5]..., label="Order2")
plot!(f3, [-3,5]..., label="Order3")
plot!(f4, [-3,5]..., label="Order4")
plot!(f5, [-3,5]..., label="Order5")

using Plots
x2 = 0:5
y2 = [2,6,9,4,11,7]
f52 = Polynomials.fit(x2, y2); # degree = length(x) - 1 = 5
scatter(x2, y2, grid=false, tick_direction=:out,
        markerstrokewidth=0, size=(400, 300),
        xlabel="\$x\$", ylabel="\$y\$", label="")
plot!(f52, [-0.3,5.3]..., label="")

using Random; Random.seed!(123)
using DataFrames, GLM, Plots
x = 0:0.5:10
y = round.(1.2 .* 1.46 .^ x .+ max.(2randn(length(x)), 0), digits=1)
Y = log.(y)
df =  DataFrame(x=x, Y=Y)
result = lm(@formula(Y ~ x), df)
"x=" |> print; x |> println
"y=" |> print; y |> println

x=0.0:0.5:10.0
y=[2.8, 1.4, 1.8, 2.1, 3.1, 3.6, 3.7, 4.5, 5.6, 6.6, 10.4, 10.2, 11.6, 14.7, 17.0, 20.5, 24.8, 31.3, 36.5, 44.0, 54.8]

A, B = coef(result)

2-element Vector{Float64}:
 0.3812532472123365
 0.35448017218895966

a, b = exp(A), exp(B)

(1.46411834235243, 1.4254394785723803)

x2 = -0.5:0.1:10.5
y2 = a .* b .^ x2;

scatter(x, y)
plot!(x2, y2, xlabel="\$x\$", ylabel="\$a b^x\$")
annotate!(7.5, 10, text("\$a = $(round(a, digits=5))\$\n\$b = $(round(b, digits=5))\$", 10, :left))

using Random; Random.seed!(123)
using DataFrames, GLM, Plots
n = 20
x1 = round.(randn(n).*5 .+ 20, digits=2)
x2 = round.(randn(n).*7 .+ 30, digits=2)
y = round.(2.1 .* 1.31 .^ x1 .* 1.65 .^ x2 .+ max.(2randn.(n), 0), digits=1)
Y = log.(y)
df =  DataFrame(x1=x1, x2=x2, Y=Y)
result = lm(@formula(Y ~ x1 + x2), df)
"x1=" |> print; x1 |> println
"x2=" |> print; x2 |> println
"y=" |> print; y |> println

x1=[24.04, 14.39, 14.48, 17.92, 21.44, 21.15, 17.89, 13.22, 20.35, 19.41, 26.1, 21.46, 19.84, 21.58, 9.19, 15.55, 14.56, 23.52, 20.72, 20.74]
x2=[47.9, 23.87, 24.77, 27.97, 30.87, 30.22, 31.63, 21.14, 29.71, 23.26, 26.95, 26.32, 17.07, 29.01, 24.62, 35.12, 21.61, 42.78, 30.98, 40.2]
y=[3.6216140818127e13, 1.58889423e7, 2.55496517e7, 3.211787716e8, 3.5501379699e9, 2.3706748494e9, 1.9916816858e9, 2.9522722e6, 1.4795789504e9, 4.54079281e7, 1.7546577082e9, 3.656273807e8, 2.2979085e6, 1.4525900889e9, 5.6806392e6, 6.0788868978e9, 5.3643544e6, 2.4232783487002e12, 3.088397078e9, 3.142537899387e11]

A, B1, B2 = coef(result)

3-element Vector{Float64}:
 0.7419374137437735
 0.270027135506774
 0.5007752870315855

a, b1, b2 = exp(A), exp(B1), exp(B2)

(2.1000001449302372, 1.3099999977647652, 1.6499999985465088)

yhat = a .* (b1 .^ x1) .* (b2 .^ x2);
yhat[1:10] |> println

[3.621614030385744e13, 1.5888942660828743e7, 2.5549650504968673e7, 3.211787758893171e8, 3.5501379884848666e9, 2.3706748628261786e9, 1.991681705147434e9, 2.952272329508554e6, 1.479578961853971e9, 4.5407928770480216e7]

scatter(predict(result), Y, aspect_ratio=1,
    xlabel="\$\\log(predicted): Y\$", ylabel="\$\\log(observed): Y\$")
#plot!(x2, y, xlabel="\$x\$", ylabel="\$a b^x\$")
#annotate!(7.5, 10, text("\$a = $(round(a, digits=5))\$\n\$b = $(round(b, digits=5))\$", 10, :left))

using Random; Random.seed!(123)
using DataFrames, GLM, Plots
x = 0.5:0.5:10
y = round.(0.6 .* x .^ 2.15 .+ max.(2randn(length(x)), 0), digits=1)
X = log.(x)
Y = log.(y)
df =  DataFrame(X=X, Y=Y)
result = lm(@formula(Y ~ X), df)
"x=" |> print; x |> println
"y=" |> print; y |> println

x=0.5:0.5:10.0
y=[1.8, 0.6, 1.4, 2.7, 4.9, 6.8, 8.9, 11.8, 15.4, 19.1, 25.9, 28.8, 33.6, 40.0, 45.7, 52.5, 59.8, 69.0, 76.2, 85.0]

A, b = coef(result)

2-element Vector{Float64}:
 0.25904728487209333
 1.7129464560695171

a = exp(A)

1.2956950701552397

x2 = 0:0.1:10
y2 = a .* x2 .^ b;

scatter(x, y, xlabel="\$x\$", ylabel="\$a x^b\$")
plot!(x2, y2, label="")
annotate!(7.5, 10, text("\$a = $(round(a, digits=5))\$\n\$b = $(round(b, digits=5))\$", 10, :left))

using Random; Random.seed!(123)
using DataFrames, GLM, Plots
n = 20
x1 = round.(randn(n).*5 .+ 20, digits=2)
x2 = round.(randn(n).*7 .+ 30, digits=2)
y = round.(0.14 .* x1 .^ 1.12 .* x2 .^ 1.22 .+ max.(2randn.(n), 0), digits=1)
X1 = log.(x1)
X2 = log.(x2)
Y = log.(y)
df =  DataFrame(X1=X1, X2=X2, Y=Y)
result = lm(@formula(Y ~ X1 + X2), df)
"x1=" |> print; x1 |> println
"x2=" |> print; x2 |> println
"y=" |> print; y |> println

x1=[24.04, 14.39, 14.48, 17.92, 21.44, 21.15, 17.89, 13.22, 20.35, 19.41, 26.1, 21.46, 19.84, 21.58, 9.19, 15.55, 14.56, 23.52, 20.72, 20.74]
x2=[47.9, 23.87, 24.77, 27.97, 30.87, 30.22, 31.63, 21.14, 29.71, 23.26, 26.95, 26.32, 17.07, 29.01, 24.62, 35.12, 21.61, 42.78, 30.98, 40.2]
y=[553.1, 133.1, 142.0, 206.6, 284.7, 274.7, 241.3, 104.4, 256.8, 180.3, 303.3, 234.6, 126.7, 265.8, 83.6, 234.1, 120.3, 474.6, 278.2, 378.5]

A, b1, b2 = coef(result)

3-element Vector{Float64}:
 -1.972018639081411
  1.1205104869793496
  1.2224005751554017

a = exp(A)

0.13917562710046374

yhat = a .* (x1 .^ b1) .* (x2 .^ b2);
yhat[1:10] |> println

[555.8502852686023, 133.49654946522986, 140.65412894394512, 207.19499887038816, 285.77546960915873, 274.22065013392836, 240.3523233305722, 104.64630156102172, 257.2174235978244, 180.86469599578948]

scatter(predict(result), Y, aspect_ratio=1,
    xlabel="\$\\log(predicted): Y\$", ylabel="\$\\log(observed): Y\$")
#plot!(x2, y, xlabel="\$x\$", ylabel="\$a b^x\$")
#annotate!(7.5, 10, text("\$a = $(round(a, digits=5))\$\n\$b = $(round(b, digits=5))\$", 10, :left))

# import Pkg; Pkg.add("LsqFit")

using LsqFit

t = 1:0.5:10
y = 1.2 .* exp.(1.4 .* t)
y .*= (1 .+ randn(length(y))*0.15)
m(t, p) = p[1] * exp.(p[2] * t)
p0 = [0.5, 0.5]
result = curve_fit(m, t, y, p0)

LsqFit.LsqFitResult{Vector{Float64}, Vector{Float64}, Matrix{Float64}, Vector{Float64}, Vector{LsqFit.LMState{LsqFit.LevenbergMarquardt}}}([1.4883171764406258, 1.3770024859336887], [1.6217019409794613, 3.2841237421949856, 1.1250600339121526, 1.0151142943585043, 20.409671207816643, 15.889881025589858, 32.854605018537995, 77.80097948639286, 309.54899091077823, 356.9500301508024, 429.21525916980227, -1819.7318621828508, -323.68120275048204, -860.0993893015839, -7704.080745035608, -21494.201299125154, -51321.85159699916, 72230.47709605598, -20073.59728201572], [3.9630046434970163 5.898207881435509; 7.8892709505656375 17.612606198763782; … ; 479991.47214770067 6.786605756335687e6; 955533.2679027494 1.4221365769609815e7], true, Iter     Function value   Gradient norm 
------   --------------   --------------
, Float64[])

dof(result)

17

result.param

2-element Vector{Float64}:
 1.4883171764406258
 1.3770024859336887

result.resid

19-element Vector{Float64}:
      1.6217019409794613
      3.2841237421949856
      1.1250600339121526
      1.0151142943585043
     20.409671207816643
     15.889881025589858
     32.854605018537995
     77.80097948639286
    309.54899091077823
    356.9500301508024
    429.21525916980227
  -1819.7318621828508
   -323.68120275048204
   -860.0993893015839
  -7704.080745035608
 -21494.201299125154
 -51321.85159699916
  72230.47709605598
 -20073.59728201572

result.jacobian

19×2 Matrix{Float64}:
     3.963          5.89821
     7.88927       17.6126
    15.7054        46.7493
    31.2652       116.331
    62.2406       277.901
   123.904        645.431
   246.66        1468.43
   491.033       3288.66
   977.514       7274.25
  1945.97       15929.2
  3873.89       34593.5
  7711.87       74605.1
 15352.3            1.59943e5
 30562.2            3.41147e5
 60841.0            7.24406e5
     1.21118e5      1.53223e6
     2.41113e5      3.22968e6
     4.79991e5      6.78661e6
     9.55533e5      1.42214e7

using Plots
scatter(t, y)
t2 = 1:0.1:10
y2 = m(t2, result.param)
plot!(t2, y2)

using RCall
iris = rcopy(R"iris");
iris[[51, 101], :]

using MultivariateStats

Xp = float.(Matrix(iris[51:100, 1:4])');
Xn = float.(Matrix(iris[101:150, 1:4])');
f = MultivariateStats.fit(LinearDiscriminant, Xp, Xn);

f.w |> println

[0.5002224138767678, 0.7846776066269631, -0.9804041999057049, -1.7421957418876874]

f.b

2.343796226212194

length(f)

4

fp = evaluate(f, Xp);
fn = evaluate(f, Xn);

pp = predict(f, Xp);
pn = predict(f, Xn);

pred = ["Versicolor", "Virginica"][2 .- vcat(pp, pn)] # pp, pn が 1 のとき Versicolor
group =  repeat(["Versicolor", "Virginica"], inner=50) # 最初の 50 個が Versicolor
using FreqTables
freqtable(group, pred)

2×2 Named Matrix{Int64}
Dim1 ╲ Dim2 │ Versicolor   Virginica
────────────┼───────────────────────
Versicolor  │         48           2
Virginica   │          1          49

using Plots
gr(grid=false, tick_direction=:out, alpha=0.4, label="")
histogram(fp, bins=20, legend=:topleft, label="Versicolor")
histogram!(fn, bins=20, label="Virsinica")

using DataFrames
df = DataFrame(group=group, discriminantvalue = vcat(fp, fn));

first(df, 5)

using StatsPlots
@df df groupedhist(:discriminantvalue, group=:group, bins=20, bin_width=0.01,
                   bar_position=:dodge, xlabel="Discriminant value",
                   legend=:topleft)

using RCall
iris = rcopy(R"iris");
iris[[1, 51, 101], :]

X = float.(Matrix(iris[:, 1:4])');

y = repeat(1:3, inner=50);

f = MultivariateStats.fit(MulticlassLDA, X, y);

projection(f)

4×2 Matrix{Float64}:
 -0.0684058  -0.00198759
 -0.12656    -0.178527
  0.181553    0.0768624
  0.2318     -0.23417

using Statistics
mean(f) |> println

[5.843333333333332, 3.0573333333333337, 3.7579999999999996, 1.1993333333333331]

classmeans(f)

4×3 Matrix{Float64}:
 5.006  5.936  6.588
 3.428  2.77   2.974
 1.462  4.26   5.552
 0.246  1.326  2.026

withclass_scatter(f)

4×4 Matrix{Float64}:
 38.9562  13.63    24.6246  5.645
 13.63    16.962    8.1208  4.8084
 24.6246   8.1208  27.2226  6.2718
  5.645    4.8084   6.2718  6.1566

betweenclass_scatter(f)

4×4 Matrix{Float64}:
  63.2121  -19.9527  165.248    71.2793
 -19.9527   11.3449  -57.2396  -22.9327
 165.248   -57.2396  437.103   186.774
  71.2793  -22.9327  186.774    80.4133

score = MultivariateStats.transform(f, X)

2×150 Matrix{Float64}:
 -0.491296  -0.414335  -0.444121  …   0.583358   0.659107   0.559886
 -0.574208  -0.484547  -0.527541     -0.617156  -0.742849  -0.576815

palette = [:red, :green, :blue]
color = repeat(palette, inner=50)
scatter(score[1,:], score[2,:], color=color, label="")

centroid = f.pmeans

2×3 Matrix{Float64}:
 -0.453834   0.324155   0.650563
 -0.567173  -0.489394  -0.591722

scatter!(centroid[1,:], centroid[2,:], color=palette,
    markershape=:star5, markersize=7)

using RCall
iris = rcopy(R"iris");

using MultivariateStats
x = Matrix(iris[:,1:4])' # データ行列の転置

4×150 adjoint(::Matrix{Float64}) with eltype Float64:
 5.1  4.9  4.7  4.6  5.0  5.4  4.6  5.0  …  6.8  6.7  6.7  6.3  6.5  6.2  5.9
 3.5  3.0  3.2  3.1  3.6  3.9  3.4  3.4     3.2  3.3  3.0  2.5  3.0  3.4  3.0
 1.4  1.4  1.3  1.5  1.4  1.7  1.4  1.5     5.9  5.7  5.2  5.0  5.2  5.4  5.1
 0.2  0.2  0.2  0.2  0.2  0.4  0.3  0.2     2.3  2.5  2.3  1.9  2.0  2.3  1.8

pca = MultivariateStats.fit(PCA, x)
println(pca)

PCA(indim = 4, outdim = 3, principalratio = 0.9947878161267246)

indim(pca) |> println

4

outdim(pca) |> println

3

using Statistics
mean(pca) |> println

[5.843333333333334, 3.0573333333333332, 3.7580000000000005, 1.1993333333333336]

proj = projection(pca)

4×3 Matrix{Float64}:
 -0.361387    0.656589   0.58203
  0.0845225   0.730161  -0.597911
 -0.856671   -0.173373  -0.0762361
 -0.358289   -0.075481  -0.545831

principalvars(pca)　|> println

[4.228241706034861, 0.24267074792863338, 0.07820950004291928]

tvar(pca) # = tr(cov(x'))

4.572957046979863

tprincipalvar(pca) # = sum(principalvars(pca))

4.549121954006414

tresidualvar(pca) # = tr(cov(x')) - sum(principalvars(pca))

0.023835092973449434

principalratio(pca)

0.9947878161267246

score = MultivariateStats.transform(pca, x)

3×150 Matrix{Float64}:
 2.68413     2.71414    2.88899    …  -1.76435    -1.90094   -1.39019
 0.319397   -0.177001  -0.144949       0.0788589   0.116628  -0.282661
 0.0279148   0.210464  -0.0179003     -0.130482   -0.723252  -0.36291

var(score, dims=2) |> println

[4.228241706034863; 0.24267074792863344; 0.07820950004291934;;]

x2 = reconstruct(pca, score)

4×150 Matrix{Float64}:
 5.09929   4.86876   4.6937    …  6.21975  6.45678  6.18593  5.94891
 3.50072   3.03166   3.20638      2.58133  3.0438   3.41426  2.95043
 1.40109   1.44752   1.30958      5.12206  5.26574  5.4214   5.02561
 0.198295  0.125368  0.184951     1.70829  1.89675  2.26639  1.91685

dif = abs.(x .- x2)

4×150 Matrix{Float64}:
 0.00071377   0.0312416  0.00629977  …  0.0432221  0.0140692  0.0489127
 0.000723353  0.0316611  0.00638436     0.0438025  0.0142581  0.0495694
 0.00108561   0.0475168  0.00958161     0.0657386  0.0213986  0.0743936
 0.0017051    0.0746321  0.0150493      0.103252   0.0336096  0.116846

using Plots
# pyplot()

palette = [:red, :green, :blue]
color = repeat(palette, inner=50)
p = scatter(score[1,:], score[2,:], color=color, alpha=0.4, label="")

p = scatter(-x2[1,:], x2[2,:], color=color, alpha=0.4, label="")

loadings(pca)

4×3 Matrix{Float64}:
 -0.743108   0.323446    0.16277
  0.173801   0.359689   -0.167212
 -1.76155   -0.0854062  -0.0213202
 -0.736739  -0.0371832  -0.152647

function expandname(words; width=0, left = true)
    width == 0 && (width = maximum(length.(words)))
    blanks = ' ' ^ width
    results = String[]
    for name in words
        blks = blanks[1:width-length(name)]
        append!(results, left ? [name *  blks] : [blks * name])
    end
    results
end

using Printf
function output(M, n, vnames, method)
    p, nc = outdim(M), indim(M)
    lambda = principalvars(M)
    pl = sqrt.(lambda)' .* projection(M)
    cum = sum(pl .^2, dims=2)
    pl = vcat(pl, hcat(lambda, lambda./sum(nc), cumsum(lambda)./sum(nc))')
    header = expandname("PC" .* string.(1:p), width=8, left=false)
    side = expandname(vcat(vnames, "Eigenvalues", "Proportion", "Cum. Prop."))
    [@printf("%s ", h) for h in vcat(' ' ^ (length(side[1])-1), header)]
    println(" Communality")
    for i in 1:nc+3
        @printf("%s", side[i])
        [@printf("%8.3f ", pl[i, j]) for j in 1:p]
        i <= nc && @printf(" %8.3f", cum[i])
        println()
    end
end

import StatsBase
function prcmp(x::DataFrame; method=:cov, center=true, scale=true, maxoutdim=size(x, 2), pratio=1)
    vnames = names(x)
    n = nrow(x)
    x = Matrix(x)
    if scale
        temp = MultivariateStats.fit(StatsBase.ZScoreTransform, x, dims=1; center, scale)
        x = StatsBase.transform(temp, x)
    end
    M = MultivariateStats.fit(PCA, x'; method, maxoutdim, pratio)
    output(M, n, vnames, method)
    M
end;

prcmp(iris[:, 1:4], maxoutdim=3, method=:svd)

                 PC1      PC2      PC3  Communality
Sepal_Length  -0.890   -0.361    0.276     0.999
Sepal_Width    0.460   -0.883   -0.094     1.000
Petal_Length  -0.992   -0.023   -0.054     0.987
Petal_Width   -0.965   -0.064   -0.243     0.994
Eigenvalues    2.918    0.914    0.147 
Proportion     0.730    0.229    0.037 
Cum. Prop.     0.730    0.958    0.995

PCA(indim = 4, outdim = 3, principalratio = 0.9948212908928452)

Pattern matrix (unstandardized loadings):
───────────────────────────────────
         PC1        PC2         PC3
───────────────────────────────────
1   0.890169  0.36083    -0.275658
2  -0.460143  0.882716    0.0936199
3   0.991555  0.0234152   0.054447
4   0.964979  0.0639998   0.242983
───────────────────────────────────

Importance of components:
────────────────────────────────────────────────────────
                                PC1       PC2        PC3
────────────────────────────────────────────────────────
SS Loadings (Eigenvalues)  2.9185    0.91403   0.146757
Variance explained         0.729624  0.228508  0.0366892
Cumulative variance        0.729624  0.958132  0.994821
Proportion explained       0.733423  0.229697  0.0368802
Cumulative proportion      0.733423  0.96312   1.0
────────────────────────────────────────────────────────

prcmp(iris[:, 1:4], maxoutdim=3, method=:cov)

                 PC1      PC2      PC3  Communality
Sepal_Length  -0.890    0.361   -0.276     0.999
Sepal_Width    0.460    0.883    0.094     1.000
Petal_Length  -0.992    0.023    0.054     0.987
Petal_Width   -0.965    0.064    0.243     0.994
Eigenvalues    2.918    0.914    0.147 
Proportion     0.730    0.229    0.037 
Cum. Prop.     0.730    0.958    0.995

PCA(indim = 4, outdim = 3, principalratio = 0.9948212908928453)

Pattern matrix (unstandardized loadings):
───────────────────────────────────
         PC1        PC2         PC3
───────────────────────────────────
1   0.890169  0.36083    -0.275658
2  -0.460143  0.882716    0.0936199
3   0.991555  0.0234152   0.054447
4   0.964979  0.0639998   0.242983
───────────────────────────────────

Importance of components:
────────────────────────────────────────────────────────
                                PC1       PC2        PC3
────────────────────────────────────────────────────────
SS Loadings (Eigenvalues)  2.9185    0.91403   0.146757
Variance explained         0.729624  0.228508  0.0366892
Cumulative variance        0.729624  0.958132  0.994821
Proportion explained       0.733423  0.229697  0.0368802
Cumulative proportion      0.733423  0.96312   1.0
────────────────────────────────────────────────────────

using RCall
iris = rcopy(R"iris");

x = Matrix(iris[:,1:4])' # データ行列の転置

4×150 adjoint(::Matrix{Float64}) with eltype Float64:
 5.1  4.9  4.7  4.6  5.0  5.4  4.6  5.0  …  6.8  6.7  6.7  6.3  6.5  6.2  5.9
 3.5  3.0  3.2  3.1  3.6  3.9  3.4  3.4     3.2  3.3  3.0  2.5  3.0  3.4  3.0
 1.4  1.4  1.3  1.5  1.4  1.7  1.4  1.5     5.9  5.7  5.2  5.0  5.2  5.4  5.1
 0.2  0.2  0.2  0.2  0.2  0.4  0.3  0.2     2.3  2.5  2.3  1.9  2.0  2.3  1.8

fa = MultivariateStats.fit(FactorAnalysis, x, maxoutdim=2)

Factor Analysis(indim = 4, outdim = 2)

indim(fa)

4

outdim(fa)

2

using Statistics
mean(fa) |> println

[5.843333333333334, 3.0573333333333332, 3.7580000000000005, 1.1993333333333336]

projection(fa)

4×2 Matrix{Float64}:
 -0.355      -0.465066
  0.0849544  -0.876323
 -0.859451    0.120379
 -0.357914   -0.0357892

loadings(fa)

4×2 Matrix{Float64}:
  0.715901  0.252173
 -0.196345  0.389136
  1.76477   0.0432933
  0.732752  0.0571334

cov(fa)

4×4 Matrix{Float64}:
  0.685694  -0.042434   1.27432    0.538985
 -0.042434   0.189979  -0.329656  -0.12164
  1.27432   -0.329656   3.11628    1.29561
  0.538985  -0.12164    1.29561    0.590566

var(fa)

4-element Vector{Float64}:
 0.1095886787924468
 1.0e-6
 1.0e-6
 0.050377258200117764

score = MultivariateStats.transform(fa, x)

2×150 Matrix{Float64}:
 -1.34738   -1.31625  -1.38467   -1.2665   …  0.810685  0.89772  0.754712
  0.457716  -0.81146  -0.332034  -0.52939     0.261709  1.33353  0.233459

using Plots
# pyplot()
palette = [:red, :green, :blue]
col = repeat(palette, inner=50)
p = scatter(-score[1,:], score[2,:], color=col, alpha=0.4, label="")

score2 = reconstruct(fa, score)

4×150 Matrix{Float64}:
 5.02863  4.58824   4.71331   4.7274    …  5.98693  6.52949  6.98397  6.4784
 3.5      3.0       3.2       3.1          2.5      3.0      3.4      3.0
 1.4      1.4       1.3       1.5          5.0      5.2      5.4      5.1
 0.2287   0.173637  0.152431  0.227879     1.67488  1.81641  1.94692  1.77318

using Plots
p = scatter(-score2[1,:], score2[2,:], color=col, alpha=0.4, label="")

using RCall
iris = rcopy(R"iris");

function dissimilality(X)
    nr,  nc = size(X)
    d = zeros(nr, nr)
    for i in 1:nr-1
        for j in i+1:nr
            d[i, j] = d[j, i] = -sum((X[i, :] .- X[j, :]).^2)
        end
    end
    d
end;

using MultivariateStats
X = Matrix(iris[:, 1:4]);
d = dissimilality(X);
result = classical_mds(d, 2)

2×150 Matrix{Float64}:
 13.3824    13.9704  15.7721   14.4231   …  -7.43229   -8.7037    -5.34469
  0.939982  -1.4282  -1.30943  -2.00764      0.252773   0.404985  -0.629019

palette = [:red, :green, :blue]
color = repeat(palette, inner=50)
scatter(-result[1, :], result[2, :], color=color)
for i in 1:150
    annotate!(-result[1, i], result[2, i], text(string(i), 6))
end
plot!(label="")

using LinearAlgebra
function princo(s)
    n = size(s, 1)
    m = vec(sum(s, dims=1)) ./ n
    h = s .+ (sum(s) / n ^ 2) .- (m .+ m')
    values, vectors = eigen(h, sortby=x-> -x)
    values = values[values .> 1e-5]
    ax = length(values)
    sqrt.(values)' .* vectors[:, 1:ax]
end

princo (generic function with 1 method)

p = princo(d);
palette = [:red, :green, :blue]
color = repeat(palette, inner=50)
scatter(-p[:,1], -p[:,2], color=color)

using RCall
iris = rcopy(R"iris");

# using Random; Random.seed!(123) # 毎回同じ結果にするためには乱数の種を設定する
using Clustering
X = float.(Matrix(iris[:, 1:4])'); # 転置データ行列
result = kmeans(X, 3, display=:iter);

  Iters               objv        objv-change | affected 
-------------------------------------------------------------
      0       1.710500e+02
      1       8.466759e+01      -8.638241e+01 |        2
      2       8.260681e+01      -2.060777e+00 |        2
      3       8.154360e+01      -1.063210e+00 |        2
      4       8.080638e+01      -7.372268e-01 |        2
      5       7.987358e+01      -9.327962e-01 |        2
      6       7.934436e+01      -5.292157e-01 |        2
      7       7.892131e+01      -4.230544e-01 |        2
      8       7.885567e+01      -6.564390e-02 |        0
      9       7.885567e+01       0.000000e+00 |        0
K-means converged with 9 iterations (objv = 78.85566582597667)

nclusters(result)

3

result.centers

4×3 Matrix{Float64}:
 5.88361  5.006  6.85385
 2.74098  3.428  3.07692
 4.38852  1.462  5.71538
 1.43443  0.246  2.05385

assignments(result) |> println

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 1, 3, 1, 3, 1, 3, 3, 1, 1, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 1]

using FreqTables
freqtable(iris.Species, result.assignments)

3×3 Named Matrix{Int64}
Dim1 ╲ Dim2 │  1   2   3
────────────┼───────────
setosa      │  0  50   0
versicolor  │ 47   0   3
virginica   │ 14   0  36

using Plots
palette = [:red, :green, :blue]
color = repeat(palette, inner=50)
# 主成分分析の第 1 主成分への寄与率が高い 2 変数をプロットする
# color は k-means の結果から色分けする
scatter(iris.Sepal_Length, iris.Petal_Length, color=color,
    xlabel="Sepal Length", ylabel="Petal Length")

round.(result.costs, digits=3)[1:10] |> println

[0.02, 0.2, 0.174, 0.276, 0.036, 0.458, 0.172, 0.004, 0.652, 0.142]

counts(result)

3-element Vector{Int64}:
 61
 50
 39

result.wcounts

3-element Vector{Int64}:
 61
 50
 39

result.totalcost, sum(result.costs)

(78.85566582597667, 78.85566582597667)

result.iterations

9

result.converged

true

min_totalcost = Inf
best_result = 0
for i in 1:1000 # 回数の指定は様子を見て適切に設定
    result = kmeans(X, 3);
    if result.totalcost < min_totalcost
        min_totalcost = result.totalcost
        best_result = result
    end
end
# best_result が最良の結果
println(best_result.totalcost)
# この後は，best_result について assignments(result) などで必要な情報を取り出す

78.851441426147

freqtable(iris.Species, best_result.assignments)

3×3 Named Matrix{Int64}
Dim1 ╲ Dim2 │  1   2   3
────────────┼───────────
setosa      │  0   0  50
versicolor  │  2  48   0
virginica   │ 36  14   0

using RCall
iris = rcopy(R"iris");

function distancematrix(X)
    nr,  nc = size(X)
    d = zeros(nr, nr)
    for i in 1:nr-1
        for j in i+1:nr
            # ユークリッド距離
            d[i, j] = d[j, i] = sqrt(sum((X[i, :] .- X[j, :]).^2))
        end
    end
    d
end;

# using Random; Random.seed!(123) # 毎回同じ結果にするためには乱数の種を設定する
using Clustering
x = Matrix(iris[1:15, 1:4]);

D = distancematrix(x)

15×15 Matrix{Float64}:
 0.0       0.538516  0.509902  0.648074  …  0.591608  0.994987  0.883176
 0.538516  0.0       0.3       0.331662     0.141421  0.678233  1.36015
 0.509902  0.3       0.0       0.244949     0.264575  0.5       1.36382
 0.648074  0.331662  0.244949  0.0          0.264575  0.519615  1.52971
 0.141421  0.608276  0.509902  0.648074     0.640312  0.974679  0.916515
 0.616441  1.09087   1.08628   1.16619   …  1.1619    1.57162   0.678233
 0.519615  0.509902  0.264575  0.331662     0.489898  0.616441  1.36015
 0.173205  0.424264  0.412311  0.5          0.469042  0.905539  1.04403
 0.921954  0.509902  0.43589   0.3          0.424264  0.34641   1.79165
 0.469042  0.173205  0.316228  0.316228     0.173205  0.728011  1.31149
 0.374166  0.866025  0.883176  1.0       …  0.932738  1.36748   0.583095
 0.374166  0.458258  0.374166  0.374166     0.458258  0.818535  1.23288
 0.591608  0.141421  0.264575  0.264575     0.0       0.583095  1.43178
 0.994987  0.678233  0.5       0.519615     0.583095  0.0       1.80831
 0.883176  1.36015   1.36382   1.52971      1.43178   1.80831   0.0

# result = hclust(D.^2, linkage=:ward_presquared); # R の ward.D2
result = hclust(D, linkage=:ward); # R の ward.D
# cutree()

result.merges

14×2 Matrix{Int64}:
  -1   -5
  -2  -13
 -10    2
  -8    1
  -3   -4
  -7  -12
  -6  -11
  -9  -14
   5    6
   3    9
 -15    7
   4   10
   8   12
  11   13

result.linkage

:ward

result.heights |> println

[0.1414213562373093, 0.1414213562373099, 0.1825741858350556, 0.2160246899469288, 0.24494897427831802, 0.3000000000000002, 0.3464101615137753, 0.3464101615137755, 0.3937003937005902, 0.5966174494199624, 0.7023769168568489, 0.8803678884691438, 1.125759003221086, 2.227255411188099]

result.order |> println

[15, 6, 11, 9, 14, 8, 1, 5, 10, 2, 13, 3, 4, 7, 12]

using Plots
function plot_hclust_horizontal(hc; xlabel="")
    function get(i, x)
        x[i] < 0 && return(ord[abs(x[i])])
        get(x[i], x)
    end
    n = length(hc.order)
    apy = collect(n:-1:1) .+ 0.5
    apx = zeros(n)
    ord = sortperm(hc.order)
    plot(yshowaxis=false, yticks=false, tick_direction=:out, grid=false,
         xlims=(-0.05, maximum(hc.height)), ylims=(0, apy[1]),
         xlabel=xlabel, label="")
    for i in 1:n
        annotate!(0, apy[i], text(string(hc.labels[hc.order[i]]) * " ", 8, :right))
    end
    for i in 1:n-1
        c1 = get(i, hc.merge[:,1])
        c2 = get(i, hc.merge[:,2])
        plot!([apx[c1], hc.height[i], hc.height[i], apx[c2]],
              [apy[c1], apy[c1], apy[c2], apy[c2]], color=:black, label="")
        apx[c1] = apx[c2] = hc.height[i]
        apy[c1] = apy[c2] = (apy[c1] + apy[c2]) / 2
    end
    plot!()
end;

result = hclust(D.^2, linkage=:ward_presquared); # R の ward.D2
# hc = hclust(dist(iris[1:15, 1:4])^2, method="ward.D2"); plot.hclust.horizontal(hc)
#result = hclust(D.^2, linkage=:ward); # R の ward.D
# hc = hclust(dist(iris[1:15, 1:4])^2, method="ward.D"); plot.hclust.horizontal(hc)
plot_hclust_horizontal(result)

using StatsPlots
plot(result)

cutree(result, k=4)

15-element Vector{Int64}:
 1
 2
 2
 2
 1
 3
 2
 1
 4
 2
 3
 2
 2
 4
 3

using DataFrames, CategoricalArrays, GLM
df = DataFrame(y = [45.19, 63.64, 44.4, 48.01, 44.72, 41.8, 48.25, 42.39, 45.72, 
                    54.41, 46.03, 51.89, 47.9, 41.72, 47.51, 63.8, 58.63, 51.73, 
                    52.09, 47.86, 51.59, 42.9, 37.48, 51.52, 47.94, 40.12, 54.64, 
                    55.73, 50.25, 57.55, 51.86, 46.98, 72.54, 49.6, 56.46, 47.76, 
                    47.39],
x1 = categorical(["A2", "A3", "A2", "A2", "A1", "A2", "A2", "A2", "A3", "A2",
                  "A1", "A2", "A2", "A2", "A1", "A3", "A2", "A3", "A1", "A2",
                  "A2", "A1", "A1", "A2", "A2", "A1", "A2", "A1", "A2", "A2",
                  "A3", "A2", "A3", "A2", "A3", "A1", "A1"]),
x2 = categorical(["Male", "Female", "Male", "Female", "Male", "Male", "Female",
                  "Male", "Male", "Female", "Male", "Female", "Male", "Female",
                  "Male", "Female", "Female", "Male", "Female", "Male", "Female",
                  "Male", "Male", "Female", "Male", "Male", "Male", "Female",
                  "Male", "Male", "Female", "Male", "Female", "Female", "Female",
                  "Male", "Male"], levels=["Male", "Female"]),
x3 = categorical(["Blue", "Red", "Red", "Gray", "Gray", "Blue", "Gray", "Red",
                  "Blue", "Green", "Gray", "Red", "Blue", "Gray", "Gray", "Green",
                  "Green", "Gray", "Blue", "Gray", "Green", "Gray", "Red", "Red",
                  "Blue", "Gray", "Red", "Green", "Blue", "Blue", "Red", "Gray",
                  "Green", "Gray", "Red", "Red", "Green"]));

result = lm(@formula(y ~ x1 + x2 + x3), df);
result

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

y ~ 1 + x1 + x2 + x3

Coefficients:
─────────────────────────────────────────────────────────────────────────
                 Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────────────
(Intercept)  45.5953       2.36148  19.31    <1e-17  40.7726     50.4181
x1: A2        1.8447       2.015     0.92    0.3672  -2.27048     5.95989
x1: A3        8.70556      2.61236   3.33    0.0023   3.37041    14.0407
x2: Female    3.90344      1.9686    1.98    0.0566  -0.116973    7.92386
x3: Gray     -2.25848      2.32587  -0.97    0.3393  -7.00854     2.49159
x3: Green     5.5081       2.93767   1.87    0.0706  -0.491416   11.5076
x3: Red      -0.877087     2.42162  -0.36    0.7197  -5.8227      4.06853
─────────────────────────────────────────────────────────────────────────

r2(result)

0.6231114674133165

pred = predict(result);

using Plots
scatter(pred, df.y, aspect_ratio=1,
    xlabel="Predicted value", ylabel="Observed value")

using MultivariateStats

using Statistics
median(df.y)

48.01

using DataFrames
function getdummy(x, levels; name="")
    p = length(levels)
    p < 2 && throw(ErrorException("levels の要素数は 2 以上"))
    n = length(x)
    y = zeros(Int, n, p)
    for i in 1:n
        index = indexin([x[i]], levels)[1]
        isnothing(index) && throw(ErrorException("levels にない要素があった $(x[i])"))
        y[i, index] = 1
    end
    z = DataFrame(y[:, 2:end], name .* ":" .* levels[2:end])
end;

x12 = getdummy(df.x1, ["A1", "A2", "A3"], name="x1");

first(x12, 5)

x22 = getdummy(df.x2, ["Male", "Female"], name="x2");
x32 = getdummy(df.x3, ["Blue", "Gray", "Green", "Red"],  name="x3");

df2 = hcat(x12, x22, x32);

positive = df.y .> 48;
Xp = float.(Matrix(df2[positive .== 1, :])');
Xn = float.(Matrix(df2[positive .== 0, :])');
f = MultivariateStats.fit(LinearDiscriminant, Xp, Xn);

pp = predict(f, Xp);
pn = predict(f, Xn);
pred = ["Positive", "Negative"][2 .- vcat(pp, pn)]; # pp, pn が 1 のとき "Positive"

group =  vcat(repeat(["Positive"], length(pp)), repeat(["Negative"], length(pn))...);

using FreqTables
freqtable(group, pred)

2×2 Named Matrix{Int64}
Dim1 ╲ Dim2 │ Negative  Positive
────────────┼───────────────────
Negative    │       17         1
Positive    │        4        15

fp = evaluate(f, Xp);
fn = evaluate(f, Xn);using DataFrames
df = DataFrame(group=group, discriminantvalue = vcat(fp, fn));

using StatsPlots
@df df groupedhist(:discriminantvalue, group=:group, bins=20, bin_width=0.01,
                   bar_position=:dodge, xlabel="Discriminant value",
                   legend=:top)

引数	内容
`trans`	転置したデータ行列を使う場合は `true`。デフォルトは `false`（普通のデータ行列）
`bias`	定数項を含まない場合は `false`。デフォルトは `true`（定数項を含む）

引数	内容
`trans`	転置したデータ行列を使う場合は `true`。デフォルトは `false`（普通のデータ行列）
`bias`	定数項を含まない場合は `false`。デフォルトは `true`（定数項を含む）

`M`: 戻り値；メソッド	内容
`coeftable(M)`	結果表
`coefnames(M)`	変数名
`coef(M)`	偏回帰係数（線形予測子の係数）
`stderror(M)`	偏回帰係数の標準誤差
`confint(M; level=0.95)`	偏回帰係数の信頼区間
`r2(M)`	決定係数 $R^2$
`adjr2(M)`	自由度調整済み決定係数
`aic(M)`	AIC [^1]
`aicc(M)`	調整済み AIC [^1]
`bic(M)`	BIC [^1]
`loglikelihood(M)`	対数尤度
`nullloglikelihood(M)`	定数モデル（ヌル・モデル）の対数尤度
`deviance(M)`	逸脱度（デビアンス）
`nulldeviance(M)`	定数モデル（ヌル・モデル）の逸脱度（デビアンス）
`nobs(M)`	サンプルサイズ $n$
`dof_residual(M)`	残差の自由度
`vcov(M)`	偏回帰係数の推定値の分散共分散行列
`response(M)`	目的変数の実測値
`predict(M)`, `fitted(M)`	予測値
`predict(M; newx)`	新たなデータ `newx` に対する予測値
`M.model`	ほぼ `coeftable(M)` と同じ
`ftest(M.model)`	定数モデル（ヌル・モデル）との比較
`ftest(M1.model, ...)`	複数のモデルの比較（変数は包含関係であること）
`dof(M)`	消費された自由度

`M`: 戻り値；メソッド	内容
`coeftable(M)`	結果表
`coefnames(M)`	変数名
`coef(M)`	偏回帰係数（線形予測子の係数）
`stderror(M)`	偏回帰係数の標準誤差
`confint(M; level=0.95)`	偏回帰係数の信頼区間
`aic(M)`	AIC [^1]
`aicc(M)`	調整済み AIC [^1]
`bic(M)`	BIC [^1]
`loglikelihood(M)`	対数尤度
`deviance(M)`	逸脱度（デビアンス）
`nobs(M)`	サンプルサイズ $n$
`dof_residual(M)`	残差の自由度
`vcov(M)`	偏回帰係数の推定値の分散共分散行列
`predict(M)`, `fitted(M)`	予測値
`predict(M; newx)`	新たなデータ `newx` に対する予測値
`M.model.rr.eta`	線形予測子
`M.model.rr.y`	目的変数の実測値

`M`: 戻り値；メソッド	内容
`coeftable(M)`	結果表
`coefnames(M)`	変数名
`coef(M)`	偏回帰係数（線形予測子の係数）
`stderror(M)`	偏回帰係数の標準誤差
`confint(M; level=0.95)`	偏回帰係数の信頼区間
`aic(M)`	AIC [^1]
`aicc(M)`	調整済み AIC [^1]
`bic(M)`	BIC [^1]
`loglikelihood(M)`	対数尤度
`deviance(M)`	逸脱度（デビアンス）
`nobs(M)`	サンプルサイズ $n$
`dof_residual(M)`	残差の自由度
`vcov(M)`	偏回帰係数の推定値の分散共分散行列
`predict(M)`, `fitted(M)`	予測値
`predict(M; newx)`	新たなデータ `newx` に対する予測値
`M.model.rr.eta`	線形予測子
`M.model.rr.y`	目的変数の実測値

Julia で統計解析　第 8 章　多変量解析¶

回帰分析¶

線形最小二乗回帰 Linear Least Square¶

単回帰分析¶

重回帰分析¶

リッジ回帰 Ridge Regression¶

説明変数が 1 個の場合¶

説明変数が 2 個以上の場合¶

GLM パッケージによる回帰分析¶

重回帰分析 Ordinary Least Squares Regression¶

プロビット回帰 Probit Regression¶

ロジット回帰 Logit Regression¶

多項式回帰¶

重回帰プログラムを用いる¶

多項式回帰 Polynomials パッケージ¶

指数回帰¶

説明変数が 1 個の場合¶

説明変数が 2 個以上の場合¶

累乗回帰¶

説明変数が 1 個の場合¶

説明変数が 2 個以上の場合¶

非線形回帰¶

判別分析¶

二群判別分析¶

多重判別分析¶

主成分分析¶

因子分析¶

古典的多次元尺度解析¶

クラスター分析¶

K-means 法による非階層的クラスター分析¶

階層的クラスター分析¶

カテゴリー変数の取り扱い方¶

重回帰分析の場合¶

判別分析の場合¶

Row	Sepal_Length	Sepal_Width	Petal_Length	Petal_Width	Species
	Float64	Float64	Float64	Float64	Cat…
1	5.1	3.5	1.4	0.2	setosa
2	4.9	3.0	1.4	0.2	setosa

`M`: 戻り値；メソッド	内容
`dof(M)`	自由度
`M.converged`	収束したかどうか `true` / `false`
`M.param`	パラメータの推定値
`confidence_interval(M; alpha=0.05)`	パラメータの $100(1-\alpha)\%$ 信頼区間
`M.resid`	残差
`M.jacobian`	ヤコビアン

`M`: 戻り値；メソッド	内容
`M.w`	判別係数
`M.b`	定数項
`length(M)`	判別係数ベクトルの長さ
`evaluate(M, x)`	`x` の判別値
`predict(M, x)`	`x` の判別結果

Row	group	discriminantvalue
	String	Float64
1	Versicolor	1.30935
2	Versicolor	1.03108
3	Versicolor	0.810557
4	Versicolor	0.713307
5	Versicolor	0.669186

引数	内容
`method`	`:gevd`　一般化固有値を用いる（デフォルト） `:whiten`　`Sw` をホワイトニング変換してから `Sb` の固有値分解をする
`outdim`	抽出する判別係数（`min(d, g-1)`）
`regcoef`	数値解の安定性を向上するために `Sw` の対角成分に加える `regcoef * eigmax(Sw)` を決める正数値（デフォルトは 1e-6）。

`M`: 戻り値；メソッド	内容
`projection(M)`	判別係数行列（`d` × `p` 行列）
`mean(M)`	全体の各説明変数の平均値（`d` 個）
`classmeans(M)`	群ごとの説明変数の平均値（`d` × `g` 行列）
`withclass_scatter(M)`	群内分散共分散行列（`d` × `d` 行列）[^2]
`betweenclass_scatter(M)`	群間分散共分散行列（`d` × `d` 行列）
`transform(M, x)`	判別得点（`p` × `n` 行列）
`M.pmeans`	各群ごとの判別値の平均値（`p` × `g` 行列）

引数	内容
`method`	`:cov`　分散共分散行列を使う `:svd`　データ行列の特異値分解 SVD を使う `:auto`　d < n のときは `:cov`，d　≧ n のときは `:svd`
`maxoutdim`	抽出する主成分数
`pratio`	累積寄与率がこの値以上になるように抽出主成分数を決める
`mean`	・ 0: 入力データが既に中心化されている・ `nothing`: 平均値を計算して中心化する・指定する平均値ベクトル

`M`: 戻り値；メソッド	内容
`indim(M)`	入力データ行列の変数の個数 `d`
`outdim(M)`	主成分の個数 `p`
`mean(M)`	各変数の平均値（`d` 個）[^3]
`projection(M)`	射影行列（重み）（`d` × `p` 行列）
`principalvars(M)`	各種成分で説明される分散
`tvar(M)`	入力データの各変数の分散の和
`tprincipalvar(M)`	`p` 個の主成分で説明される分散
`tresidualvar(M)`	`p` 個の主成分で説明されない分散（残差分散）
`principalratio(M)`	寄与率
`transorm(M, x)`	主成分得点（`p` × `n` 行列）
`reconstruct(M, y)`	主成分から元のデータを再生する
`loadings(M)`	主成分負荷量

引数	内容
`method`	`:em` ならば EM バージョン（expectation-maximization algorithm [^4]）の因子分析 `:cm` ならば CM バージョン（fast conditional maximization algorithm [^5]）の因子分析（デフォルト）
`maxoutdim`	抽出する因子数（デフォルトで `d`-1）
`mean`	・ 0: 入力データが既に中心化されている・ `nothing`: 平均値を計算して中心化する・指定する平均値ベクトル
`tol`	収束許容限界値（デフォルトで 1.0e-6）
`maxiter`	収束許容繰り返し数（デフォルトで 1000）
`η`	分散下限値（デフォルトで `tol`）

引数	内容
`weights`	データ点の重み（デフォルトは `nothing`）
`display`	`display=:iter` でステップごとの結果を表示する。`:final` で最終結果のみ（デフォルトは `:none`）。
`maxiter`	収束までの繰り返し回数の上限（デフォルトは `100`）
`tol`	収束したとみなす許容限界（デフォルトは `1.0e-6`）

`M`: 戻り値；メソッド	内容
`indim(M)`	入力データ行列の変数の個数 `d`
`outdim(M)`	抽出された因子の個数 `p`
`size(M)`	タプル　(`d`, `p`)
`mean(M)`	各変数の平均値（`d` 個）[^3]
`projection(M)`	射影行列（重み）（`d` × `p` 行列）= svd(`M.W`).U
`loadings(M)`, `M.W`	因子負荷量
`var(M)`, `M.Ψ`	分散 noise covariance
`cov(M)`	各因子で説明される分散 = `M.W`*`M.W'` + Diagonal(`M.Ψ`)
`transorm(M, x)`, `predict(M, x)`	因子得点（`p` × `n` 行列）
`reconstruct(M, z)`	因子得点 `z` から元のデータを再生する

`M`: 戻り値；メソッド	内容
`nclusters(M)`	クラスター数
`M.centers`	クラスターの重心
`assignments(M)`, `M.assignments`	所属クラスター
`result.costs`	データ点とその所属クラスターの重心までのコスト（平方距離）
`counts(M)`, `M.counts`	各クラスターのサンプルサイズ
`wcounts(M)`, `M.wcounts`	重み付けした場合の各クラスターのサンプルサイズ
`M.totalcost`	`result.costs` コストの和（クラスター内平方和）
`M.iterations`	収束までの繰り返し数
`M.converged`	収束したかどうかの判定値 true/false

引数	内容
`linkage`	クラスター間の距離の更新方法 `:single`最も近い距離（デフォルト） `:average` 平均距離 `:complete` 最も遠い距離 `:ward` ウォード法 `:ward_presquared` 平方距離を使うウォード法
`uplo`	`:U` 距離行列の上三角行列を使う `:L` 距離行列の下三角行列を使う `nothing`（デフォルト）
`blanchorder`	枝分かれ順序 `:r` R と同じ方法 `:barjoseph` または `:optimal` 隣り合う「葉」の枝分かれ距離が短くなるようにする

`M`: 戻り値；メソッド	内容
`M.merges`	クラスターの融合過程
`M.linkage`	linkage
`M.heights`, `M.height`	「葉」までの距離（枝分けれの位置）
`M.order`	デンドログラム中でのデータポイントの順序

引数	内容
`k`	希望するクラスター数で指定する
`h`	枝を切る位置（height）で指定する

Julia で統計解析 第 8 章 多変量解析¶

回帰分析¶

線形最小二乗回帰 Linear Least Square¶

単回帰分析¶

重回帰分析¶

リッジ回帰 Ridge Regression¶

説明変数が 1 個の場合¶

説明変数が 2 個以上の場合¶

GLM パッケージによる回帰分析¶

重回帰分析 Ordinary Least Squares Regression¶

プロビット回帰 Probit Regression¶

ロジット回帰 Logit Regression¶

多項式回帰¶

重回帰プログラムを用いる¶

多項式回帰 Polynomials パッケージ¶

指数回帰¶

説明変数が 1 個の場合¶

説明変数が 2 個以上の場合¶

累乗回帰¶

説明変数が 1 個の場合¶

説明変数が 2 個以上の場合¶

非線形回帰¶

判別分析¶

二群判別分析¶

多重判別分析¶

主成分分析¶

因子分析¶

古典的多次元尺度解析¶

クラスター分析¶

K-means 法による非階層的クラスター分析¶

階層的クラスター分析¶

カテゴリー変数の取り扱い方¶

重回帰分析の場合¶

判別分析の場合¶

Julia で統計解析　第 8 章　多変量解析¶