Version()

最新バージョン 2024-10-29 14:07

using HypothesisTests

result = PowerDivergenceTest([4, 2, 1, 3, 4, 2], lambda=1.0) # ピアソンの$\chi^2$検定

Pearson's Chi-square Test
-------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.166667, 0.166667, 0.166667, 0.166667, 0.166667, 0.166667]
    point estimate:          [0.25, 0.125, 0.0625, 0.1875, 0.25, 0.125]
    95% confidence interval: [(0.0625, 0.5238), (0.0, 0.3988), (0.0, 0.3363), (0.0, 0.4613), (0.0625, 0.5238), (0.0, 0.3988)]

Test summary:
    outcome with 95% confidence: fail to reject h_0
    one-sided p-value:           0.7385

Details:
    Sample size:        16
    statistic:          2.75
    degrees of freedom: 5
    residuals:          [0.816497, -0.408248, -1.02062, 0.204124, 0.816497, -0.408248]
    std. residuals:     [0.894427, -0.447214, -1.11803, 0.223607, 0.894427, -0.447214]

result = PowerDivergenceTest([4, 2, 1, 3, 4, 2], lambda=1.0);
p = pvalue(result)
println("Chisq. = $(result.stat),  df = $(result.df),  p value = $p")

Chisq. = 2.75,  df = 5,  p value = 0.7384611787603711

"""
    chisq_test(x; lambda=1.0, theta0=ones(length(x))/length(x))

Run `PowerDivergenceTest` on the counts `x` and print the test statistic,
the degrees of freedom and the p-value on a single line.

# Keywords
- `lambda`: forwarded to `PowerDivergenceTest` (default `1.0`).
- `theta0`: forwarded to `PowerDivergenceTest`; the default assigns the same
  probability `1/length(x)` to every element of `x`.
"""
function chisq_test(x; lambda=1.0, theta0=ones(length(x))/length(x))
    test = PowerDivergenceTest(x; lambda=lambda, theta0=theta0)
    statistic, dof = test.stat, test.df
    pval = pvalue(test)
    println("χ-sq. = $(statistic),  df = $(dof),  p value = $pval")
end;

chisq_test([4, 2, 1, 3, 4, 2])

χ-sq. = 2.75,  df = 5,  p value = 0.7384611787603711

chisq_test([21, 12, 5, 4])

χ-sq. = 17.61904761904762,  df = 3,  p value = 0.0005270259708713794

pvalue(result)

0.7384611787603711

pvalue(result, tail=:right)

0.7384611787603711

confint(result)

6-element Vector{Tuple{Float64, Float64}}:
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)
 (0.0, 0.3362538769784804)
 (0.0, 0.4612538769784804)
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)

confint(result, tail=:both)

6-element Vector{Tuple{Float64, Float64}}:
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)
 (0.0, 0.3362538769784804)
 (0.0, 0.4612538769784804)
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)

result.lambda

1.0

result.n

16

result.observed

6×1 Matrix{Int64}:
 4
 2
 1
 3
 4
 2

result.thetahat

6-element Vector{Float64}:
 0.25
 0.125
 0.0625
 0.1875
 0.25
 0.125

round.(result.theta0, digits=5)

6-element Vector{Float64}:
 0.16667
 0.16667
 0.16667
 0.16667
 0.16667
 0.16667

round.(result.expected, digits=5)

6×1 Matrix{Float64}:
 2.66667
 2.66667
 2.66667
 2.66667
 2.66667
 2.66667

result.stat

2.75

result.df

5

round.(result.residuals, digits=5)

6×1 Matrix{Float64}:
  0.8165
 -0.40825
 -1.02062
  0.20412
  0.8165
 -0.40825

round.(result.stdresiduals, digits=5)

6×1 Matrix{Float64}:
  0.89443
 -0.44721
 -1.11803
  0.22361
  0.89443
 -0.44721

result = PowerDivergenceTest([4, 2, 1, 3, 4, 2], lambda=0.0)

Multinomial Likelihood Ratio Test
---------------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.166667, 0.166667, 0.166667, 0.166667, 0.166667, 0.166667]
    point estimate:          [0.25, 0.125, 0.0625, 0.1875, 0.25, 0.125]
    95% confidence interval: [(0.0625, 0.5238), (0.0, 0.3988), (0.0, 0.3363), (0.0, 0.4613), (0.0625, 0.5238), (0.0, 0.3988)]

Test summary:
    outcome with 95% confidence: fail to reject h_0
    one-sided p-value:           0.7106

Details:
    Sample size:        16
    statistic:          2.931024858031232
    degrees of freedom: 5
    residuals:          [0.816497, -0.408248, -1.02062, 0.204124, 0.816497, -0.408248]
    std. residuals:     [0.894427, -0.447214, -1.11803, 0.223607, 0.894427, -0.447214]

"""
    G2_test(x; theta0=ones(length(x))/length(x), lambda=0.0)

Run `PowerDivergenceTest` on the counts `x` and print the test statistic,
the degrees of freedom and the p-value on a single line.

# Keywords
- `theta0`: forwarded to `PowerDivergenceTest`; the default assigns the same
  probability `1/length(x)` to every element of `x`.
- `lambda`: forwarded to `PowerDivergenceTest` (default `0.0`).
"""
function G2_test(x; theta0=ones(length(x))/length(x), lambda=0.0)
    test = PowerDivergenceTest(x; theta0=theta0, lambda=lambda)
    statistic, dof = test.stat, test.df
    pval = pvalue(test)
    println("G-sq. = $(statistic),  df = $(dof),  p value = $pval")
end

G2_test([4, 2, 1, 3, 4, 2])

G-sq. = 2.931024858031232,  df = 5,  p value = 0.710619024390339

G2_test([21, 12, 5, 4])

G-sq. = 17.176914390863786,  df = 3,  p value = 0.0006499306837478541

pvalue(result)

0.710619024390339

pvalue(result, tail=:right)

0.710619024390339

confint(result)

6-element Vector{Tuple{Float64, Float64}}:
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)
 (0.0, 0.3362538769784804)
 (0.0, 0.4612538769784804)
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)

confint(result, tail=:both)

6-element Vector{Tuple{Float64, Float64}}:
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)
 (0.0, 0.3362538769784804)
 (0.0, 0.4612538769784804)
 (0.0625, 0.5237538769784804)
 (0.0, 0.3987538769784804)

result.lambda

0.0

result.n

16

result.observed

6×1 Matrix{Int64}:
 4
 2
 1
 3
 4
 2

result.thetahat

6-element Vector{Float64}:
 0.25
 0.125
 0.0625
 0.1875
 0.25
 0.125

round.(result.theta0, digits=5)

6-element Vector{Float64}:
 0.16667
 0.16667
 0.16667
 0.16667
 0.16667
 0.16667

round.(result.expected, digits=5)

6×1 Matrix{Float64}:
 2.66667
 2.66667
 2.66667
 2.66667
 2.66667
 2.66667

result.stat

2.931024858031232

result.df

5

round.(result.residuals, digits=5)

6×1 Matrix{Float64}:
  0.8165
 -0.40825
 -1.02062
  0.20412
  0.8165
 -0.40825

round.(result.stdresiduals, digits=5)

6×1 Matrix{Float64}:
  0.89443
 -0.44721
 -1.11803
  0.22361
  0.89443
 -0.44721

result = PowerDivergenceTest([21, 12, 5, 4], theta0=[9/16, 3/16, 3/16, 1/16], lambda=1.0)

Pearson's Chi-square Test
-------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.5625, 0.1875, 0.1875, 0.0625]
    point estimate:          [0.5, 0.285714, 0.119048, 0.0952381]
    95% confidence interval: [(0.3571, 0.6576), (0.1429, 0.4433), (0.0, 0.2766), (0.0, 0.2528)]

Test summary:
    outcome with 95% confidence: fail to reject h_0
    one-sided p-value:           0.2384

Details:
    Sample size:        42
    statistic:          4.22222222222222
    degrees of freedom: 3
    residuals:          [-0.540062, 1.46994, -1.0245, 0.848668]
    std. residuals:     [-0.816497, 1.63075, -1.13658, 0.876501]

chisq_test([21, 12, 5, 4], theta0=[9/16, 3/16, 3/16, 1/16]) # ピアソンのχ二乗検定

χ-sq. = 4.22222222222222,  df = 3,  p value = 0.23844641563476865

pvalue(result)

0.23844641563476865

pvalue(result, tail=:right)

0.23844641563476865

confint(result)

4-element Vector{Tuple{Float64, Float64}}:
 (0.35714285714285715, 0.6575992779891421)
 (0.14285714285714285, 0.4433135637034278)
 (0.0, 0.2766468970367611)
 (0.0, 0.2528373732272373)

confint(result, tail=:both)

4-element Vector{Tuple{Float64, Float64}}:
 (0.35714285714285715, 0.6575992779891421)
 (0.14285714285714285, 0.4433135637034278)
 (0.0, 0.2766468970367611)
 (0.0, 0.2528373732272373)

result.lambda

1.0

result.n

42

result.observed

4×1 Matrix{Int64}:
 21
 12
  5
  4

result.thetahat

4-element Vector{Float64}:
 0.5
 0.2857142857142857
 0.11904761904761904
 0.09523809523809523

round.(result.theta0, digits=5)

4-element Vector{Float64}:
 0.5625
 0.1875
 0.1875
 0.0625

round.(result.expected, digits=5)

4×1 Matrix{Float64}:
 23.625
  7.875
  7.875
  2.625

result.stat

4.22222222222222

result.df

3

round.(result.residuals, digits=5)

4×1 Matrix{Float64}:
 -0.54006
  1.46994
 -1.0245
  0.84867

round.(result.stdresiduals, digits=5)

4×1 Matrix{Float64}:
 -0.8165
  1.63075
 -1.13658
  0.8765

result = PowerDivergenceTest([21, 12, 5, 4], theta0=[9/16, 3/16, 3/16, 1/16], lambda=0.0)

Multinomial Likelihood Ratio Test
---------------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.5625, 0.1875, 0.1875, 0.0625]
    point estimate:          [0.5, 0.285714, 0.119048, 0.0952381]
    95% confidence interval: [(0.3571, 0.6576), (0.1429, 0.4433), (0.0, 0.2766), (0.0, 0.2528)]

Test summary:
    outcome with 95% confidence: fail to reject h_0
    one-sided p-value:           0.2626

Details:
    Sample size:        42
    statistic:          3.9893906620976454
    degrees of freedom: 3
    residuals:          [-0.540062, 1.46994, -1.0245, 0.848668]
    std. residuals:     [-0.816497, 1.63075, -1.13658, 0.876501]

G2_test([21, 12, 5, 4], theta0=[9/16, 3/16, 3/16, 1/16]) # 対数尤度比検定（G2検定）

G-sq. = 3.9893906620976454,  df = 3,  p value = 0.26261202803389555

pvalue(result)

0.26261202803389555

pvalue(result, tail=:right)

0.26261202803389555

confint(result)

4-element Vector{Tuple{Float64, Float64}}:
 (0.35714285714285715, 0.6575992779891421)
 (0.14285714285714285, 0.4433135637034278)
 (0.0, 0.2766468970367611)
 (0.0, 0.2528373732272373)

confint(result, tail=:both)

4-element Vector{Tuple{Float64, Float64}}:
 (0.35714285714285715, 0.6575992779891421)
 (0.14285714285714285, 0.4433135637034278)
 (0.0, 0.2766468970367611)
 (0.0, 0.2528373732272373)

result.lambda

0.0

result.n

42

result.observed

4×1 Matrix{Int64}:
 21
 12
  5
  4

result.thetahat

4-element Vector{Float64}:
 0.5
 0.2857142857142857
 0.11904761904761904
 0.09523809523809523

round.(result.theta0, digits=5)

4-element Vector{Float64}:
 0.5625
 0.1875
 0.1875
 0.0625

round.(result.expected, digits=5)

4×1 Matrix{Float64}:
 23.625
  7.875
  7.875
  2.625

result.stat

3.9893906620976454

result.df

3

round.(result.residuals, digits=5)

4×1 Matrix{Float64}:
 -0.54006
  1.46994
 -1.0245
  0.84867

round.(result.stdresiduals, digits=5)

4×1 Matrix{Float64}:
 -0.8165
  1.63075
 -1.13658
  0.8765

x = [12 35
     43 56]

2×2 Matrix{Int64}:
 12  35
 43  56

result = PowerDivergenceTest(x, lambda=1.0)

Pearson's Chi-square Test
-------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.12127, 0.255442, 0.200647, 0.42264]
    point estimate:          [0.0821918, 0.294521, 0.239726, 0.383562]
    95% confidence interval: [(0.0, 0.1716), (0.2123, 0.384), (0.1575, 0.3292), (0.3014, 0.473)]

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           0.0370

Details:
    Sample size:        146
    statistic:          4.350164943588549
    degrees of freedom: 1
    residuals:          [-1.35593, 0.934264, 1.05414, -0.726324]
    std. residuals:     [-2.0857, 2.0857, 2.0857, -2.0857]

chisq_test(x)

χ-sq. = 4.350164943588549,  df = 1,  p value = 0.037005362019413034

pvalue(result)

0.037005362019413034

pvalue(result, tail=:right)

0.037005362019413034

using Distributions
"""
    yates(x)

Print the continuity-corrected χ² statistic (the absolute cross-product
difference is reduced by `0.5n`, floored at 0), its degrees of freedom (1) and
the upper-tail p-value from `Chisq(1)` for a table with four cells.

`x` is read in storage order as `a, c, b, d`, i.e. a 2×2 matrix `[a b; c d]`.

# Throws
- `ArgumentError` if `x` does not contain exactly four cells.
"""
function yates(x)
    # Destructuring would silently use only the first four cells of a larger table.
    length(x) == 4 ||
        throw(ArgumentError("x must contain exactly 4 cells (2×2 table), got $(length(x))"))
    n = sum(x)
    a, c, b, d = x  # column-major order: first column is (a, c), second is (b, d)
    stat = n*max(0, abs(a*d - b*c) - 0.5n)^2 / ((a+b)*(c+d)*(a+c)*(b+d))
    p = ccdf(Chisq(1), stat)
    println("corrected χ-sq. = $stat,  df = 1,  p value = $p")
end;

yates(x)

corrected χ-sq. = 3.621119907386832,  df = 1,  p value = 0.05705045741699577

confint(result)

4-element Vector{Tuple{Float64, Float64}}:
 (0.0, 0.17163425153174042)
 (0.2123287671232877, 0.38396301865502813)
 (0.15753424657534246, 0.3291684981070829)
 (0.3013698630136986, 0.47300411454543906)

confint(result, tail=:both)

4-element Vector{Tuple{Float64, Float64}}:
 (0.0, 0.17163425153174042)
 (0.2123287671232877, 0.38396301865502813)
 (0.15753424657534246, 0.3291684981070829)
 (0.3013698630136986, 0.47300411454543906)

result.lambda

1.0

result.n

146

result.observed

2×2 Matrix{Int64}:
 12  35
 43  56

result.thetahat

4-element Vector{Float64}:
 0.0821917808219178
 0.2945205479452055
 0.23972602739726026
 0.3835616438356164

round.(result.theta0, digits=5)

4-element Vector{Float64}:
 0.12127
 0.25544
 0.20065
 0.42264

round.(result.expected, digits=5)

2×2 Matrix{Float64}:
 17.7055  29.2945
 37.2945  61.7055

result.stat

4.350164943588549

result.df

1

Base.print_matrix(stdout, round.(result.residuals, digits=7), "", "   ", "\n")

-1.3559332    1.0541416
 0.934264    -0.7263238

Base.print_matrix(stdout, round.(result.stdresiduals, digits=7), "", "   ", "\n")

-2.0857049    2.0857049
 2.0857049   -2.0857049

using Distributions
"""
    residual_analysis(x)

Print, as a matrix rounded to 7 digits, the two-sided standard-normal p-value
of each standardized residual of `PowerDivergenceTest(x, lambda=1.0)`.
"""
function residual_analysis(x)
    result = PowerDivergenceTest(x, lambda=1.0)
    # Two-sided p-value: twice the upper-tail probability of |standardized residual|.
    p = 2ccdf.(Normal(), abs.(result.stdresiduals))
    Base.print_matrix(stdout, round.(p, digits=7), "", "   ", "\n")
end

residual_analysis(x)

0.0370054   0.0370054
0.0370054   0.0370054

answer = ["Yes", "Yes", "No", "No", "No", "Yes", "No", "Yes", "Yes", "No",
          "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "No", "No", "No"]
status = ["Hi", "Lo", "Med", "Med", "Lo", "Hi", "Lo", "Hi", "Lo", "Lo",
          "Med", "Med", "Lo", "Med", "Med", "Lo", "Lo", "Hi", "Lo", "Med"]

using FreqTables
y = freqtable(answer, status)

2×3 Named Matrix{Int64}
Dim1 ╲ Dim2 │  Hi   Lo  Med
────────────┼──────────────
No          │   1    5    5
Yes         │   3    4    2

chisq_test(y)

χ-sq. = 2.2190155523488855,  df = 2,  p value = 0.32972121777778757

residual_analysis(y)

0.1775299   0.9639693   0.2785029
0.1775299   0.9639693   0.2785029

x = [10 3; 4 12]

2×2 Matrix{Int64}:
 10   3
  4  12

result = PowerDivergenceTest(x, lambda=0.0)

Multinomial Likelihood Ratio Test
---------------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.216409, 0.26635, 0.231867, 0.285375]
    point estimate:          [0.344828, 0.137931, 0.103448, 0.413793]
    95% confidence interval: [(0.1724, 0.537), (0.0, 0.3301), (0.0, 0.2957), (0.2414, 0.606)]

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           0.0044

Details:
    Sample size:        29
    statistic:          8.1280145470097
    degrees of freedom: 1
    residuals:          [1.48658, -1.33999, -1.43618, 1.29455]
    std. residuals:     [2.7828, -2.7828, -2.7828, 2.7828]

G2_test(x) # 対数尤度比に基づく検定（G2検定）

G-sq. = 8.1280145470097,  df = 1,  p value = 0.00435864516471826

pvalue(result)

0.00435864516471826

pvalue(result, tail=:right)

0.00435864516471826

confint(result)

4-element Vector{Tuple{Float64, Float64}}:
 (0.1724137931034483, 0.5370409523965961)
 (0.0, 0.3301444006724581)
 (0.0, 0.2956616420517684)
 (0.24137931034482757, 0.6060064696379754)

confint(result, tail=:both)

4-element Vector{Tuple{Float64, Float64}}:
 (0.1724137931034483, 0.5370409523965961)
 (0.0, 0.3301444006724581)
 (0.0, 0.2956616420517684)
 (0.24137931034482757, 0.6060064696379754)

result.lambda

0.0

result.n

29

result.observed

2×2 Matrix{Int64}:
 10   3
  4  12

result.thetahat

4-element Vector{Float64}:
 0.3448275862068966
 0.13793103448275862
 0.10344827586206896
 0.41379310344827586

round.(result.theta0, digits=5)

4-element Vector{Float64}:
 0.21641
 0.26635
 0.23187
 0.28537

round.(result.expected, digits=5)

2×2 Matrix{Float64}:
 6.27586  6.72414
 7.72414  8.27586

result.stat

8.1280145470097

result.df

1

Base.print_matrix(stdout, round.(result.residuals, digits=7), "", "   ", "\n")

 1.4865827   -1.4361753
-1.3399875    1.2945509

Base.print_matrix(stdout, round.(result.stdresiduals, digits=7), "", "   ", "\n")

 2.7827964   -2.7827964
-2.7827964    2.7827964

z = [4 5 2 0
     0 7 6 1
     1 0 3 1]
result = PowerDivergenceTest(z, lambda=0.0);
p = pvalue(result)
println("Chisq. = $(result.stat),  df = $(result.df),  p value = $p")

Chisq. = NaN,  df = 6,  p value = NaN

result = PowerDivergenceTest(z, lambda=0.000000001);
p = pvalue(result)
println("Chisq. = $(result.stat),  df = $(result.df),  p value = $p")

Chisq. = 15.364590917511304,  df = 6,  p value = 0.017602889013051494

using Distributions
"""
    trueG2_test(x; correct=false)

Compute the G² statistic of the two-way table `x` directly from the cell
counts, row sums, column sums and grand total, then print the statistic, its
degrees of freedom `(nrows - 1) * (ncols - 1)` and the upper-tail χ² p-value.

Cells equal to 0 contribute 0 to the statistic, so tables containing empty
cells still yield a finite result.

# Keywords
- `correct`: when `true`, divide G² by a continuity-correction factor before
  computing the p-value, and label the output `corrected G-sq.`.
"""
function trueG2_test(x; correct=false)
    # k*log(k) with the convention 0*log(0) = 0; always returns a float.
    xlogx(k) = k == 0 ? 0.0 : k * log(k)
    # Sequential sum of k*log(k) over every element of a collection.
    sumxlogx(v) = sum(xlogx(k) for k in v)
    nrows, ncols = size(x)
    n = sum(x)           # total sample size
    n1 = sum(x, dims=2)  # row sums
    n2 = sum(x, dims=1)  # column sums
    G2 = 2*(sumxlogx(x) - sumxlogx(n1) - sumxlogx(n2) + xlogx(n))  # G statistic
    if correct
        # Continuity correction.
        # NOTE(review): this correction is often written with
        # (nrows - 1) * (ncols - 1) in the denominator — confirm the intended form.
        G2 /= 1 + (n * sum(1 ./ n1) - 1) * (n * sum(1 ./ n2) - 1) / (6n * nrows * ncols)
    end
    df = (nrows - 1) * (ncols - 1)  # degrees of freedom of G
    p = ccdf(Chisq(df), G2)
    name = correct ? "corrected G-sq." : "G-sq."
    println("$name = $G2,  df = $df,  p value = $p")
end

trueG2_test(z)

G-sq. = 15.364591286599591,  df = 6,  p value = 0.01760288650305146

trueG2_test(z)

G-sq. = 15.364591286599591,  df = 6,  p value = 0.01760288650305146

trueG2_test(z, correct=true) # 連続性の補正

corrected G-sq. = 13.776490649307341,  df = 6,  p value = 0.03223501559558125

using Plots
# Plot the power-divergence test statistic of a fixed 3×3 table as a function of λ.
x = [2 4 3; 1 5 3; 4 2 3];
lambdas = collect(-2:0.1:1)
# A comprehension avoids assigning `result` inside a top-level loop
# (which would overwrite the global `result` in a notebook session).
stats = Float64[PowerDivergenceTest(x, lambda=λ).stat for λ in lambdas]
plot(lambdas, stats, xlabel="λ", ylabel="test statistic",
     grid=false, tick_direction=:out, tickfont=("Times", 6),
        size=(340, 230), label="")

result1 = PowerDivergenceTest([10 3; 4 12], lambda=1.0)
p = pvalue(result1)
println("χ-sq. = $(result1.stat),  df = $(result1.df),  p value = $p")

χ-sq. = 7.743956043956044,  df = 1,  p value = 0.005389260671694263

chisq_test([10 3; 4 12]) # 上の例と同じである

χ-sq. = 7.743956043956044,  df = 1,  p value = 0.005389260671694263

result2 = PowerDivergenceTest([10 3; 4 12], lambda=0.0)
p = pvalue(result2)
println("G-sq. = $(result2.stat),  df = $(result2.df),  p value = $p")

G-sq. = 8.1280145470097,  df = 1,  p value = 0.00435864516471826

G2_test([10 3; 4 12]) # 上の例と同じである

G-sq. = 8.1280145470097,  df = 1,  p value = 0.00435864516471826

result = FisherExactTest(10, 3, 4, 12)

Fisher's exact test
-------------------
Population details:
    parameter of interest:   Odds ratio
    value under h_0:         1.0
    point estimate:          9.06784
    95% confidence interval: (1.422, 80.35)

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           0.0146

Details:
    contingency table:
        10   3
         4  12

pvalue(result) # デフォールトで両側検定の p 値

0.014589609220157725

pvalue(result, tail=:both) # 両側検定の p 値

0.014589609220157725

pvalue(result, tail=:left) # 片側検定の p 値

0.9994164940233701

pvalue(result, tail=:right) # 片側検定の p 値

0.007294804610078863

pvalue(result, method=:central) # 両側検定の p 値（デフォルト）

0.014589609220157725

pvalue(result, method=:minlike)

0.009220570313398513

using RCall # R で計算すると以下のようになる
R"""
options(digits=15)
x = matrix(c(10, 3, 4, 12), 2)
fisher.test(x)$p.value
"""

RObject{RealSxp}
[1] 0.00922057031339851

result.ω

9.067840887689634

confint(result)

(1.4222662898175877, 80.34790877482563)

confint(result, level=0.95)

(1.4222662898175877, 80.34790877482563)

using Distributions
"""
    oddsratio(a, b, c, d; conf=0.95, correct=false)

Print the odds ratio `a*d / (b*c)` of a 2×2 table and a normal-approximation
confidence interval computed on the log scale with standard error
`sqrt(1/a + 1/b + 1/c + 1/d)`.

# Keywords
- `conf`: confidence level (default `0.95`).
- `correct`: when `true`, add 0.5 to every cell before computing. The same
  adjustment is applied automatically when any cell is 0.
"""
function oddsratio(a, b, c, d; conf=0.95, correct=false)
    # Test each cell instead of the product a*b*c*d, which can overflow for large counts.
    if correct || any(iszero, (a, b, c, d))
        a, b, c, d = a+0.5, b+0.5, c+0.5, d+0.5
    end
    or = a*d / (b*c)
    z = quantile(Normal(), 0.5 - conf/2)  # lower-tail quantile, negative for conf > 0
    se = sqrt(1/a+1/b+1/c+1/d)
    LCL, UCL = or * exp(z * se), or * exp(-z * se)
    # `Int(100conf)` throws InexactError for levels such as 0.999 or 0.57;
    # round away floating-point noise and only drop the fraction when it is zero.
    pct = round(100conf, digits=10)
    level = isinteger(pct) ? string(Int(pct)) : string(pct)
    println("オッズ比 = $or")
    println("$(level)% 信頼区間  [$LCL, $UCL]")
end;

oddsratio(10, 3, 4, 12)

オッズ比 = 10.0
95% 信頼区間  [1.7975962537192371, 55.62984446206951]

using RCall
R"fisher.test(matrix(c(10, 3, 4, 12), 2))"

RObject{VecSxp}

	Fisher's Exact Test for Count Data

data:  matrix(c(10, 3, 4, 12), 2)
p-value = 0.0092205703134
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  1.42227515556236 80.34058365829780
sample estimates:
      odds ratio 
9.06776007785795

result = BinomialTest(12, 20)

Binomial test
-------------
Population details:
    parameter of interest:   Probability of success
    value under h_0:         0.5
    point estimate:          0.6
    95% confidence interval: (0.3605, 0.8088)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.5034

Details:
    number of observations: 20
    number of successes:    12

pvalue(result, tail=:left)

0.8684120178222656

pvalue(result, tail=:right)

0.2517223358154298

pvalue(result, tail=:both)

0.5034446716308596

result2 = BinomialTest(18, 24, 0.68); # p=0.68 の場合。数値だけ指定するので注意。
pvalue(result2, tail=:both)

0.6206975448201958

using RCall
R"""
options(digits=16)
binom.test(18, 24, 0.68)$p.value
"""

RObject{RealSxp}
[1] 0.5205908910845283

pvalue(result, tail=:right)

0.2517223358154298

2 * pvalue(result, tail=:right)

0.5034446716308596

using Plots, Distributions
# Bar chart of the Binomial(n, p) probability mass function; outcomes
# x <= 14 and x >= 18 are drawn in red, the remaining ones in blue.
n = 24
p = 0.68
# Probability of every outcome 0, 1, ..., n (p_value[k + 1] is the probability of k).
p_value = pdf.(Binomial(n, p), 0:n)
col = [i <= 14 || i >= 18 ? :red : :blue for i in 0:n]
bar(0:n, p_value, grid=false, tick_direction=:out,
    xlabel= "x", ylabel="probability", color=col, alpha=0.4,
    tickfont=("Times", 6), size=(340, 230), label="")

sum(p_value[1:15]) + sum(p_value[19:25]) # Julia の添字は 1 から始まるので p_value[n+1] が n のときの確率である

0.5205908910845278

using Distributions
"""
    binomial_pvalue(x, n, p=0.5; tail=:both)

Return the p-value of observing `x` successes under `Binomial(n, p)`.

- `tail=:left` returns `cdf(Binomial(n, p), x)`.
- `tail=:right` returns `ccdf(Binomial(n, p), x - 1)`.
- Any other `tail` gives a two-sided value: the tail on the side of `x`
  plus the `y` most extreme outcomes on the other side of `n*p`, where `y`
  counts the outcomes there whose probability does not exceed that of `x`
  (with a relative tolerance of 1e-7).

The result is always a floating-point number.
"""
function binomial_pvalue(x, n, p=0.5; tail=:both)
    obj = Binomial(n, p)
    tail == :left  && return cdf(obj, x)
    tail == :right && return ccdf(obj, x - 1)
    # Degenerate distributions: all mass sits on 0 (p == 0) or on n (p == 1).
    p == 0 && return Float64(x == 0)
    p == 1 && return Float64(x == n)
    m = n * p
    x == m && return 1.0  # float, so every branch returns the same type
    # Slightly inflated threshold so probabilities equal up to rounding count as ties.
    limit = 1.0000001pdf(obj, x)
    if x < m
        y = count(<=(limit), pdf.(obj, ceil(Int, m):n))
        return cdf(obj, x) + ccdf(obj, n - y)
    else
        y = count(<=(limit), pdf.(obj, 0:floor(Int, m)))
        return cdf(obj, y - 1) + ccdf(obj, x - 1)
    end
end

binomial_pvalue (generic function with 2 methods)

binomial_pvalue(12, 20)

0.5034446716308596

binomial_pvalue(4, 20, 0.6, tail=:left)

0.00031703112116863026

binomial_pvalue(16, 20, 0.6, tail=:right)

0.05095195319416655

result = BinomialTest(5, 26)
confint(result)

(0.06554810873678252, 0.3935055279393218)

confint(result, level=0.99)

(0.04400829697421974, 0.4550043049581578)

confint(result, tail=:right)

(0.07898571028519472, 1.0)

confint(result, tail=:left)

(0.0, 0.36259486197815627)

x = [5.6, 8.1, 6.6, 4.7, 4.8, 9.6, 5.6, 1.3, 4.1, 5.4];
y = [2.0, 3.4, 5.2, 8.0, 9.0, 3.6, 2.7, 0.7, 6.7, 5.7];
z = x .- y;

using HypothesisTests
result = OneSampleTTest(x, y)

One sample t-test
-----------------
Population details:
    parameter of interest:   Mean
    value under h_0:         0
    point estimate:          0.88
    95% confidence interval: (-1.614, 3.374)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.4454

Details:
    number of observations:   10
    t-statistic:              0.7981113921334072
    degrees of freedom:       9
    empirical standard error: 1.1026029808291529

result = OneSampleTTest(z)

One sample t-test
-----------------
Population details:
    parameter of interest:   Mean
    value under h_0:         0
    point estimate:          0.88
    95% confidence interval: (-1.614, 3.374)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.4454

Details:
    number of observations:   10
    t-statistic:              0.7981113921334072
    degrees of freedom:       9
    empirical standard error: 1.1026029808291529

pvalue(result)

0.4453534218554348

pvalue(result, tail=:both)

0.4453534218554348

pvalue(result, tail=:left)

0.7773232890722825

pvalue(result, tail=:right)

0.2226767109277174

confint(result)

(-1.6142612308053204, 3.3742612308053195)

confint(result, tail=:both)

(-1.6142612308053204, 3.3742612308053195)

confint(result, level=0.99)

(-2.7032783553640023, 4.463278355364001)

confint(result, tail=:right)

(-1.141195783743237, Inf)

confint(result, tail=:right, level=0.99)

(-2.2309258663578753, Inf)

confint(result, tail=:left)

(-Inf, 2.901195783743236)

result.n

10

result.xbar

0.8799999999999997

result.t

0.7981113921334072

result.df

9

result.xbar

0.8799999999999997

result.μ0

0

result.stderr

1.1026029808291529

x = [1, 2, 3, 6, 3, 1, 2, 1, 1, 1, 3, 4];
y = [2, 1, 2, 3, 4, 2, 1, 2, 3, 5];

result = EqualVarianceTTest(x, y)

Two sample t-test (equal variance)
----------------------------------
Population details:
    parameter of interest:   Mean difference
    value under h_0:         0
    point estimate:          -0.166667
    95% confidence interval: (-1.448, 1.115)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.7889

Details:
    number of observations:   [12,10]
    t-statistic:              -0.271312734545191
    degrees of freedom:       20
    empirical standard error: 0.6142972497994038

pvalue(result) # tail=:both 両側検定がデフォルト

0.788931157688499

pvalue(result, tail=:left) # 棄却域が左にある片側検定

0.3944655788442495

pvalue(result, tail=:right) # 棄却域が右にある片側検定

0.6055344211557505

confint(result) #デフォルトで両側に棄却域がある場合の信頼率 95% の信頼区間

(-1.448068275504171, 1.114734942170838)

confint(result, level=0.99) # 信頼率 99%

(-1.9145510251333062, 1.5812176917999732)

confint(result, tail=:left, level=0.99) # 片側と信頼率の指定

(-Inf, 1.386262653673622)

result.t

-0.271312734545191

result.df

20

result.n_x, result.n_y

(12, 10)

result.xbar, result.μ0

(-0.16666666666666652, 0)

result.stderr

0.6142972497994038

x = [1, 2, 3, 6, 3, 1, 2, 1, 1, 1, 3, 4];
y = [2, 1, 2, 3, 4, 2, 1, 2, 3, 5];

result = UnequalVarianceTTest(x, y)

Two sample t-test (unequal variance)
------------------------------------
Population details:
    parameter of interest:   Mean difference
    value under h_0:         0
    point estimate:          -0.166667
    95% confidence interval: (-1.424, 1.09)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.7849

Details:
    number of observations:   [12,10]
    t-statistic:              -0.2765775336645321
    degrees of freedom:       19.99676443938765
    empirical standard error: 0.6026037778933294

pvalue(result) # tail=:both 両側検定がデフォルト

0.784943053185432

pvalue(result, tail=:left) # 棄却域が左にある片側検定

0.392471526592716

pvalue(result, tail=:right) # 棄却域が右にある片側検定

0.607528473407284

confint(result) #デフォルトで両側に棄却域がある場合の信頼率 95% の信頼区間

(-1.423689159406056, 1.0903558260727229)

confint(result, level=0.99) # 信頼率 99%

(-1.8813078766019782, 1.5479745432686451)

confint(result, tail=:left, level=0.99) # 片側と信頼率の指定

(-Inf, 1.3567230542137687)

result.t

-0.2765775336645321

result.df

19.99676443938765

result.n_x, result.n_y

(12, 10)

result.xbar, result.μ0

(-0.16666666666666652, 0)

result.stderr

0.6026037778933294

result.t

-0.2765775336645321

x = [1, 2, 3, 6, 3, 1, 2, 1, 1, 1, 3, 4];
y = [2, 1, 2, 3, 4, 2, 1, 2, 3, 5];

approx_result = ApproximateMannWhitneyUTest(x, y)

Approximate Mann-Whitney U test
-------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.0

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.6334

Details:
    number of observations in each group: [12, 10]
    Mann-Whitney-U statistic:             52.5
    rank sums:                            [137.0, 116.0]
    adjustment for ties:                  672.0
    normal approximation (μ, σ):          (-7.5, 14.6784)

using RCall
R"""
x = c(1, 2, 3, 6, 3, 1, 2, 1, 1, 1, 3, 4)
y = c(2, 1, 2, 3, 4, 2, 1, 2, 3, 5)
wilcox.test(x, y)
"""

┌ Warning: RCall.jl: wilcox.test.default(x, y) で警告がありました:
│   タイがあるため、正確な p 値を計算することができません
└ @ RCall ~/.julia/packages/RCall/0ggIQ/src/io.jl:172

RObject{VecSxp}

	Wilcoxon rank sum test with continuity correction

data:  x and y
W = 52.5, p-value = 0.6334388940675
alternative hypothesis: true location shift is not equal to 0

exact_result = ExactMannWhitneyUTest(x, y)

Exact Mann-Whitney U test
-------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.0

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.6305

Details:
    number of observations in each group: [12, 10]
    Mann-Whitney-U statistic:             52.5
    rank sums:                            [137.0, 116.0]
    adjustment for ties:                  672.0

using RCall
R"""
x = c(1, 2, 3, 6, 3, 1, 2, 1, 1, 1, 3, 4)
y = c(2, 1, 2, 3, 4, 2, 1, 2, 3, 5)
val = c(x, y)
g = factor(rep(c("a", "b"), c(length(x), length(y))))
df = data.frame(val, g)
library(coin)
wilcox_test(val ~ g, data=df, distribution="exact")
"""

┌ Warning: RCall.jl: 要求されたパッケージ survival をロード中です
└ @ RCall ~/.julia/packages/RCall/0ggIQ/src/io.jl:172

RObject{S4Sxp}

	Exact Wilcoxon-Mann-Whitney Test

data:  val by g (a, b)
Z = -0.51095591724442, p-value = 0.6326831063673
alternative hypothesis: true mu is not equal to 0

x = [1.99, 14.93, 7.51, 11.58, 4.75, 3.86, 2.74, 5.66, 11.32, 4.63, 8.89, 17.99]
y = [3.04, 19.18, 13.72, 19.31, 15.48, 17.92, 16.94, 7.99, 18.75, 5.4]
approx_result = ApproximateMannWhitneyUTest(x, y)

Approximate Mann-Whitney U test
-------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          -9.625

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           0.0321

Details:
    number of observations in each group: [12, 10]
    Mann-Whitney-U statistic:             27.0
    rank sums:                            [164.0, 89.0]
    adjustment for ties:                  0.0
    normal approximation (μ, σ):          (-33.0, 15.1658)

exact_result = ExactMannWhitneyUTest(x, y)

Exact Mann-Whitney U test
-------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          -9.625

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           0.0300

Details:
    number of observations in each group: [12, 10]
    Mann-Whitney-U statistic:             27.0
    rank sums:                            [164.0, 89.0]
    adjustment for ties:                  0.0

pvalue(approx_result) |> println
pvalue(exact_result)  |> println

0.03211417930790749
0.029960751322980424

using RCall
R"""
x = c(1.99, 14.93, 7.51, 11.58, 4.75, 3.86, 2.74, 5.66, 11.32, 4.63, 8.89, 17.99)
y = c(3.04, 19.18, 13.72, 19.31, 15.48, 17.92, 16.94, 7.99, 18.75, 5.4)
val = c(x, y)
g = factor(rep(c("a", "b"), c(length(x), length(y))))
df = data.frame(val, g)
library(coin)
wilcox_test(val ~ g, data=df, distribution="exact")
"""

RObject{S4Sxp}

	Exact Wilcoxon-Mann-Whitney Test

data:  val by g (a, b)
Z = -2.1759555622061, p-value = 0.02996075132298
alternative hypothesis: true mu is not equal to 0

using StatsBase # tiedrank
"""
    ExactU(x, y)

Return the p-value of `ExactPermutationTest` applied, with `mean` as the
statistic, to the tied ranks of the pooled sample `vcat(x, y)` split back into
the two groups.

# Throws
- `ArgumentError` if the pooled sample has more than 10 observations.
"""
function ExactU(x, y)
    nx = length(x)
    # Check the size first so that oversized input is rejected before any ranking work.
    nx + length(y) > 10 && throw(ArgumentError("sample size must be less than 11."))
    z = tiedrank(vcat(x, y))  # ranks of the pooled sample; ties share their average rank
    return pvalue(ExactPermutationTest(z[1:nx], z[nx+1:end], mean))
end

x = [1, 2, 3, 6, 3];
y = [2, 1, 2, 3, 4];
ExactU(x, y) # p value = 0.7142857142857143

0.7142857142857143

using HypothesisTests

x = [3, 1, 5, 4, 2, 2, 4, 1, 5, 4]
y = [1, 1, 3, 2, 4, 1, 3, 2, 4, 2]
result = SignTest(x, y)

Sign Test
---------
Population details:
    parameter of interest:   Median
    value under h_0:         0.0
    point estimate:          1.0
    95% confidence interval: (0.0, 2.0)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.1797

Details:
    number of observations:       9
    observations larger than 0.0: 7

result2 = SignTest([1, 0, 1, 1, -1, 1, 1, -1, 1, 1])

Sign Test
---------
Population details:
    parameter of interest:   Median
    value under h_0:         0.0
    point estimate:          1.0
    95% confidence interval: (0.0, 1.0)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.1797

Details:
    number of observations:       9
    observations larger than 0.0: 7

using HypothesisTests
result3 = BinomialTest(7, 9)

Binomial test
-------------
Population details:
    parameter of interest:   Probability of success
    value under h_0:         0.5
    point estimate:          0.777778
    95% confidence interval: (0.3999, 0.9719)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.1797

Details:
    number of observations: 9
    number of successes:    7

pvalue(result)  |> println
pvalue(result2) |> println
pvalue(result3) |> println

0.17968750000000006
0.17968750000000006
0.17968750000000006

pvalue(result) # デフォルトで両側検定

0.17968750000000006

pvalue(result, tail=:left) # 棄却域が左にある場合の片側検定

0.98046875

pvalue(result, tail=:right) # 棄却域が右にある場合の片側検定

0.08984375000000003

confint(result) # デフォルトで両側に棄却域がある場合で，信頼率が 95% の信頼区間

(0, 2)

confint(result, level=0.99) # 信頼率の指定

(-1, 2)

confint(result, tail=:left, level=0.99) # 棄却域が左にある場合の信頼区間

(-1, 0.0)

confint(result, tail=:right, level=0.99) # 棄却域が右にある場合の信頼区間

(0.0, 2)

using HypothesisTests

x = [3, 1, 5, 4, 2, 2, 4, 1, 5, 4]
y = [1, 1, 3, 2, 4, 1, 3, 2, 4, 2]
result = SignedRankTest(x, y)

Exact Wilcoxon signed rank test
-------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          1.0
    95% confidence interval: (0.0, 2.0)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.1562

Details:
    number of observations:      10
    Wilcoxon rank-sum statistic: 35.5
    rank sums:                   [35.5, 9.5]
    adjustment for ties:         180.0

z = x .- y
result2 = SignedRankTest(z)

Exact Wilcoxon signed rank test
-------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          1.0
    95% confidence interval: (0.0, 2.0)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.1562

Details:
    number of observations:      10
    Wilcoxon rank-sum statistic: 35.5
    rank sums:                   [35.5, 9.5]
    adjustment for ties:         180.0

pvalue(result)

0.15625

pvalue(result, tail=:left)

0.9609375

pvalue(result, tail=:right)

0.078125

confint(result)

(0.0, 2.0)

confint(result, level=0.99)

(-1.0, 2.0)

confint(result, tail=:left, level=0.99)

(-0.5, Inf)

confint(result, tail=:right, level=0.99)

(-Inf, 2.0)

using Random; Random.seed!(123)
x = randn(100)
y = randn(100) .+ 0.1
result3 = ApproximateSignedRankTest(x, y)

Approximate Wilcoxon signed rank test
-------------------------------------
Population details:
    parameter of interest:   Location parameter (pseudomedian)
    value under h_0:         0
    point estimate:          0.0187289
    95% confidence interval: (-0.3522, 0.1781)

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.5763

Details:
    number of observations:      100
    Wilcoxon rank-sum statistic: 2362.0
    rank sums:                   [2362.0, 2688.0]
    adjustment for ties:         0.0
    normal approximation (μ, σ): (-163.0, 290.839)

result3.mu, result3.sigma

(-163.0, 290.8393027085576)

using Distributions
z = result3.mu / result3.sigma

-0.5604469495078456

2*cdf(Normal(), z)

0.5751746153622865

pvalue(result3) # does not match 2*cdf(Normal(), z) above; NOTE(review): the gap is consistent with a 0.5 continuity correction on mu, i.e. (|mu| - 0.5) / sigma — confirm against the HypothesisTests source

0.576347512365529

using Statistics, StatsBase, Distributions
# Round `x` to `digits` decimal places.
roundn(x, digits) = round(x; digits)

# Render a p-value for printing: "< 0.00001" when it is below 1e-5,
# otherwise "= " followed by the value rounded to 5 decimal places.
function formatp(p)
    p < 0.00001 && return "< 0.00001"
    return string("= ", roundn(p, 5))
end

# Position (1, 2 or 3) of the first character of `method` among 'p', 's', 'k';
# `nothing` when that character is none of the three.
index(method) = findfirst(==(method[1]), ['p', 's', 'k'])
# CorTest(x, y; level=0.95, method="pearson")
#
# Compute the correlation coefficient of the paired samples `x` and `y` and
# hand it, together with the sample size, to `CorTest(n, r; level, method)`,
# which prints the test result.
#
# `method` is selected by its first character only: 'p' -> `cor`,
# 's' -> `corspearman`, 'k' -> `corkendall`. Any other first character raises
# an `ArgumentError`.
function CorTest(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}; level=0.95, method="pearson")
    choice = index(method)
    choice === nothing && throw(ArgumentError(
        "method must start with 'p' (pearson), 's' (spearman) or 'k' (kendall), got \"$method\""))
    estimator = (cor, corspearman, corkendall)[choice]
    # Forward the full method name: the scalar method compares against the
    # complete string, so an abbreviation such as "k" would otherwise be
    # tested with the wrong statistic.
    canonical = ("pearson", "spearman", "kendall")[choice]
    # Convert so that non-Float64 coefficients (e.g. from Float32 data) still
    # reach the scalar method.
    r = Float64(estimator(x, y))
    CorTest(length(x), r; level, method=canonical)
end
# CorTest(n, r; level=0.95, method="pearson")
#
# Print a test of zero correlation from a sample size `n` and an already
# computed correlation coefficient `r`.
#
# - `method` starting with "k" (Kendall): normal approximation,
#   z = r / sqrt((4n + 10) / (9n(n - 1))), two-sided p-value.
# - any other `method` (Pearson, Spearman): t = r * sqrt((n - 2) / (1 - r^2))
#   with n - 2 degrees of freedom, two-sided p-value, plus a `level`
#   confidence interval obtained through the Fisher z-transformation
#   (atanh / tanh) with standard error 1 / sqrt(n - 3).
#
# Nothing is returned; the result is written with `println`.
function CorTest(n::Integer, r::Real; level=0.95, method="pearson")
    # Decide on the first letter so that abbreviations accepted by the
    # vector method ("k", "ken", ...) select the same branch here.
    if !startswith(method, "k")
        t = r * sqrt((n - 2) / (1 - r^2))
        df = n - 2
        p = 2ccdf(TDist(df), abs(t))
        # q is the lower-tail normal quantile (negative), so [q, -q] yields
        # the lower and upper limits in that order.
        q = quantile(Normal(), 0.5 - 0.5level)
        conf = tanh.(atanh(r) .+ [q, -q] ./ sqrt(n - 3))
        # `Int(100level)` throws InexactError for levels such as 0.995 or 0.29
        # (100 * 0.29 == 28.999999999999996); round first and print fractional
        # percentages as they are.
        pct = round(100level, digits=10)
        pctlabel = isinteger(pct) ? string(Int(pct)) : string(pct)
        println("n = $n, r = $(roundn(r, 5)), t = $(roundn(t, 5)), df = $df, p-value $(formatp(p))")
        println("$pctlabel percent confidence interval = $(roundn.(conf, 5))")
    else
        z = r / sqrt((4n + 10) / (9n * (n - 1)))
        p = 2ccdf(Normal(), abs(z))
        println("n = $n, r = $(roundn(r, 5)), z = $(roundn(z, 5)), p-value $(formatp(p))")
    end
end;

x = [58.3, 43.6, 49.9, 64.1, 51.6, 36.6, 58.7, 66.7, 50.7, 53.1, 
     56.1, 42.0, 45.5, 30.9, 42.3]
y = [46.3, 51.5, 64.5, 55.6, 63.9, 35.6, 61.2, 56.9, 44.2, 57.3, 
     42.7, 47.1, 32.4, 39.7, 51.0]
CorTest(x, y)

n = 15, r = 0.52225, t = 2.20803, df = 13, p-value = 0.04582
95 percent confidence interval = [0.01363, 0.81616]

CorTest(x, y, method="spearman")

n = 15, r = 0.47143, t = 1.92737, df = 13, p-value = 0.07607
95 percent confidence interval = [-0.05384, 0.79234]

CorTest(x, y, method="kendall")

n = 15, r = 0.31429, z = 1.63308, p-value = 0.10245

CorTest(x, y, level=0.99) # 信頼率の設定は，level= で指定する

n = 15, r = 0.52225, t = 2.20803, df = 13, p-value = 0.04582
99 percent confidence interval = [-0.16269, 0.86753]

CorTest(length(x), cor(x, y))

n = 15, r = 0.52225, t = 2.20803, df = 13, p-value = 0.04582
95 percent confidence interval = [0.01363, 0.81616]

CorTest(24, 0.476, level=0.9)

n = 24, r = 0.476, t = 2.53869, df = 22, p-value = 0.01871
90 percent confidence interval = [0.15754, 0.70478]

using HypothesisTests

g1 = [27.7, 45.2, 32.8, 49.5, 31.0, 55.5, 31.7, 53.9];
g2 = [38.0, 47.4, 55.3, 52.5, 39.2, 34.9];
g3 = [44.0, 29.8, 42.5, 23.4, 20.7, 57.3, 42.0, 56.8, 44.1, 32.7];

result = OneWayANOVATest(g1, g2, g3)

One-way analysis of variance (ANOVA) test
-----------------------------------------
Population details:
    parameter of interest:   Means
    value under h_0:         "all equal"
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: fail to reject h_0
    p-value:                     0.6612

Details:
    number of observations: [8, 6, 10]
    F statistic:            0.421994
    degrees of freedom:     (2, 21)

using Statistics, Distributions

# EqualVarianceOneWayANOVATest(x...)
#
# One-way analysis of variance for the groups passed as separate vectors.
# The total sum of squares is split into a between-group part and a
# within-group part (total minus between); the ratio of their mean squares
# is compared against FDist(k - 1, n - k). The F value, both degrees of
# freedom and the p-value are printed; nothing is returned.
function EqualVarianceOneWayANOVATest(x::AbstractVector{<:Real}...)
    fmt(v) = round(v, digits=5)
    pooled = vcat(x...)
    n = length(pooled)
    k = length(x)
    grandmean = mean(pooled)
    total_ss = var(pooled) * (n - 1)
    sizes = Int[length(g) for g in x]
    centers = Float64[mean(g) for g in x]
    between_ss = sum(sizes[i] * (centers[i] - grandmean)^2 for i in 1:k)
    within_ss = total_ss - between_ss
    dfb = k - 1
    dfw = n - k
    F = (between_ss / dfb) / (within_ss / dfw)
    p = ccdf(FDist(dfb, dfw), F)
    ptext = p < 0.00001 ? "< 0.00001" : "= " * string(fmt(p))
    println("F = $(fmt(F)),  df1 = $dfb,  df2 = $dfw,  p-value $ptext")
end

EqualVarianceOneWayANOVATest (generic function with 1 method)

g1 = [27.7, 45.2, 32.8, 49.5, 31.0, 55.5, 31.7, 53.9];
g2 = [38.0, 47.4, 55.3, 52.5, 39.2, 34.9];
g3 = [44.0, 29.8, 42.5, 23.4, 20.7, 57.3, 42.0, 56.8, 44.1, 32.7];

EqualVarianceOneWayANOVATest(g1, g2, g3)

F = 0.40416,  df1 = 2,  df2 = 21,  p-value = 0.67262

using Statistics, Distributions

# UnequalVarianceOneWayANOVATest(groups...)
#
# One-way analysis of variance without the equal-variance assumption
# (Welch's method). Each group is weighted by size / variance; the weighted
# between-group variation is divided by a correction factor and compared
# against FDist(k - 1, df2), where df2 is generally fractional. The F value,
# both degrees of freedom and the p-value are printed; nothing is returned.
function UnequalVarianceOneWayANOVATest(groups::AbstractVector{<:Real}...)
    fmt(v) = round(v, digits=5)
    sizes = Int[length(g) for g in groups]
    centers = Float64[mean(g) for g in groups]
    variances = Float64[var(g) for g in groups]
    k = length(sizes)
    weights = sizes ./ variances
    total_weight = sum(weights)
    # Term shared by the denominator of F and by the second degrees of freedom.
    correction = sum(@. (1 - weights / total_weight)^2 / (sizes - 1)) / (k^2 - 1)
    weighted_mean = sum(weights .* centers) / total_weight
    df1 = k - 1
    df2 = 1 / (3 * correction)
    F = sum(weights .* (centers .- weighted_mean) .^ 2) / (df1 * (1 + 2 * (k - 2) * correction))
    p = ccdf(FDist(df1, df2), F)
    ptext = p < 0.00001 ? "< 0.00001" : "= " * string(fmt(p))
    println("F = $(fmt(F)),  df1 = $df1,  df2 = $(fmt(df2)),  p-value $ptext")
end;

UnequalVarianceOneWayANOVATest(g1, g2, g3)

F = 0.51339,  df1 = 2,  df2 = 13.589,  p-value = 0.60962

g1 = [27.7, 45.2, 32.8, 49.5, 31.0, 55.5, 31.7, 53.9];
g2 = [38.0, 47.4, 55.3, 52.5, 39.2, 34.9];
g3 = [44.0, 29.8, 42.5, 23.4, 20.7, 57.3, 42.0, 56.8, 44.1, 32.7];

result = KruskalWallisTest(g1, g2, g3)

Kruskal-Wallis rank sum test (chi-square approximation)
-------------------------------------------------------
Population details:
    parameter of interest:   Location parameters
    value under h_0:         "all equal"
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: fail to reject h_0
    one-sided p-value:           0.7082

Details:
    number of observation in each group: [8, 6, 10]
    χ²-statistic:                        0.69
    rank sums:                           [98.0, 87.0, 115.0]
    degrees of freedom:                  2
    adjustment for ties:                 1.0

result.H

0.6899999999999977

result.df

2

pvalue(result)

0.7082203534678008

using HypothesisTests
using Distributions
using Random; Random.seed!(123)
x = randn(1000);

result = ExactOneSampleKSTest(x, Normal()) # 標準正規分布に從うか

Exact one sample Kolmogorov-Smirnov test
----------------------------------------
Population details:
    parameter of interest:   Supremum of CDF differences
    value under h_0:         0.0
    point estimate:          0.0300133

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.3222

Details:
    number of observations:   1000

pvalue(result) # 両側検定

0.32218030504370365

pvalue(result, tail=:left) # 左片側検定

0.985395153969924

pvalue(result, tail=:right) # 右片側検定

0.16177235356103092

result2 = ApproximateOneSampleKSTest(randn(100), Normal()) # does the sample follow the standard normal distribution? (the reference passed here is Normal(), not a uniform distribution)

Approximate one sample Kolmogorov-Smirnov test
----------------------------------------------
Population details:
    parameter of interest:   Supremum of CDF differences
    value under h_0:         0.0
    point estimate:          0.0944015

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.3349

Details:
    number of observations:   100
    KS-statistic:             0.9440152502489629

pvalue(result2) # 両側検定

0.3348891803417554

pvalue(result2, tail=:left) # 左片側検定

0.16824574754375227

pvalue(result2, tail=:right) # 右片側検定

0.9803720417635794

using Distributions
using Random; Random.seed!(456)
x = randn(100);         # 標準正規乱数
y = randn(100) .+ 0.15; # 母平均が 0.15
result = ApproximateTwoSampleKSTest(x, y)

Approximate two sample Kolmogorov-Smirnov test
----------------------------------------------
Population details:
    parameter of interest:   Supremum of CDF differences
    value under h_0:         0.0
    point estimate:          0.2

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           0.0366

Details:
    number of observations:   [100,100]
    KS-statistic:              1.4142135623730954

pvalue(result)

0.03663105270711932

pvalue(result, tail=:left)

0.9607894391523232

pvalue(result, tail=:right)

0.018315638888734147

using Distributions
using Random; Random.seed!(123)
x = randn(1000);

result = OneSampleADTest(x, Normal()) # 標準正規分布に從うか

One sample Anderson-Darling test
--------------------------------
Population details:
    parameter of interest:   not implemented yet
    value under h_0:         NaN
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: fail to reject h_0
    one-sided p-value:           0.2543

Details:
    number of observations:   1000
    sample mean:              -0.050027717988504875
    sample SD:                0.9862398192221139
    A² statistic:             1.2358386455035488

pvalue(result)

0.2542809445499802

result.A²

1.2358386455035488

using Distributions
using Random; Random.seed!(456)
x = randn(100);         # 標準正規乱数
y = randn(100) .+ 0.15; # 母平均が 0.15
result = KSampleADTest(x, y)

k-sample Anderson-Darling test
------------------------------
Population details:
    parameter of interest:   not implemented yet
    value under h_0:         NaN
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           0.0040

Details:
    number of samples:        2
    number of observations:   200
    SD of A²k:                0.7541939849473112
    A²k statistic:            4.655692755994917
    standardized statistic:   4.847151832230945
    modified test:            true
    p-value calculation:      asymptotic

pvalue(result)

0.00403630676510518

result.A²k

4.655692755994917

result = KSampleADTest(x, y, modified=false)

k-sample Anderson-Darling test
------------------------------
Population details:
    parameter of interest:   not implemented yet
    value under h_0:         NaN
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           0.0041

Details:
    number of samples:        2
    number of observations:   200
    SD of A²k:                0.7541939849473112
    A²k statistic:            4.639424885412627
    standardized statistic:   4.825581956433769
    modified test:            false
    p-value calculation:      asymptotic

result = KSampleADTest(x, y, randn(100) .+ 1.0, nsim=10000)

k-sample Anderson-Darling test
------------------------------
Population details:
    parameter of interest:   not implemented yet
    value under h_0:         NaN
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           <1e-99

Details:
    number of samples:        3
    number of observations:   300
    SD of A²k:                1.0681421196124696
    A²k statistic:            38.555683139480266
    standardized statistic:   34.22361357002096
    modified test:            true
    p-value calculation:      simulation
    number of simulations:    10000

using HypothesisTests
using Distributions
using Random; Random.seed!(456)
x = rand(1:5, 100);
result = WaldWolfowitzTest(x)

Wald-Wolfowitz Test
-------------------
Population details:
    parameter of interest:   Number of runs
    value under h_0:         48.58
    point estimate:          53

Test summary:
    outcome with 95% confidence: fail to reject h_0
    two-sided p-value:           0.3502

Details:
    number of runs:  53
    z-statistic:     0.9341742794043235

pvalue(result)

0.350214000986417

pvalue(result, tail=:left)

0.8248929995067915

pvalue(result, tail=:right)

0.1751070004932085

result.nabove, result.nbelow, result.nruns, result.μ, result.σ, result.z

(61, 39, 53, 48.58, 4.731451183625411, 0.9341742794043235)

(result.nruns - result.μ) / result.σ # = result.z

0.9341742794043235

using Distributions
2ccdf(Normal(), result.z) # = pvalue(result); NOTE(review): agrees here because result.z is positive — presumably abs(result.z) is needed for a negative z

0.350214000986417

using Statistics
x = [2, 1, 3, 2, 5];
y = [4, 3, 2, 5, 4];
result = ExactPermutationTest(x, y, mean)

Permutation Test
----------------
Population details:
    parameter of interest:   not implemented yet
    value under h_0:         NaN
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: fail to reject h_0
    p-value:                     0.3730

Details:
    observation: -1.0
    samples: [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0  …  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

pvalue(result)

0.373015873015873

pvalue(result, tail=:left)

0.1865079365079365

pvalue(result, tail=:right)

0.9126984126984127

result.observation

-1.0

first(result.samples, 5)

5-element Vector{Float64}:
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0

using Plots
histogram(result.samples)

using FreqTables
freq = freqtable(result.samples)

12-element Named Vector{Int64}
Dim1  │ 
──────┼───────
-2.2  │  28800
-1.8  │  72000
-1.4  │ 216000
-1.0  │ 360000
-0.6  │ 504000
-0.2  │ 633600
0.2   │ 633600
0.6   │ 504000
1.0   │ 360000
1.4   │ 216000
1.8   │  72000
2.2   │  28800

n = sum(freq)

3628800

leftn = sum(freq[names(freq)[1] .<= -1])

676800

leftn/n

0.1865079365079365

result = ExactPermutationTest(x, y, median)

Permutation Test
----------------
Population details:
    parameter of interest:   not implemented yet
    value under h_0:         NaN
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: fail to reject h_0
    p-value:                     0.3333

Details:
    observation: -2.0
    samples: [-2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0, -2.0  …  2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]

result = ApproximatePermutationTest(x, y, mean, 10000)

Permutation Test
----------------
Population details:
    parameter of interest:   not implemented yet
    value under h_0:         NaN
    point estimate:          NaN

Test summary:
    outcome with 95% confidence: fail to reject h_0
    p-value:                     0.3710

Details:
    observation: -1.0
    samples: [1.0, -0.2, 0.2, -0.6, 0.6, -1.0, -0.6, 0.6, 1.8, -1.0  …  -1.0, 0.6, -0.2, -0.2, 0.2, 0.2, -0.2, -1.4, -1.0, -0.2]

using Plots
histogram(result.samples)

freqtable(result.samples)

12-element Named Vector{Int64}
Dim1  │ 
──────┼─────
-2.2  │   79
-1.8  │  172
-1.4  │  609
-1.0  │ 1039
-0.6  │ 1432
-0.2  │ 1661
0.2   │ 1761
0.6   │ 1436
1.0   │  974
1.4   │  582
1.8   │  169
2.2   │   86

mean(abs.(result.samples) .>=  abs(result.observation)) # the p-value is the share of samples that are -1 or less, or 1 or more

0.371

lambda	適用される検定統計量
λ = 1	ピアソンの $\chi^2$ 検定統計量
λ → 0	対数尤度比検定統計量 $G^2$ に収束
λ → −1	最小判別情報量統計量に収束(Gokhale and Kullback, 1978)
λ = −2	修正された $\chi^2$ 検定統計量(Neyman, 1949)
λ = −1/2	Freeman-Tukey 統計量(Freeman and Tukey, 1950)

`method`	相関係数
`"pearson"`	ピアソンの積率相関係数
`"spearman"`	スピアマンの順位相関係数
`"kendall"`	ケンドールの順位相関係数

Julia で統計解析　第 7 章　検定と推定¶

検定関数の呼び出し方¶

検定関数により得られる結果の利用法¶

分布の検定¶

観察度数が一様かどうかの検定¶

ピアソンの $\chi^2$ 検定¶

対数尤度比検定（$G^2$ 検定）¶

観察度数が理論比に従うかどうかの検定¶

ピアソンの $\chi^2$ 検定¶

対数尤度比検定（$G^2$ 検定）¶

独立性の検定¶

ピアソンの $\chi^2$ 検定¶

対数尤度比検定（$G^2$ 検定）¶

パワーダイバージェンス検定¶

フィッシャーの正確検定¶

二項検定¶

$t$ 検定¶

一標本の検定（母平均の検定）¶

等分散の場合の $t$ 検定¶

等分散でない場合 Welch の方法¶

マン・ホイットニーの $U$ 検定¶

符号検定¶

ウィルコクソンの符号付き順位和検定¶

相関係数の検定¶

対応のない $k$ 標本（独立 $k$ 標本）¶

一元配置分散分析¶

一元配置分散分析（ウェルチの方法）¶

クラスカル・ウォリス検定¶

コルモゴロフ・スミルノフ検定¶

1 標本の分布の検定¶

2 標本の分布の差の検定¶

アンダーソン・ダーリング検定¶

1 標本の場合¶

$k$ 標本の場合¶

ワルド・ウォルフォビッツ連検定¶

並べ替え検定（無作為検定）¶

Julia で統計解析 第 7 章 検定と推定¶

検定関数の呼び出し方¶

検定関数により得られる結果の利用法¶

分布の検定¶

観察度数が一様かどうかの検定¶

ピアソンの $\chi^2$ 検定¶

対数尤度比検定（$G^2$ 検定）¶

観察度数が理論比に従うかどうかの検定¶

ピアソンの $\chi^2$ 検定¶

対数尤度比検定（$G^2$ 検定）¶

独立性の検定¶

ピアソンの $\chi^2$ 検定¶

対数尤度比検定（$G^2$ 検定）¶

パワーダイバージェンス検定¶

フィッシャーの正確検定¶

二項検定¶

$t$ 検定¶

一標本の検定（母平均の検定）¶

等分散の場合の $t$ 検定¶

等分散でない場合 Welch の方法¶

マン・ホイットニーの $U$ 検定¶

符号検定¶

ウィルコクソンの符号付き順位和検定¶

相関係数の検定¶

対応のない $k$ 標本（独立 $k$ 標本）¶

一元配置分散分析¶

一元配置分散分析（ウェルチの方法）¶

クラスカル・ウォリス検定¶

コルモゴロフ・スミルノフ検定¶

1 標本の分布の検定¶

2 標本の分布の差の検定¶

アンダーソン・ダーリング検定¶

1 標本の場合¶

$k$ 標本の場合¶

ワルド・ウォルフォビッツ連検定¶

並べ替え検定（無作為検定）¶

Julia で統計解析　第 7 章　検定と推定¶