Artifact [5ab190bcc6]
Not logged in

## Artifact 5ab190bcc61f4e9c54593d412f18f449e2db3dc4:

``````# statistics_new.tcl --
#     Implementation of the Wilcoxon test: test if the medians
#     of two samples are the same
#

# test-Wilcoxon
#     Compute the statistic that indicates if the medians of two
#     samples are the same
#
# Arguments:
#     sample_a       List of values in the first sample
#     sample_b       List of values in the second sample
#
# Result:
#     Statistic for the test (if both samples have 10 or more
#     values, the statistic behaves as a standard normal variable)
#
proc ::math::statistics::test-Wilcoxon {sample_a sample_b} {

#
# Construct the sorted list for both
#
set sorted  {}
set count_a 0
set count_b 0
foreach sample {sample_a sample_b} code {0 1} count {count_a count_b} {
foreach v [set \$sample] {
if { \$v ne {} } {
incr \$count
lappend sorted [list \$v \$code]
}
}
}

set raw_sorted [lsort -index 0 -real \$sorted]

#
# Resolve the ties (TODO)
# - Make sure the previous value is never equal to the first
# - Take care of the last part of the sorted samples
#
set previous [expr {0.5*[lindex \$raw_sorted 0 0] - 1.0}]

set sorted    \$raw_sorted
set rank      0
set sum_ranks 0
set count     0
set first     0
set index     0
foreach v [concat \$raw_sorted {{} -1}] {
set  sum_ranks [expr {\$sum_ranks + \$rank}]
incr count
set  current [lindex \$v 0]
if { \$current != \$previous } {
set new_rank [expr {\$sum_ranks / \$count}]

if { \$index > [llength \$raw_sorted] } {
set index [llength \$raw_sorted]
}

for {set elem \$first} {\$elem < \$index} {incr elem} {
lset sorted \$elem 0 \$new_rank
}

set previous  \$current
set first     \$index
set count     0
set sum_ranks 0
}

incr index
incr rank
}

#
# Sum the ranks for the first sample and determine
# the statistic
#
if { \$count_a < 2 || \$count_b < 2 } {
return -code error \
-errorcode DATA -errorinfo {Too few data in one or both samples}
}

set sum  0
foreach v \$sorted {
if { [lindex \$v 1] == 0 } {
set rank [lindex \$v 0]
set sum  [expr {\$sum + \$rank}]
}
}

set expected  [expr {\$count_a * (\$count_a + \$count_b + 1)/2.0}]
set stdev     [expr {sqrt(\$count_b * \$expected/6.0)}]
set statistic [expr {(\$sum-\$expected)/\$stdev}]

return \$statistic
}

# SpearmanRankData --
#     Auxiliary procedure to rank the data
#
# Arguments:
#     sample             Series of data to be ranked
#
# Returns:
#     Ranks of the data
#
proc ::math::statistics::SpearmanRankData {sample} {

set counted_sample {}
set count          0
foreach v \$sample {
if { \$v ne {} } {
incr count
lappend counted_sample [list \$v 0 \$count]
}
}

set raw_sorted [lsort -index 0 -real \$counted_sample]

#
# Resolve the ties (TODO)
# - Make sure the previous value is never equal to the first
# - Take care of the last part of the sorted samples
#
set previous [expr {0.5*[lindex \$raw_sorted 0 0] - 1.0}]

set sorted    \$raw_sorted
set rank      0
set sum_ranks 0
set count     0
set first     0
set index     0
foreach v [concat \$raw_sorted {{} -1}] {
set  sum_ranks [expr {\$sum_ranks + \$rank}]
incr count
set  current [lindex \$v 0]
if { \$current != \$previous } {
set new_rank [expr {\$sum_ranks / \$count}]

if { \$index > [llength \$raw_sorted] } {
set index [llength \$raw_sorted]
}

for {set elem \$first} {\$elem < \$index} {incr elem} {
lset sorted \$elem 1 \$new_rank
}

set previous  \$current
set first     \$index
set count     0
set sum_ranks 0
}

incr index
incr rank
}

#
# Return the ranks of the data in the original order
#
set ranks {}
foreach values [lsort -index 2 -integer \$sorted] {
lappend ranks [lindex \$values 1]
}

return \$ranks
}

# spearman-rank-extended --
#     Compute the Spearman's rank correlation coefficient and
#     associated parameters
#
# Arguments:
#     sample_a       List of values in the first sample
#     sample_b       List of values in the second sample
#
# Result:
#     List of:
#     - Rank correlation coefficient
#     - Number of data
#     - z-score to test the null hyothesis
#
proc ::math::statistics::spearman-rank-extended {sample_a sample_b} {

#
# Filter out missing data
#
if { [llength \$sample_a] != [llength \$sample_b] } {
return -code error \
-errorcode DATA -errorinfo {The two samples should have the same number of data}
}

set new_sample_a {}
set new_sample_b {}
foreach a \$sample_a b \$sample_b {
if { \$a != {} && \$b != {} } {
lappend new_sample_a \$a
lappend new_sample_b \$b
}
}

#
# Construct the ranks
#
set rank_a [SpearmanRankData \$new_sample_a]
set rank_b [SpearmanRankData \$new_sample_b]

set rcorr  [corr \$rank_a \$rank_b]
set number [llength \$new_sample_a]
set zscore [expr {sqrt((\$number-3)/1.06) * 0.5 * log((1.0+\$rcorr)/(1.0-\$rcorr))}]

return [list \$rcorr \$number \$zscore]
}

# spearman-rank --
#     Compute the Spearman's rank correlation coefficient
#
# Arguments:
#     sample_a       List of values in the first sample
#     sample_b       List of values in the second sample
#
# Result:
#     Rank correlation coefficient
#
proc ::math::statistics::spearman-rank {sample_a sample_b} {
return [lindex [spearman-rank-extended \$sample_a \$sample_b] 0]
}
```
```