ANOVA toolbox

ANOVA toolbox Commit Details

Date:2013-08-02 12:08:36 (5 years 1 month ago)
Author:Holger Nahrstaedt
Commit:1
Message:initial commit
Changes:
A/macros/anova_manova.sci
A/demos
A/macros/anova_anova1.sci
A/etc/anova.start
A/etc
A/readme.txt
A/tests/nonreg_tests
A/help/en_US/build_help.sce
A/macros
A/macros/cleanmacros.sce
A/help
A/demos/anova.dem.gateway.sce
A/demos/anova_anova1_demo.sce
A/builder.sce
A/help/builder_help.sce
A/etc/anova.quit
A/license.txt
A/help/en_US
A/macros/buildmacros.sce
A/tests
A/tests/unit_tests
A/help/en_US/update_help.sce
A/changelog.txt

File differences

macros/buildmacros.sce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Copyright (C) 2012 - Michael Baudin
// Copyright (C) 2008-2009 - INRIA - Michael Baudin
//
// This file must be used under the terms of the CeCILL.
// This source file is licensed as described in the file COPYING, which
// you should have received as part of this distribution. The terms
// are also available at
// http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
//
// buildmacros.sce --
// Builder for the Statistical Hypothesis Testing Scilab Toolbox
//
function hypt_buildMacros()
path = get_absolute_file_path("buildmacros.sce")
tbx_build_macros("hypt", path);
endfunction
hypt_buildMacros();
clear hypt_buildMacros;
macros/cleanmacros.sce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// ====================================================================
// Allan CORNET
// DIGITEO 2009
// This file is released into the public domain
// ====================================================================
libpath = get_absolute_file_path('cleanmacros.sce');
binfiles = ls(libpath+'/*.bin');
for i = 1:size(binfiles,'*')
mdelete(binfiles(i));
end
mdelete(libpath+'/names');
mdelete(libpath+'/lib');
// ====================================================================
macros/anova_manova.sci
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
function anova_manova (x, g)
// Copyright (C) 1996-2011 Kurt Hornik
//
// This file is part of Octave.
//
// Octave is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or (at
// your option) any later version.
//
// Octave is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Octave; see the file COPYING. If not, see
// <http://www.gnu.org/licenses/>
// -*- texinfo -*-
// @deftypefn {Function File} {} manova (@var{x}, @var{g})
// Perform a one-way multivariate analysis of variance (MANOVA). The
// goal is to test whether the p-dimensional population means of data
// taken from @var{k} different groups are all equal. All data are
// assumed drawn independently from p-dimensional normal distributions
// with the same covariance matrix.
//
// The data matrix is given by @var{x}. As usual, rows are observations
// and columns are variables. The vector @var{g} specifies the
// corresponding group labels (e.g., numbers from 1 to @var{k}).
//
// The LR test statistic (Wilks' Lambda) and approximate p-values are
// computed and displayed.
// @end deftypefn
//
// Three test statistics (Wilks, Hotelling-Lawley, and Pillai-Bartlett)
// and corresponding approximate p-values are calculated and displayed.
// (Currently NOT because the f_cdf respectively betai code is too bad.)
//
// Author: TF <Thomas.Fuereder@ci.tuwien.ac.at>
// Adapted-By: KH <Kurt.Hornik@wu-wien.ac.at>
// Description: One-way multivariate analysis of variance (MANOVA)
[nargout,nargin]=argn(0);
if (nargin ~= 2)
error("wrong number of parameters") ;
end
if (isvector (x))
error ("manova: Y must not be a vector");
end
[n, p] = size (x);
if (~isvector (g) | (length (g) ~= n))
error ("manova: G must be a vector of length rows (Y)");
end
s = sort (g);
i = find (s (2:n) > s(1:(n-1)));
k = length (i) + 1;
if (k == 1)
error ("manova: there should be at least 2 groups");
else
group_label = s ([1, (matrix (i, 1, k - 1) + 1)]);
end
x = x - ones (n, 1) * mean (x);
SST = x' * x;
s = zeros (1, p);
SSB = zeros (p, p);
for i = 1 : k;
v = x (find (g == group_label (i)), :);
s = sum (v);
SSB = SSB + s' * s / rows (v);
end
n_b = k - 1;
SSW = SST - SSB;
n_w = n - k;
l = real (spec (SSB / SSW));
// if (isa (l, "single"))
// l (l < eps ("single")) = 0;
// else
l (l < %eps) = 0;
// end
// Wilks' Lambda
// =============
Lambda = prod (1 ./ (1 + l));
delta = n_w + n_b - (p + n_b + 1) / 2;
df_num = p * n_b;
W_pval_1 = 1 - chi2cdf (- delta * log (Lambda), df_num);
if (p < 3)
eta = p;
else
eta = sqrt ((p^2 * n_b^2 - 4) / (p^2 + n_b^2 - 5));
end
df_den = delta * eta - df_num / 2 + 1;
WT = exp (- log (Lambda) / eta) - 1;
W_pval_2 = 1 - f_cdf (WT * df_den / df_num, df_num, df_den);
if 0 then
// Hotelling-Lawley Test
// =====================
HL = sum (l);
theta = min (p, n_b);
u = (abs (p - n_b) - 1) / 2;
v = (n_w - p - 1) / 2;
df_num = theta * (2 * u + theta + 1);
df_den = 2 * (theta * v + 1);
HL_pval = 1 - f_cdf (HL * df_den / df_num, df_num, df_den);
// Pillai-Bartlett
// ===============
PB = sum (l ./ (1 + l));
df_den = theta * (2 * v + theta + 1);
PB_pval = 1 - f_cdf (PB * df_den / df_num, df_num, df_den);
printf ("\n");
printf ("One-way MANOVA Table:\n");
printf ("\n");
printf ("Test Test Statistic Approximate p\n");
printf ("**************************************************\n");
printf ("Wilks %10.4f %10.9f \n", Lambda, W_pval_1);
printf (" %10.9f \n", W_pval_2);
printf ("Hotelling-Lawley %10.4f %10.9f \n", HL, HL_pval);
printf ("Pillai-Bartlett %10.4f %10.9f \n", PB, PB_pval);
printf ("\n");
end
printf ("\n");
printf ("MANOVA Results:\n");
printf ("\n");
printf ("# of groups: %d\n", k);
printf ("# of samples: %d\n", n);
printf ("# of variables: %d\n", p);
printf ("\n");
printf ("Wilks'' Lambda: %5.4f\n", Lambda);
printf ("Approximate p: %10.9f (chisquare approximation)\n", W_pval_1);
printf (" %10.9f (F approximation)\n", W_pval_2);
printf ("\n");
endfunction
macros/anova_anova1.sci
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
// Copyright (C) 2013 - Holger Nahrstaedt
// Copyright (C) 1995-2011 Kurt Hornik
//
// This file must be used under the terms of the CeCILL.
// This source file is licensed as described in the file COPYING, which
// you should have received as part of this distribution. The terms
// are also available at
// http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
function [pval, f, df_b, df_w] = anova_anova1 (y, g,alpha)
// Perform a one-way analysis of variance (ANOVA)
// Calling Sequence
// [pval, f, df_b, df_w] = anova_anova1 (y)
// [pval, f, df_b, df_w] = anova_anova1 (y,alpha)
// [pval, f, df_b, df_w] = anova_anova1 (y,g)
// [pval, f, df_b, df_w] = anova_anova1 (y,g,alpha)
// Parameters
//
// Description
// Perform a one-way analysis of variance (ANOVA). The goal is to test
// whether the population means of data taken from k different
// groups are all equal.
//
// Data may be given in a single vector y with groups specified by
// a corresponding vector of group labels g (e.g., numbers from 1
// to k). This is the general form which does not impose any
// restriction on the number of data in each group or the group labels.
//
// If y is a matrix and g is omitted, each column of y
// is treated as a group. This form is only appropriate for balanced
// ANOVA in which the numbers of samples from each group are all equal.
//
// Under the null of constant means, the statistic f follows an F
// distribution with df_b and df_w degrees of freedom.
//
// The p-value (1 minus the CDF of this distribution at f) is
// returned in pval.
//
// If no output argument is given, the standard one-way ANOVA table is
// printed.
// Examples
// y=[17,25,22,26;19,27,21,24;20,18,19,30;24,22,26,28];
// anova_anova1(y,0.01);
//
// One-way ANOVA Table:
//
// Source of Variation Sum of Squares df Empirical Var
// *********************************************************
// Between Groups 104.0000 3 34.6667
// Within Groups 118.0000 12 9.8333
// ---------------------------------------------------------
// Total 222.0000 15
//
// Test Statistic f 3.5254
// p-value 0.0487
// Fcrit (alpha=0.0100) 5.9525
//
// y_list=list(y(:,1),y(:,2),y(:,3),y(:,4));
// anova_anova1(y_list,0.01);
//
// y_vec=y(:);
// groups=[1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4];
// anova_anova1(y_vec,groups,0.01);
//
// Authors
// Copyright (C) 1995-2011 Kurt Hornik
// H. Nahrstaedt - 2012
[lhs,rhs]=argn();
apifun_checkrhs ( "anova_anova1" , rhs , 1:3 );
apifun_checklhs ( "anova_anova1" , lhs , 0:4 )
if (rhs < 3) & type(y)==[15] then
if (length (y)==1)
error ("anova_anova1: for anova (Y), Y must not be a vector");
end
if rhs==2 then
alpha=g;
else
alpha=[];
end;
k=length(y);
group_count=zeros(1,k);
group_mean=zeros(1,k);
n=0;
y_tmp=y;
y=[];
for i=1:k
group_count(i)=length(thrownan(y_tmp(i)));
group_mean(i) = mean (thrownan(y_tmp(i)));
n=n+group_count(i);
tmp=y_tmp(i);
y=[y;tmp(:)];
end
elseif (~isvector (y)) & rhs<3 & or(type(y)==[1 5 8]) then
if (isvector (y))
error ("anova_anova1: for anova (Y), Y must not be a vector");
end
if rhs==2 then
alpha=g;
else
alpha=[];
end;
[group_count, k] = size (y);
n = group_count * k;
group_mean = nanmean (y,'r');
else
if rhs<3 then
alpha=[];
end;
if (~ isvector (y))
error ("anova_anova1: for anova (Y, G), Y must be a vector");
end
n = length (y);
if (~ isvector (g) | (length (g) ~= n))
error ("anova_anova1: G must be a vector of the same length as Y");
end
// remove parted in which g has nan
y(find(isnan(g)))=[];
g(find(isnan(g)))=[];
n = length (y);
s = mtlb_sort (g);
i = find (s (2 : n) > s(1 : (n-1)));
k = length (i) + 1;
if (k == 1)
error ("anova_anova1: there should be at least 2 groups");
else
group_label = s ([1, (matrix (i, 1, k-1) + 1)]);
end
n=0
group_count=zeros(1,k);
group_mean=zeros(1,k);
y_tmp=y;
y=[];
for i = 1 : k;
v = y_tmp (find (g == group_label (i)));
group_count (i) = length (thrownan(v));
group_mean (i) = nanmean (thrownan(v));
n=n+group_count(i);
y=[y;v(:)];
end
end
total_mean = nanmean (y);
SSB = nansum (group_count .* (group_mean - total_mean) .^ 2);
SST = nansum( (thrownan(y) - total_mean).^2);
SSW = SST - SSB;
df_b = k - 1;
df_w = n - k;
v_b = SSB / df_b;
v_w = SSW / df_w;
f = v_b / v_w;
pval = 1 - distfun_fcdf (f, df_b, df_w);
//if (length(df_b)==1 & length(df_w)==1)
//pval = 1- (1 - nan_betainc (1 ./ (1 + df_b .* f ./ df_w), df_w / 2, df_b / 2));
if ~isempty(alpha)
fcrit=cdff("F",df_b,df_w,1-alpha,alpha)
end;
//if (nargout == 0)
// This eventually needs to be done more cleanly ...
printf ("\n");
printf ("One-way ANOVA Table:\n");
printf ("\n");
printf ("Source of Variation Sum of Squares df Empirical Var\n");
printf ("*********************************************************\n");
printf ("Between Groups %15.4f %4d %13.4f\n", SSB, df_b, v_b);
printf ("Within Groups %15.4f %4d %13.4f\n", SSW, df_w, v_w);
printf ("---------------------------------------------------------\n");
printf ("Total %15.4f %4d\n", SST, n - 1);
printf ("\n");
printf ("Test Statistic f %15.4f\n", f);
printf ("p-value %15.4f\n", pval);
if ~isempty(alpha) then
printf ("Fcrit (alpha=%1.4f) %15.4f\n",alpha,fcrit);
end;
printf ("\n");
//end
endfunction
changelog.txt
1
2
3
4
changelog of the ANOVA Scilab Toolbox
anova (0.1)
* first version
demos/anova.dem.gateway.sce
1
2
3
4
5
6
7
8
9
10
11
12
13
// Copyright (C) 2012 - Michael Baudin
//
// This file must be used under the terms of the CeCILL.
// This source file is licensed as described in the file COPYING, which
// you should have received as part of this distribution. The terms
// are also available at
// http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
demopath = get_absolute_file_path("hypt.dem.gateway.sce");
subdemolist = [
"anova1 demo", "anova_anova1_demo.sce"; ..
];
subdemolist(:,2) = demopath + subdemolist(:,2)
demos/anova_anova1_demo.sce
1
2
3
4
mode(1);
X = meshgrid(1:5);
X = X + distfun_normrnd(0,1,5,5);
p = anova_anova1(X)
help/builder_help.sce
1
2
3
4
5
6
7
8
9
// ====================================================================
// Copyright INRIA 2008
// This file is released into the public domain
// ====================================================================
help_dir = get_absolute_file_path('builder_help.sce');
tbx_builder_help_lang("en_US", help_dir);
clear help_dir;
help/en_US/build_help.sce
1
2
3
4
5
6
7
8
9
10
11
12
// ====================================================================
// Copyright INRIA 2008
// Allan CORNET
// Simon LIPP
// This file is released into the public domain
// ====================================================================
help_lang_dir = get_absolute_file_path('build_help.sce');
tbx_build_help(TOOLBOX_TITLE, help_lang_dir);
clear help_lang_dir;
help/en_US/update_help.sce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
// Copyright (C) 2010 - 2011 - Michael Baudin
//
// This file must be used under the terms of the CeCILL.
// This source file is licensed as described in the file COPYING, which
// you should have received as part of this distribution. The terms
// are also available at
// http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
// Updates the .xml files by deleting existing files and
// creating them again from the .sci with help_from_sci.
//
cwd = get_absolute_file_path("update_help.sce");
mprintf("Working dir = %s\n",cwd);
//
// Generate the library help
mprintf("Updating hypt\n");
helpdir = cwd;
funmat = [
"hypt_ttest"
"hypt_ttest2"
"hypt_vartest"
"hypt_vartest2"
"hypt_signtest"
"hypt_signrank"
"hypt_ztest"
"hypt_barttest"
];
macrosdir = cwd +"../../macros";
demosdir = [];
modulename = "hypt";
helptbx_helpupdate ( funmat , helpdir , macrosdir , demosdir , modulename , %t );
etc/anova.start
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
// Copyright (C) 2012 - Michael Baudin
// Copyright (C) 2010 - DIGITEO - Michael Baudin
//
// This file must be used under the terms of the CeCILL.
// This source file is licensed as described in the file COPYING, which
// you should have received as part of this distribution. The terms
// are also available at
// http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
function anovalib = anova_load()
TOOLBOX_NAME = "anova"
TOOLBOX_TITLE = "ANOVA toolbox"
mprintf("Start %s\n",TOOLBOX_TITLE);
etc_tlbx = get_absolute_file_path(TOOLBOX_NAME+".start");
etc_tlbx = getshortpathname(etc_tlbx);
root_tlbx = strncpy( etc_tlbx, length(etc_tlbx)-length("\etc\") );
//Load functions library
// =============================================================================
mprintf("\tLoad macros\n");
pathmacros = pathconvert( root_tlbx ) + "macros" + filesep();
anovalib = lib(pathmacros);
clear pathmacros;
// load gateways
// =============================================================================
if ( %f ) then
mprintf("\tLoad gateways\n");
ilib_verbose(0);
exec( pathconvert(root_tlbx+"/sci_gateway/loader_gateway.sce",%f));
end
// Load and add help chapter
// =============================================================================
if ( %t ) then
if or(getscilabmode() == ["NW";"STD"]) then
mprintf("\tLoad help\n");
path_addchapter = pathconvert(root_tlbx+"/jar");
if ( isdir(path_addchapter) <> [] ) then
add_help_chapter(TOOLBOX_TITLE, path_addchapter, %F);
end
end
end
// add demos
// =============================================================================
if ( %t ) then
if or(getscilabmode() == ["NW";"STD"]) then
mprintf("\tLoad demos\n");
demoscript = TOOLBOX_NAME + ".dem.gateway.sce"
pathdemos = pathconvert(fullfile(root_tlbx,"demos",demoscript),%f,%t);
add_demo(TOOLBOX_TITLE,pathdemos);
end
end
// ====================================================================
// A Welcome message.
//mprintf("\tType ""help anova_overview"" for quick start.\n");
mprintf("\tType ""demo_gui()"" and search for "+TOOLBOX_TITLE+" for Demonstrations.\n");
endfunction
if ( isdef("anovalib") ) then
warning("Library is already loaded (""ulink(); clear anovalib;"" to unload.)");
return;
end
anovalib = anova_load ();
clear anova_load;
readme.txt
1
2
3
4
5
ANOVA module : anova
anova : Perform a one-way analysis of variance (ANOVA) (http://en.wikipedia.org/wiki/Anova)
manova : a one-way multivariate analysis of variance (MANOVA) (http://en.wikipedia.org/wiki/MANOVA)
builder.sce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Copyright (C) 2012 - Michael Baudin
//
// This file must be used under the terms of the CeCILL.
// This source file is licensed as described in the file COPYING, which
// you should have received as part of this distribution. The terms
// are also available at
// http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt
function distfun_buildToolbox()
mode(-1);
lines(0);
try
getversion("scilab");
catch
error(gettext("Scilab 5.0 or more is required."));
end;
// ====================================================================
if ~with_module("development_tools") then
error(msprintf(gettext("%s module not installed."),"development_tools"));
end
// ====================================================================
TOOLBOX_NAME = "hypt";
TOOLBOX_TITLE = "Statistical Hypothesis Testing";
// ====================================================================
toolbox_dir = get_absolute_file_path("builder.sce");
//tbx_builder_src(toolbox_dir);
//tbx_builder_gateway(toolbox_dir);
tbx_builder_macros(toolbox_dir);
tbx_builder_help(toolbox_dir);
tbx_build_loader(TOOLBOX_NAME, toolbox_dir);
tbx_build_cleaner(TOOLBOX_NAME, toolbox_dir);
endfunction
distfun_buildToolbox();
clear distfun_buildToolbox;
license.txt
1
2
3
4
5
Hypt Scilab Toolbox
This toolbox is released under the terms of the CeCILL_V2 license :
http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt

Archive Download the corresponding diff file

Revision: 1