flat_package.Rmd
- Authorship and license
- Data simulation
- Simulate a single trajectory
- Simulate trajectoris from two samples diverging by a delta
- Graphical representation of simulated spatial clustered data
- Statistics
- MO median statistic
- MED median statistic
- WMW statistic
- HKR statistics
- CFF statistic
- Permutation-based computation of p-values
- References
title: "Functional statistical testing: funStatTest package"
output: html_document
editor_options:
chunk_output_type: console
library(testthat)
library(checkmate)
library(vdiffr)
# Load already included functions if relevant
pkgload::load_all(export_all = FALSE)
Authorship and license
This package was developped by:
#' List `funStatTest` package contributors
#'
#' @keywords internal
#' @filename authorship.R
#' @rdname authorship
#'
#' @author
#' Zaineb Smida
#' Ghislain DURIF
authorship <- function() {}
Data simulation
Here are some functions used to simulate clustered trajectories of functional data associated to a spatial location.
Simulate a single trajectory
The functional data simulation process is described in [1, section 3.1].
#' Single trajectory simulation
#'
#' @filename simulation.R
#' @rdname simulation
#'
#' @description
#' Simulate a trajectory of length `n_point` using a random generator
#' associated to different probability distribution.
#'
#' @param n_point integer value, number of points in the trajectory.
#' @param distrib character string, type of probability distribution used to
#' simulate the data among `"normal"`, `"cauchy"`, `"dexp"`, `"student"`.
#' @param max_iter integer, maximum number of iteration for the iterative
#' simulation process.
#'
#' @return
#' Vector of size `n_point` with the trajectory values.
#'
#' @importFrom stats rnorm rcauchy rt
#' @importFrom distr r DExp
#'
#' @inherit authorship author
#'
#' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @export
#'
#' @seealso [funStatTest::simul_data()]
#'
#' @examples
simul_traj <- function(n_point, distrib = "normal", max_iter = 10000) {
assert_count(n_point, positive = TRUE)
assert_choice(distrib, c("normal", "cauchy", "dexp", "student"))
assert_count(max_iter, positive = TRUE)
# init
vect <- (0:(n_point-1))/(n_point-1)
vecY <- numeric(n_point)
n_iter <- 0
# iter
for(k in 1:max_iter) {
sigma <- 1/((k-0.5)*pi)
y <- switch(
distrib,
normal = rnorm(1, mean = 0, sd = sigma),
cauchy = sigma * rcauchy(1, location = 0, scale = 1),
dexp = sigma * r(DExp(rate=1))(1),
student = sigma*rt(1, df = 5)
)
vecyphi <- sqrt(2)*sin(vect/sigma)
# store previous value of vecY
exvecY <- vecY
# update
vecY <- vecY + y * vecyphi
# stopping criterion (depending on the processus convergence)
if(sum((vecY-exvecY)^2)/sum((vecY)^2) < 0.001) break
# record current iteration
n_iter <- k
}
# check convergence
if(n_iter == max_iter) warning("Simulation process did not converge.")
# output
return(vecY)
}
test_that("simul_traj", {
# test ok
simu_vec <- simul_traj(100)
expect_equal(length(simu_vec), 100)
checkmate::qexpect(simu_vec, "N100")
# warning
n_point = 10
distrib = "normal"
max_iter = 2
expect_warning(simul_traj(n_point, distrib, max_iter))
})
simu_vec <- simul_traj(100)
plot(simu_vec, xlab = "point", ylab = "value")
Simulate trajectoris from two samples diverging by a delta
#' Simulation of trajectories from two samples diverging by a delta function
#'
#' @description
#' Simulate `n_obs1` trajectories of length `n_point` in the first sample and
#' `n_obs2` trajectories of length `n_point` in the second sample.
#'
#' @filename simulation.R
#' @rdname simulation
#'
#' @importFrom tibble lst
#'
#' @inheritParams funStatTest::simul_traj
#' @param n_obs1 integer value, number of trajectories in the first sample.
#' @param n_obs2 integer value, number of trajectories in the second sample.
#' @param c_val numeric value, level of divergence between the two samples.
#' @param delta_shape character string, shape of the divergence between the
#' two samples, among `"constant"`, `"linear"`, `"quadratic"`.
#'
#' @return A list with the following elements
#' - `mat_sample1`: numeric matrix of size `n_point x n_obs1` with the
#' trajectory values in columns in sample 1.
#' - `mat_sample2`: numeric matrix of size `n_point x n_obs2` with the
#' trajectory values in columns in sample 2.
#'
#' @seealso [funStatTest::plot_simu()], [funStatTest::simulate_traj()]
#'
#' @inherit authorship author
#'
#' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @export
#'
#' @examples
simul_data <- function(
n_point, n_obs1, n_obs2, c_val = 0, delta_shape = "constant",
distrib = "normal", max_iter = 10000) {
assert_count(n_point, positive = TRUE)
assert_count(n_obs1, positive = TRUE)
assert_count(n_obs2, positive = TRUE)
assert_numeric(c_val)
assert_choice(delta_shape, c("constant", "linear", "quadratic"))
# init
t_vec <- (0:(n_point-1))/(n_point-1)
# delta values
delta <- switch (
delta_shape,
constant = c_val,
linear = c_val * t_vec,
quadratic = c_val * t_vec * (1 - t_vec)
)
# MatX
mat_sample1 <- matrix(0, n_point, n_obs1)
for(i in 1:n_obs1)
mat_sample1[,i] <- simul_traj(n_point, distrib, max_iter) + delta
# MatY
mat_sample2 <- matrix(0, n_point, n_obs2)
for(j in 1:n_obs2)
mat_sample2[,j] <- simul_traj(n_point, distrib, max_iter)
# output
return(lst(mat_sample1, mat_sample2, c_val, delta_shape))
}
test_that("simul_data", {
n_point <- 100
n_obs1 <- 50
n_obs2 <- 75
c_val = 0
delta_shape = "constant"
distrib = "normal"
max_iter = 10000
simu_data <- simul_data(n_point, n_obs1, n_obs2, c_val, delta_shape,
distrib, max_iter)
expect_equal(
names(simu_data),
c("mat_sample1", "mat_sample2", "c_val", "delta_shape")
)
expect_true(is.numeric(simu_data$mat_sample1))
expect_equal(dim(simu_data$mat_sample1), c(n_point, n_obs1))
expect_true(is.numeric(simu_data$mat_sample2))
expect_equal(dim(simu_data$mat_sample2), c(n_point, n_obs2))
})
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
str(simu_data)
Graphical representation of simulated spatial clustered data
#' Graphical representation of simulated data
#'
#' @filename simulation.R
#' @rdname simulation
#'
#' @importFrom dplyr mutate row_number bind_rows n
#' @importFrom ggplot2 ggplot aes geom_point geom_line
#' theme_bw scale_colour_brewer
#' @importFrom stats setNames
#' @importFrom stringr str_c str_extract
#' @importFrom tibble as_tibble
#' @importFrom tidyr pivot_longer
#'
#' @param simu list, output of [funStatTest::simul_data()]
#'
#' @return the ggplot2 graph of simulated tajectories.
#'
#' @seealso [funStatTest::simul_data()]
#'
#' @inherit authorship author
#'
#' @export
#'
#' @examples
plot_simu <- function(simu) {
# function to prepare simulated data for plotting
prepare_data <- function(mat, sample_id) {
qassert(sample_id, "X1[1,2]")
mat_sample <- c("mat_sample1", "mat_sample2")[sample_id]
return(
as_tibble(
simu[[mat_sample]],
.name_repair = "minimal") %>%
setNames(str_c("traj", 1:length(names(.)))) %>%
mutate(sample = sample_id, point = 1L:n()) %>%
pivot_longer(
cols = starts_with("traj"),
names_to = "trajectory", values_to = "value") %>%
mutate(
sample = factor(sample),
trajectory = as.integer(str_extract(trajectory, "[0-9]+"))
)
)
}
data2plot <- bind_rows(
prepare_data(simu$mat_sample1, sample_id = 1),
prepare_data(simu$mat_sample2, sample_id = 2)
)
graph <- ggplot(
data2plot, aes(x=point, y=value, col=sample,
group=interaction(sample, trajectory))) +
geom_line(alpha = 0.5) +
scale_colour_brewer(palette="Dark2") +
theme_bw()
return(graph)
}
test_that("plot_simu", {
set.seed(1234)
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
vdiffr::expect_doppelganger(
"plot_simu", plot_simu(simu_data))
})
# constant delta
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 5,
delta_shape = "constant", distrib = "normal"
)
plot_simu(simu_data)
# linear delta
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 5,
delta_shape = "linear", distrib = "normal"
)
plot_simu(simu_data)
# quadratic delta
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 5,
delta_shape = "quadratic", distrib = "normal"
)
plot_simu(simu_data)
Statistics
MO median statistic
The MO median statistic [1] is implemented in the stat_mo()
function.
#' MO median statistic
#'
#' @filename statistics.R
#'
#' @description The MO median statistics defined in Smida et al (2022) is
#' computed to compare two sets of functional trajectories.
#'
#' @param MatX numeric matrix of dimension `n_point x n` containing `n`
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m`
#' trajectories (in columns) of size `n_point` (in rows).
#'
#' @return numeric value corresponding to the MO median statistic value
#'
#' @inherit authorship author
#'
#' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @seealso [funStatTest::permut_pval()]
#'
#' @export
#'
#' @examples
stat_mo <- function(MatX, MatY) {
n <- ncol(MatY)
m <- ncol(MatX)
n_point <- nrow(MatX)
numM <- numeric(n_point)
Med <- numeric(n_point)
v <- numeric(n_point)
for(i in 1:n){
num1 <- numeric(n_point)
num2 <- numeric(n_point)
for(k in 1:n){
if(k!=i){
v <- MatY[,i]-MatY[,k]
norm_v <- norm2(v)
if(norm_v != 0) {
num1 <- num1+v/norm_v
}
}
for(j in 1:m){
v <- MatY[,i]-MatX[,j]
norm_v <- norm2(v)
if(norm_v != 0) {
num2 <- num2+v/norm_v
}
}
}
numM <- num1+num2
denomM <- norm2(numM)
if(denomM != 0) {
Med <- Med+(numM/denomM)
}
}
Med <- Med/n
return(norm2(Med))
}
test_that("stat_mo", {
MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
checkmate::qexpect(stat_mo(MatX, MatY), "N1[0,)")
expect_true(stat_mo(MatX, MatX) < stat_mo(MatX, MatY))
})
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
MatX <- simu_data$mat_sample1
MatY <- simu_data$mat_sample2
stat_mo(MatX, MatY)
MED median statistic
The MED median statistic [1] is implemented in the stat_med()
function.
#' MED median statistic
#'
#' @filename statistics.R
#'
#' @description The MED median statistics defined in Smida et al (2022) is
#' computed to compare two sets of functional trajectories.
#'
#' @param MatX numeric matrix of dimension `n_point x n` containing `n`
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m`
#' trajectories (in columns) of size `n_point` (in rows).
#'
#' @return numeric value corresponding to the MED median statistic value
#'
#' @inherit authorship author
#'
#' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @seealso [funStatTest::permut_pval()]
#'
#' @export
#'
#' @examples
stat_med <- function(MatX, MatY) {
n <- ncol(MatY)
m <- ncol(MatX)
n_point <- nrow(MatX)
numM <- numeric(n_point)
Med <- numeric(n_point)
v <- numeric(n_point)
for(i in 1:n){
num1 <- numeric(n_point)
num2 <- numeric(n_point)
for(k in 1:n){
if(k!=i){
v <- MatY[,i]-MatY[,k]
norm_v <- norm2(v)
if(norm_v != 0) {
num1 <- num1+v/norm_v
}
}
for(j in 1:m){
v <- MatY[,i]-MatX[,j]
norm_v <- norm2(v)
if(norm_v != 0) {
num2 <- num2+v/norm_v
}
}
}
numM <- num1+num2
denomM <- norm2(numM)
if(denomM != 0) {
Med <- Med+(numM/denomM)
}
}
Med <- Med/n
return(norm2(Med))
}
test_that("stat_med", {
MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
checkmate::qexpect(stat_med(MatX, MatY), "N1[0,)")
expect_true(stat_med(MatX, MatX) < stat_med(MatX, MatY))
})
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
MatX <- simu_data$mat_sample1
MatY <- simu_data$mat_sample2
stat_med(MatX, MatY)
WMW statistic
The Wilcoxon-Mann-Whitney statistic [2] (noted WMW in [1]) is implemented in the stat_wmw()
function.
#' Wilcoxon-Mann-Whitney (WMW) statistic
#'
#' @filename statistics.R
#'
#' @description The Wilcoxon-Mann-Whitney statistic defined in
#' Chakraborty & Chaudhuri (2015) (and noted WMW in Smida et al 2022) is
#' computed to compare two sets of functional trajectories.
#'
#' @param MatX numeric matrix of dimension `n_point x n` containing `n`
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m`
#' trajectories (in columns) of size `n_point` (in rows).
#'
#' @return numeric value corresponding to the WMW statistic value
#'
#' @inherit authorship author
#'
#' @references
#' Anirvan Chakraborty, Probal Chaudhuri,
#' A Wilcoxon–Mann–Whitney-type test for infinite-dimensional data,
#' Biometrika, Volume 102, Issue 1, March 2015, Pages 239–246,
#' [<DOI:10.1093/biomet/asu072>](https://doi.org/10.1093/biomet/asu072)
#'
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @seealso [funStatTest::permut_pval()]
#'
#' @export
#'
#' @examples
stat_wmw <- function(MatX, MatY) {
n <- ncol(MatY)
m <- ncol(MatX)
npoints <- nrow(MatX)
vecWMW <- rep(0,npoints)
for (i in 1:m) {
for(j in 1:n){
diff <- MatY[,j]-MatX[,i]
norm_diff <- norm2(diff)
if(norm_diff > 0) {
vecWMW <- vecWMW + diff/norm_diff
}
}
}
vecWMW <- vecWMW/(n*m)
return(norm2(vecWMW))
}
test_that("stat_wmw", {
MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
checkmate::qexpect(stat_wmw(MatX, MatY), "N1[0,)")
expect_true(stat_wmw(MatX, MatX) < stat_wmw(MatX, MatY))
})
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
MatX <- simu_data$mat_sample1
MatY <- simu_data$mat_sample2
stat_wmw(MatX, MatY)
HKR statistics
The Horváth-Kokoszka-Reeder statistics [3] (noted HKR1 and HKR2 in [1]) are implemented in the stat_hkr()
function.
#' Horváth-Kokoszka-Reeder statistics
#'
#' @filename statistics.R
#'
#' @description The Horváth-Kokoszka-Reeder statistics defined in
#' Chakraborty & Chaudhuri (2015) (and noted HKR1 and HKR2 in Smida et al 2022)
#' are computed to compare two sets of functional trajectories.
#'
#' @param MatX numeric matrix of dimension `n_point x n` containing `n`
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m`
#' trajectories (in columns) of size `n_point` (in rows).
#'
#' @return A list with the following elements
#' - `T1`: numeric value corresponding to the HKR1 statistic value
#' - `T2`: numeric value corresponding to the HKR2 statistic value
#' - `eigenval`: numeric vector of eigen values from the empirical
#' pooled covariance matrix of `MatX` and `MatY` (see Smida et al, 2022, for
#' more details)
#'
#' @importFrom Matrix rankMatrix
#' @importFrom tibble lst
#'
#' @inherit authorship author
#'
#' @references
#' Horváth, L., Kokoszka, P., & Reeder, R. (2013). Estimation of the mean of
#' functional time series and a two-sample problem. Journal of the Royal
#' Statistical Society. Series B (Statistical Methodology), 75(1), 103–122.
#' [<JSTOR-23361016>](http://www.jstor.org/stable/23361016)
#'
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @seealso [funStatTest::permut_pval()]
#'
#' @export
#'
#' @examples
stat_hkr <- function(MatX, MatY) {
X <- MatX
Y <- MatY
M <- ncol(X)
N <- ncol(Y)
npoints <- nrow(X)
X_bar = matrix(0, npoints, 1) #initializing sum
for (index in 1:M) {
X_bar = X_bar+X[,index]
}
X_bar = X_bar/M
X_bar_matrix = matrix(X_bar, npoints, M)
Y_bar = matrix(0, npoints, 1) #initializing sum
for (index in 1:N) {
Y_bar = Y_bar+Y[,index]
}
Y_bar = Y_bar/N
Y_bar_matrix = matrix(Y_bar, npoints, N)
Z = matrix(0, npoints, N+M)
Z[,1:M] = sqrt(N/M)*(X-X_bar_matrix)
Z[,(M+1):(N+M)] = sqrt(M/N)*(Y-Y_bar_matrix)
Z = sqrt((N+M-1)/(N+M))*Z
# pca on Z
tmp <- prcomp(t(Z), center = FALSE, scale. = FALSE)
#proportion de la variance 85% pour obtenir les valeurs propres.
#exp_var <- cumsum(tmp$sdev^2)/sum(tmp$sdev^2)
#k <- head(which(exp_var >= 0.85), 1)
#D <- (1/(N+M-1)) * Z %*% t(Z)
#rk_D <- rankMatrix(D) #package{Matrix}
#rk_D
rk_Z <- rankMatrix(Z)
rk_Z
eigenvec <- as.matrix(tmp$rotation[,1:rk_Z])
eigenval <- tmp$sdev[1:rk_Z]^2
#eigenvec <- as.matrix(tmp$rotation[,1:k])
#eigenval <- tmp$sdev[1:k]^2
XY_bar_mat <- matrix(X_bar - Y_bar, nrow = 1, ncol = npoints, byrow = TRUE)
a <- XY_bar_mat %*% eigenvec
T1 = N*M / (N+M) * sum(a^2/eigenval)
T2 = N*M / (N+M) * sum(a^2)
return(lst(T1, T2, eigenval))
}
test_that("stat_hkr", {
MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
res1 <- stat_hkr(MatX, MatY)
expect_equal(names(res1), c("T1", "T2", "eigenval"))
checkmate::qexpect(res1$T1, "N1[0,)")
checkmate::qexpect(res1$T2, "N1[0,)")
checkmate::qexpect(res1$eigenval, "N+[0,)")
res2 <- stat_hkr(MatX, MatX)
expect_true(res2$T1 < res1$T1)
expect_true(res2$T2 < res1$T2)
})
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
MatX <- simu_data$mat_sample1
MatY <- simu_data$mat_sample2
stat_hkr(MatX, MatY)
CFF statistic
The Cuevas-Febrero-Fraiman statistic [4] (noted CFF in [1]) ais implemented in the stat_cff()
function.
#' Cuevas-Febrero-Fraiman statistic
#'
#' @filename statistics.R
#'
#' @description The Horváth-Kokoszka-Reeder statistics defined in
#' Chakraborty & Chaudhuri (2015) (and noted HKR1 and HKR2 in Smida et al 2022)
#' are computed to compare two sets of functional trajectories.
#'
#' @param MatX numeric matrix of dimension `n_point x n` containing `n`
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m`
#' trajectories (in columns) of size `n_point` (in rows).
#'
#' @return numeric value corresponding to the WMW statistic value
#'
#' @inherit authorship author
#'
#' @references
#' Horváth, L., Kokoszka, P., & Reeder, R. (2013). Estimation of the mean of
#' functional time series and a two-sample problem. Journal of the Royal
#' Statistical Society. Series B (Statistical Methodology), 75(1), 103–122.
#' [<JSTOR-23361016>](http://www.jstor.org/stable/23361016)
#'
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @seealso [funStatTest::permut_pval()]
#'
#' @export
#'
#' @examples
stat_cff <- function(MatX, MatY) {
m <- ncol(MatX)
n <- ncol(MatY)
N <- n+m
npoints <- nrow(MatX)
X_bar <- apply(MatX, 1, mean)
Y_bar <- apply(MatY, 1, mean)
stat <- m * (norm2(X_bar - Y_bar)^2)
return(stat)
}
test_that("stat_cff", {
MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
checkmate::qexpect(stat_cff(MatX, MatY), "N1[0,)")
expect_true(stat_cff(MatX, MatX) < stat_cff(MatX, MatY))
})
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
MatX <- simu_data$mat_sample1
MatY <- simu_data$mat_sample2
stat_cff(MatX, MatY)
Permutation-based computation of p-values
#' Permutation-based computation of p-values
#'
#' @filename pvalues.R
#'
#' @description Computation of the p-values associated to any statistics
#' described in the package with the permutation methods. See Smida et al
#' (2022) for more details.
#'
#' @param MatX numeric matrix of dimension `n_point x n` containing `n`
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m`
#' trajectories (in columns) of size `n_point` (in rows).
#' @param stat character string or vector of character string, name of the
#' statistics for which the p-values will be computed, among `"mo"`, `"med"`,
#' `"wmw"`, `"hkr"`, `"cff"`.
#' @param n_perm interger, number of permutation to compute the p-values.
#'
#' @return numeric value corresponding to the WMW statistic value
#'
#' @inherit authorship author
#'
#' @references
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022)
#' A median test for functional data, Journal of Nonparametric Statistics,
#' 34:2, 520-553,
#' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997)
#' [<hal-03658578>](https://hal.science/hal-03658578)
#'
#' @seealso [funStatTest::stat_mo()], [funStatTest::stat_med()],
#' [funStatTest::stat_wmw()], [funStatTest::stat_hkr()],
#' [funStatTest::stat_cff()]
#'
#' @export
#'
#' @examples
permut_pval <- function(MatX, MatY, n_perm = 100, stat = c("mo", "med")) {
assert_count(n_perm, positive = TRUE)
assert_choice(stat, c("mo", "med", "wmw", "hkr", "cff"))
}
test_that("permut_pval", {
expect_true(TRUE)
})
simu_data <- simul_data(
n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10,
delta_shape = "constant", distrib = "normal"
)
MatX <- simu_data$mat_sample1
MatY <- simu_data$mat_sample2
References
[1] Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) A median test for functional data, Journal of Nonparametric Statistics, 34:2, 520-553, DOI:10.1080/10485252.2022.2064997
[2] Anirvan Chakraborty, Probal Chaudhuri, A Wilcoxon–Mann–Whitney-type test for infinite-dimensional data, Biometrika, Volume 102, Issue 1, March 2015, Pages 239–246, DOI:10.1093/biomet/asu072
[3] Horváth, L., Kokoszka, P., & Reeder, R. (2013). Estimation of the mean of functional time series and a two-sample problem. Journal of the Royal Statistical Society. Series B (Statistical Methodology), 75(1), 103–122.
[4] Antonio Cuevas, Manuel Febrero, Ricardo Fraiman, An anova test for functional data, Computational Statistics & Data Analysis, Volume 47, Issue 1, 2004, Pages 111-122, ISSN 0167-9473, DOI:10.1016/j.csda.2003.10.021