Skip to content
Snippets Groups Projects
Select Git revision
  • 88a6a3a0735b43a8d3c6abbcb4cf425d4e60cb21
  • main default protected
2 results

flat_package.Rmd

Blame
  • user avatar
    Ghislain Durif authored
    88a6a3a0
    History
    title: "Functional statistical testing: funStatTest package"
    output: html_document
    editor_options: 
      chunk_output_type: console
    library(testthat)
    library(checkmate)
    library(vdiffr)
    # Load already included functions if relevant
    pkgload::load_all(export_all = FALSE)

    Authorship and license

    This package was developped by:

    #' List `funStatTest` package contributors
    #' 
    #' @keywords internal
    #' @filename authorship.R
    #' @rdname authorship
    #' 
    #' @author
    #' Zaineb Smida
    #' Ghislain DURIF
    authorship <- function() {}

    Data simulation

    Here are some functions used to simulate clustered trajectories of functional data associated to a spatial location.

    Simulate a single trajectory

    The functional data simulation process is described in [1, section 3.1].

    #' Single trajectory simulation
    #' 
    #' @filename simulation.R
    #' @rdname simulation
    #' 
    #' @description
    #' Simulate a trajectory of length `n_point` using a random generator 
    #' associated to different probability distribution.
    #'  
    #' @param n_point integer value, number of points in the trajectory.
    #' @param distrib character string, type of probability distribution used to 
    #' simulate the data among `"normal"`, `"cauchy"`, `"dexp"`, `"student"`.
    #' @param max_iter integer, maximum number of iteration for the iterative 
    #' simulation process.
    #' 
    #' @return
    #' Vector of size `n_point` with the trajectory values.
    #' 
    #' @importFrom stats rnorm rcauchy rt
    #' @importFrom distr r DExp
    #' 
    #' @inherit authorship author
    #' 
    #' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @export
    #' 
    #' @seealso [funStatTest::simul_data()]
    #' 
    #' @examples
    simul_traj <- function(n_point, distrib = "normal", max_iter = 10000) {
        
        assert_count(n_point, positive = TRUE)
        assert_choice(distrib, c("normal", "cauchy", "dexp", "student"))
        assert_count(max_iter, positive = TRUE)
        
        # init
        vect <- (0:(n_point-1))/(n_point-1)
        vecY <- numeric(n_point)
        n_iter <- 0
        
        # iter
        for(k in 1:max_iter) {
            
            sigma <- 1/((k-0.5)*pi)
            
            y <- switch(
                distrib,
                normal = rnorm(1, mean = 0, sd = sigma),
                cauchy = sigma * rcauchy(1, location = 0, scale = 1),
                dexp = sigma * r(DExp(rate=1))(1),
                student = sigma*rt(1, df = 5)
            )
            
            vecyphi <- sqrt(2)*sin(vect/sigma)
            
            # store previous value of vecY
            exvecY <- vecY
            # update
            vecY <- vecY + y * vecyphi
            
            # stopping criterion (depending on the processus convergence)
            if(sum((vecY-exvecY)^2)/sum((vecY)^2) < 0.001) break
            # record current iteration
            n_iter <- k
        }
        
        # check convergence
        if(n_iter == max_iter) warning("Simulation process did not converge.")
        
        # output
        return(vecY)
    }
    test_that("simul_traj", {
        # test ok
        simu_vec <- simul_traj(100)
        expect_equal(length(simu_vec), 100)
        checkmate::qexpect(simu_vec, "N100")
        
        # warning
        n_point = 10
        distrib = "normal"
        max_iter = 2
        expect_warning(simul_traj(n_point, distrib, max_iter))
    })
    simu_vec <- simul_traj(100)
    plot(simu_vec, xlab = "point", ylab = "value")

    Simulate trajectoris from two samples diverging by a delta

    #' Simulation of trajectories from two samples diverging by a delta function
    #' 
    #' @description
    #' Simulate `n_obs1` trajectories of length `n_point` in the first sample and 
    #' `n_obs2` trajectories of length `n_point` in the second sample.
    #' 
    #' @filename simulation.R
    #' @rdname simulation
    #' 
    #' @importFrom tibble lst
    #' 
    #' @inheritParams funStatTest::simul_traj
    #' @param n_obs1 integer value, number of trajectories in the first sample.
    #' @param n_obs2 integer value, number of trajectories in the second sample.
    #' @param c_val numeric value, level of divergence between the two samples.
    #' @param delta_shape character string, shape of the divergence between the 
    #' two samples, among `"constant"`, `"linear"`, `"quadratic"`.
    #'
    #' @return A list with the following elements
    #' - `mat_sample1`: numeric matrix of size `n_point x n_obs1` with the 
    #' trajectory values in columns in sample 1.
    #' - `mat_sample2`: numeric matrix of size `n_point x n_obs2` with the 
    #' trajectory values in columns in sample 2.
    #' 
    #' @seealso [funStatTest::plot_simu()], [funStatTest::simulate_traj()]
    #' 
    #' @inherit authorship author
    #' 
    #' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @export
    #'
    #' @examples
    simul_data <- function(
            n_point, n_obs1, n_obs2, c_val = 0, delta_shape = "constant", 
            distrib = "normal", max_iter = 10000) {
        
        assert_count(n_point, positive = TRUE)
        assert_count(n_obs1, positive = TRUE)
        assert_count(n_obs2, positive = TRUE)
        assert_numeric(c_val)
        assert_choice(delta_shape, c("constant", "linear", "quadratic"))
        
        # init
        t_vec <- (0:(n_point-1))/(n_point-1)
        
        # delta values
        delta <- switch (
            delta_shape,
            constant = c_val,
            linear = c_val * t_vec,
            quadratic = c_val * t_vec * (1 - t_vec)
        )
        
        # MatX
        mat_sample1 <- matrix(0, n_point, n_obs1)
        for(i in 1:n_obs1)
            mat_sample1[,i] <- simul_traj(n_point, distrib, max_iter) + delta
        
        # MatY
        mat_sample2 <- matrix(0, n_point, n_obs2)
        for(j in 1:n_obs2)
            mat_sample2[,j] <- simul_traj(n_point, distrib, max_iter)
        
        # output
        return(lst(mat_sample1, mat_sample2, c_val, delta_shape))
    }
    test_that("simul_data", {
        n_point <- 100
        n_obs1 <- 50
        n_obs2 <- 75
        c_val = 0
        delta_shape = "constant" 
        distrib = "normal"
        max_iter = 10000
        simu_data <- simul_data(n_point, n_obs1, n_obs2, c_val, delta_shape, 
            distrib, max_iter)
        expect_equal(
            names(simu_data), 
            c("mat_sample1", "mat_sample2", "c_val", "delta_shape")
        )
        expect_true(is.numeric(simu_data$mat_sample1))
        expect_equal(dim(simu_data$mat_sample1), c(n_point, n_obs1))
        expect_true(is.numeric(simu_data$mat_sample2))
        expect_equal(dim(simu_data$mat_sample2), c(n_point, n_obs2))
    })
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
        delta_shape = "constant", distrib = "normal"
    )
    str(simu_data)

    Graphical representation of simulated spatial clustered data

    #' Graphical representation of simulated data
    #' 
    #' @filename simulation.R
    #' @rdname simulation
    #' 
    #' @importFrom dplyr mutate row_number bind_rows n
    #' @importFrom ggplot2 ggplot aes geom_point geom_line 
    #' theme_bw scale_colour_brewer
    #' @importFrom stats setNames
    #' @importFrom stringr str_c str_extract
    #' @importFrom tibble as_tibble
    #' @importFrom tidyr pivot_longer
    #' 
    #' @param simu list, output of [funStatTest::simul_data()]
    #' 
    #' @return the ggplot2 graph of simulated tajectories.
    #' 
    #' @seealso [funStatTest::simul_data()]
    #' 
    #' @inherit authorship author
    #' 
    #' @export
    #'
    #' @examples
    plot_simu <- function(simu) {
        
        # function to prepare simulated data for plotting
        prepare_data <- function(mat, sample_id) {
            
            qassert(sample_id, "X1[1,2]")
            mat_sample <- c("mat_sample1", "mat_sample2")[sample_id]
            
            return(
                as_tibble(
                    simu[[mat_sample]], 
                    .name_repair = "minimal") %>%
                setNames(str_c("traj", 1:length(names(.)))) %>%
                mutate(sample = sample_id, point = 1L:n()) %>%
                pivot_longer(
                    cols = starts_with("traj"), 
                    names_to = "trajectory", values_to = "value") %>%
                mutate(
                    sample = factor(sample),
                    trajectory = as.integer(str_extract(trajectory, "[0-9]+"))
                )
            )
        }
        
        data2plot <- bind_rows(
            prepare_data(simu$mat_sample1, sample_id = 1),
            prepare_data(simu$mat_sample2, sample_id = 2)
        )
            
            
        
        graph <- ggplot(
            data2plot, aes(x=point, y=value, col=sample, 
                           group=interaction(sample, trajectory))) +
            geom_line(alpha = 0.5) +
            scale_colour_brewer(palette="Dark2") +
            theme_bw()
        
        return(graph)
    }
    test_that("plot_simu", {
        set.seed(1234)
        simu_data <- simul_data(
            n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
            delta_shape = "constant", distrib = "normal"
        )
        vdiffr::expect_doppelganger(
            "plot_simu", plot_simu(simu_data))
    })
    # constant delta
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 5, 
        delta_shape = "constant", distrib = "normal"
    )
    plot_simu(simu_data)
    # linear delta
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 5, 
        delta_shape = "linear", distrib = "normal"
    )
    plot_simu(simu_data)
    # quadratic delta
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 5, 
        delta_shape = "quadratic", distrib = "normal"
    )
    plot_simu(simu_data)

    Statistics

    MO median statistic

    The MO median statistic [1] is implemented in the stat_mo() function.

    #' MO median statistic
    #' 
    #' @filename statistics.R
    #' 
    #' @description The MO median statistics defined in Smida et al (2022) is 
    #' computed to compare two sets of functional trajectories.
    #' 
    #' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' 
    #' @return numeric value corresponding to the MO median statistic value
    #' 
    #' @inherit authorship author
    #' 
    #' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @seealso [funStatTest::permut_pval()]
    #' 
    #' @export
    #' 
    #' @examples
    stat_mo <- function(MatX, MatY) {
        n <- ncol(MatY)
        m <- ncol(MatX)
        n_point <- nrow(MatX)
        numM <- numeric(n_point)
        Med <- numeric(n_point)
        v <- numeric(n_point)
        
        for(i in 1:n){
            num1 <- numeric(n_point)
            num2 <- numeric(n_point)
            for(k in 1:n){
                if(k!=i){
                    v <- MatY[,i]-MatY[,k]
                    norm_v <- norm2(v)
                    if(norm_v != 0) {
                        num1 <- num1+v/norm_v
                    }
                }
                
                for(j in 1:m){
                    v <- MatY[,i]-MatX[,j]
                    norm_v <- norm2(v)
                    if(norm_v != 0) {
                        num2 <- num2+v/norm_v
                    }
                }
            }
            numM <- num1+num2
            denomM <- norm2(numM)
            if(denomM != 0) {
                Med <- Med+(numM/denomM)
            }
        }
        
        Med <- Med/n
        return(norm2(Med))
    }
    test_that("stat_mo", {
        MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
        MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
        
        checkmate::qexpect(stat_mo(MatX, MatY), "N1[0,)")
        
        expect_true(stat_mo(MatX, MatX) < stat_mo(MatX, MatY))
    })
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
        delta_shape = "constant", distrib = "normal"
    )
    
    MatX <- simu_data$mat_sample1
    MatY <- simu_data$mat_sample2
    
    stat_mo(MatX, MatY)

    MED median statistic

    The MED median statistic [1] is implemented in the stat_med() function.

    #' MED median statistic
    #' 
    #' @filename statistics.R
    #' 
    #' @description The MED median statistics defined in Smida et al (2022) is 
    #' computed to compare two sets of functional trajectories.
    #' 
    #' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' 
    #' @return numeric value corresponding to the MED median statistic value
    #' 
    #' @inherit authorship author
    #' 
    #' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @seealso [funStatTest::permut_pval()]
    #' 
    #' @export
    #' 
    #' @examples
    stat_med <- function(MatX, MatY) {
        n <- ncol(MatY)
        m <- ncol(MatX)
        n_point <- nrow(MatX)
        numM <- numeric(n_point)
        Med <- numeric(n_point)
        v <- numeric(n_point)
        
        for(i in 1:n){
            num1 <- numeric(n_point)
            num2 <- numeric(n_point)
            for(k in 1:n){
                if(k!=i){
                    v <- MatY[,i]-MatY[,k]
                    norm_v <- norm2(v)
                    if(norm_v != 0) {
                        num1 <- num1+v/norm_v
                    }
                }
                
                for(j in 1:m){
                    v <- MatY[,i]-MatX[,j]
                    norm_v <- norm2(v)
                    if(norm_v != 0) {
                        num2 <- num2+v/norm_v
                    }
                }
            }
            numM <- num1+num2  
            denomM <- norm2(numM)
            if(denomM != 0) {
                Med <- Med+(numM/denomM)
            }
        }
        
        Med <- Med/n
        return(norm2(Med))
    }
    test_that("stat_med", {
        MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
        MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
        
        checkmate::qexpect(stat_med(MatX, MatY), "N1[0,)")
        
        expect_true(stat_med(MatX, MatX) < stat_med(MatX, MatY))
    })
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
        delta_shape = "constant", distrib = "normal"
    )
    
    MatX <- simu_data$mat_sample1
    MatY <- simu_data$mat_sample2
    
    stat_med(MatX, MatY)

    WMW statistic

    The Wilcoxon-Mann-Whitney statistic [2] (noted WMW in [1]) is implemented in the stat_wmw() function.

    #' Wilcoxon-Mann-Whitney (WMW) statistic
    #' 
    #' @filename statistics.R
    #' 
    #' @description The Wilcoxon-Mann-Whitney statistic defined in 
    #' Chakraborty & Chaudhuri (2015) (and noted WMW in Smida et al 2022) is 
    #' computed to compare two sets of functional trajectories.
    #' 
    #' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' 
    #' @return numeric value corresponding to the WMW statistic value
    #' 
    #' @inherit authorship author
    #' 
    #' @references
    #' Anirvan Chakraborty, Probal Chaudhuri, 
    #' A Wilcoxon–Mann–Whitney-type test for infinite-dimensional data, 
    #' Biometrika, Volume 102, Issue 1, March 2015, Pages 239–246,
    #' [<DOI:10.1093/biomet/asu072>](https://doi.org/10.1093/biomet/asu072)
    #' 
    #' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @seealso [funStatTest::permut_pval()]
    #' 
    #' @export
    #' 
    #' @examples
    stat_wmw <- function(MatX, MatY) {
        n <- ncol(MatY)
        m <- ncol(MatX)
        npoints <- nrow(MatX)
        vecWMW <- rep(0,npoints)
        for (i in 1:m) {
            for(j in 1:n){
                diff <- MatY[,j]-MatX[,i]
                norm_diff <- norm2(diff)
                if(norm_diff > 0) {
                    vecWMW <- vecWMW + diff/norm_diff
                }
            }
        }
    
        vecWMW <- vecWMW/(n*m)
        return(norm2(vecWMW))
    }
    test_that("stat_wmw", {
        MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
        MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
        
        checkmate::qexpect(stat_wmw(MatX, MatY), "N1[0,)")
        
        expect_true(stat_wmw(MatX, MatX) < stat_wmw(MatX, MatY))
    })
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
        delta_shape = "constant", distrib = "normal"
    )
    
    MatX <- simu_data$mat_sample1
    MatY <- simu_data$mat_sample2
    
    stat_wmw(MatX, MatY)

    HKR statistics

    The Horváth-Kokoszka-Reeder statistics [3] (noted HKR1 and HKR2 in [1]) are implemented in the stat_hkr() function.

    #' Horváth-Kokoszka-Reeder statistics
    #' 
    #' @filename statistics.R
    #' 
    #' @description The Horváth-Kokoszka-Reeder statistics defined in 
    #' Chakraborty & Chaudhuri (2015) (and noted HKR1 and HKR2 in Smida et al 2022) 
    #' are computed to compare two sets of functional trajectories.
    #' 
    #' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' 
    #' @return A list with the following elements
    #' - `T1`: numeric value corresponding to the HKR1 statistic value
    #' - `T2`: numeric value corresponding to the HKR2 statistic value
    #' - `eigenval`: numeric vector of eigen values from the empirical
    #' pooled covariance matrix of `MatX` and `MatY` (see Smida et al, 2022, for
    #' more details)
    #' 
    #' @importFrom Matrix rankMatrix
    #' @importFrom tibble lst
    #' 
    #' @inherit authorship author
    #' 
    #' @references
    #' Horváth, L., Kokoszka, P., & Reeder, R. (2013). Estimation of the mean of 
    #' functional time series and a two-sample problem. Journal of the Royal 
    #' Statistical Society. Series B (Statistical Methodology), 75(1), 103–122. 
    #' [<JSTOR-23361016>](http://www.jstor.org/stable/23361016)
    #' 
    #' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @seealso [funStatTest::permut_pval()]
    #' 
    #' @export
    #' 
    #' @examples
    stat_hkr <- function(MatX, MatY) {
        X <- MatX
        Y <- MatY
    
        M <- ncol(X)
        N <- ncol(Y)
    
        npoints <- nrow(X)
        X_bar = matrix(0, npoints, 1)   #initializing sum
        for (index in 1:M) {
            X_bar = X_bar+X[,index]
        }
        X_bar = X_bar/M
        X_bar_matrix = matrix(X_bar, npoints, M)
    
        Y_bar = matrix(0, npoints, 1)   #initializing sum
        for (index in 1:N) {
            Y_bar = Y_bar+Y[,index]
        }
        Y_bar = Y_bar/N
        Y_bar_matrix = matrix(Y_bar, npoints, N)
    
        Z = matrix(0, npoints, N+M)
        Z[,1:M] = sqrt(N/M)*(X-X_bar_matrix)
        Z[,(M+1):(N+M)] = sqrt(M/N)*(Y-Y_bar_matrix)
        Z = sqrt((N+M-1)/(N+M))*Z
    
        # pca on Z
        tmp <- prcomp(t(Z), center = FALSE, scale. = FALSE)
    
        #proportion de la variance 85% pour obtenir les valeurs propres.
        #exp_var <- cumsum(tmp$sdev^2)/sum(tmp$sdev^2)
        #k <- head(which(exp_var >= 0.85), 1)
    
        #D <- (1/(N+M-1)) * Z %*% t(Z)
        #rk_D <- rankMatrix(D) #package{Matrix}
        #rk_D
        rk_Z <- rankMatrix(Z)
        rk_Z
    
        eigenvec <- as.matrix(tmp$rotation[,1:rk_Z])
        eigenval <- tmp$sdev[1:rk_Z]^2
        #eigenvec <- as.matrix(tmp$rotation[,1:k])
        #eigenval <- tmp$sdev[1:k]^2
    
        XY_bar_mat <- matrix(X_bar - Y_bar, nrow = 1, ncol = npoints, byrow = TRUE)
        a <- XY_bar_mat %*% eigenvec
    
        T1 = N*M / (N+M) * sum(a^2/eigenval)
        T2 = N*M / (N+M) * sum(a^2)
    
        return(lst(T1, T2, eigenval))
    }
    test_that("stat_hkr", {
        MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
        MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
        
        res1 <- stat_hkr(MatX, MatY)
        
        expect_equal(names(res1), c("T1", "T2", "eigenval"))
        checkmate::qexpect(res1$T1, "N1[0,)")
        checkmate::qexpect(res1$T2, "N1[0,)")
        checkmate::qexpect(res1$eigenval, "N+[0,)")
        
        res2 <- stat_hkr(MatX, MatX)
        
        expect_true(res2$T1 < res1$T1)
        expect_true(res2$T2 < res1$T2)
    })
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
        delta_shape = "constant", distrib = "normal"
    )
    
    MatX <- simu_data$mat_sample1
    MatY <- simu_data$mat_sample2
    
    stat_hkr(MatX, MatY)

    CFF statistic

    The Cuevas-Febrero-Fraiman statistic [4] (noted CFF in [1]) ais implemented in the stat_cff() function.

    #' Cuevas-Febrero-Fraiman statistic
    #' 
    #' @filename statistics.R
    #' 
    #' @description The Horváth-Kokoszka-Reeder statistics defined in 
    #' Chakraborty & Chaudhuri (2015) (and noted HKR1 and HKR2 in Smida et al 2022) 
    #' are computed to compare two sets of functional trajectories.
    #' 
    #' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' 
    #' @return numeric value corresponding to the WMW statistic value
    #' 
    #' @inherit authorship author
    #' 
    #' @references
    #' Horváth, L., Kokoszka, P., & Reeder, R. (2013). Estimation of the mean of 
    #' functional time series and a two-sample problem. Journal of the Royal 
    #' Statistical Society. Series B (Statistical Methodology), 75(1), 103–122. 
    #' [<JSTOR-23361016>](http://www.jstor.org/stable/23361016)
    #' 
    #' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @seealso [funStatTest::permut_pval()]
    #' 
    #' @export
    #' 
    #' @examples
    stat_cff <- function(MatX, MatY) {
        m <- ncol(MatX)
        n <- ncol(MatY)
        N <- n+m
        npoints <- nrow(MatX)
        X_bar <- apply(MatX, 1, mean)
        Y_bar <- apply(MatY, 1, mean)
        stat <- m * (norm2(X_bar - Y_bar)^2)
        return(stat)
    }
    test_that("stat_cff", {
        MatX <- matrix(rnorm(100), nrow = 25, ncol = 4)
        MatY <- matrix(rnorm(100), nrow = 25, ncol = 4) + 10
        
        checkmate::qexpect(stat_cff(MatX, MatY), "N1[0,)")
        
        expect_true(stat_cff(MatX, MatX) < stat_cff(MatX, MatY))
    })
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
        delta_shape = "constant", distrib = "normal"
    )
    
    MatX <- simu_data$mat_sample1
    MatY <- simu_data$mat_sample2
    
    stat_cff(MatX, MatY)

    Permutation-based computation of p-values

    #' Permutation-based computation of p-values
    #' 
    #' @filename pvalues.R
    #' 
    #' @description Computation of the p-values associated to any statistics
    #' described in the package with the permutation methods. See Smida et al 
    #' (2022) for more details.
    #' 
    #' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
    #' trajectories (in columns) of size `n_point` (in rows).
    #' @param stat character string or vector of character string, name of the 
    #' statistics for which the p-values will be computed, among `"mo"`, `"med"`,
    #' `"wmw"`, `"hkr"`, `"cff"`.
    #' @param n_perm interger, number of permutation to compute the p-values.
    #' 
    #' @return numeric value corresponding to the WMW statistic value
    #' 
    #' @inherit authorship author
    #' 
    #' @references
    #' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
    #' A median test for functional data, Journal of Nonparametric Statistics, 
    #' 34:2, 520-553,  
    #' [<DOI:10.1080/10485252.2022.2064997>](https://dx.doi.org/10.1080/10485252.2022.2064997) 
    #' [<hal-03658578>](https://hal.science/hal-03658578)
    #' 
    #' @seealso [funStatTest::stat_mo()], [funStatTest::stat_med()], 
    #' [funStatTest::stat_wmw()], [funStatTest::stat_hkr()], 
    #' [funStatTest::stat_cff()]
    #' 
    #' @export
    #' 
    #' @examples
    permut_pval <- function(MatX, MatY, n_perm = 100, stat = c("mo", "med")) {
        assert_count(n_perm, positive = TRUE)
        assert_choice(stat, c("mo", "med", "wmw", "hkr", "cff"))
    }
    test_that("permut_pval", {
        expect_true(TRUE)
    })
    simu_data <- simul_data(
        n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
        delta_shape = "constant", distrib = "normal"
    )
    
    MatX <- simu_data$mat_sample1
    MatY <- simu_data$mat_sample2

    References

    [1] Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) A median test for functional data, Journal of Nonparametric Statistics, 34:2, 520-553, DOI:10.1080/10485252.2022.2064997

    [2] Anirvan Chakraborty, Probal Chaudhuri, A Wilcoxon–Mann–Whitney-type test for infinite-dimensional data, Biometrika, Volume 102, Issue 1, March 2015, Pages 239–246, DOI:10.1093/biomet/asu072

    [3] Horváth, L., Kokoszka, P., & Reeder, R. (2013). Estimation of the mean of functional time series and a two-sample problem. Journal of the Royal Statistical Society. Series B (Statistical Methodology), 75(1), 103–122.

    [4] Antonio Cuevas, Manuel Febrero, Ricardo Fraiman, An anova test for functional data, Computational Statistics & Data Analysis, Volume 47, Issue 1, 2004, Pages 111-122, ISSN 0167-9473, DOI:10.1016/j.csda.2003.10.021