.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/plot_knockoff_aggregation.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_plot_knockoff_aggregation.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_plot_knockoff_aggregation.py:


Knockoff aggregation on simulated data
======================================

In this example, we show an example of variable selection using
model-X Knockoffs introduced by :footcite:t:`Candes_2018`. A notable
drawback of this procedure is the randomness associated with
the knockoff generation process. This can result in unstable
inference.

This example exhibits the two aggregation procedures described
by :footcite:t:`pmlr-v119-nguyen20a` and :footcite:t:`Ren_2023` to derandomize
inference.

References
----------
.. footbibliography::

.. GENERATED FROM PYTHON SOURCE LINES 22-24

Imports needed for this script
------------------------------

.. GENERATED FROM PYTHON SOURCE LINES 24-159

.. code-block:: Python


    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.utils import check_random_state

    from hidimstat.data_simulation import simu_data
    from hidimstat.knockoff_aggregation import knockoff_aggregation
    from hidimstat.knockoffs import model_x_knockoff
    from hidimstat.utils import cal_fdp_power

    plt.rcParams.update({"font.size": 26})

    # Number of observations
    n_subjects = 500
    # Number of variables
    n_clusters = 500
    # Correlation parameter
    rho = 0.7
    # Ratio of number of variables with non-zero coefficients over total
    # coefficients
    sparsity = 0.1
    # Desired controlled False Discovery Rate (FDR) level
    fdr = 0.1
    seed = 45
    n_bootstraps = 25
    n_jobs = 10
    runs = 20

    rng = check_random_state(seed)
    seed_list = rng.randint(1, np.iinfo(np.int32).max, runs)


    def single_run(
        n_subjects, n_clusters, rho, sparsity, fdr, n_bootstraps, n_jobs, seed=None
    ):
        # Generate data
        X, y, _, non_zero_index = simu_data(
            n_subjects, n_clusters, rho=rho, sparsity=sparsity, seed=seed
        )

        # Use model-X Knockoffs [1]
        mx_selection = model_x_knockoff(X, y, fdr=fdr, n_jobs=n_jobs, seed=seed)

        fdp_mx, power_mx = cal_fdp_power(mx_selection, non_zero_index)
        # Use p-values aggregation [2]
        aggregated_ko_selection = knockoff_aggregation(
            X,
            y,
            fdr=fdr,
            n_bootstraps=n_bootstraps,
            n_jobs=n_jobs,
            gamma=0.3,
            random_state=seed,
        )

        fdp_pval, power_pval = cal_fdp_power(aggregated_ko_selection, non_zero_index)

        # Use e-values aggregation [1]
        eval_selection = knockoff_aggregation(
            X,
            y,
            fdr=fdr,
            method="e-values",
            n_bootstraps=n_bootstraps,
            n_jobs=n_jobs,
            gamma=0.3,
            random_state=seed,
        )

        fdp_eval, power_eval = cal_fdp_power(eval_selection, non_zero_index)

        return fdp_mx, fdp_pval, fdp_eval, power_mx, power_pval, power_eval


    fdps_mx = []
    fdps_pval = []
    fdps_eval = []
    powers_mx = []
    powers_pval = []
    powers_eval = []

    for seed in seed_list:
        fdp_mx, fdp_pval, fdp_eval, power_mx, power_pval, power_eval = single_run(
            n_subjects, n_clusters, rho, sparsity, fdr, n_bootstraps, n_jobs, seed=seed
        )
        fdps_mx.append(fdp_mx)
        fdps_pval.append(fdp_pval)
        fdps_eval.append(fdp_eval)

        powers_mx.append(fdp_mx)
        powers_pval.append(power_pval)
        powers_eval.append(power_eval)

    # Plot FDP and Power distributions

    fdps = [fdps_mx, fdps_pval, fdps_eval]
    powers = [powers_mx, powers_pval, powers_eval]


    def plot_results(bounds, fdr, nsubjects, n_clusters, rho, power=False):
        plt.figure(figsize=(10, 10), layout="constrained")
        for nb in range(len(bounds)):
            for i in range(len(bounds[nb])):
                y = bounds[nb][i]
                x = np.random.normal(nb + 1, 0.05)
                plt.scatter(x, y, alpha=0.65, c="blue")

        plt.boxplot(bounds, sym="")
        if power:
            plt.xticks(
                [1, 2, 3],
                ["MX Knockoffs", "Quantile aggregation", "e-values aggregation"],
                rotation=45,
                ha="right",
            )
            plt.title(f"FDR = {fdr}, n = {nsubjects}, p = {n_clusters}, rho = {rho}")
            plt.ylabel("Empirical Power")

        else:
            plt.hlines(fdr, xmin=0.5, xmax=3.5, label="Requested FDR control", color="red")
            plt.xticks(
                [1, 2, 3],
                ["MX Knockoffs", "Quantile aggregation", "e-values aggregation"],
                rotation=45,
                ha="right",
            )
            plt.title(f"FDR = {fdr}, n = {nsubjects}, p = {n_clusters}, rho = {rho}")
            plt.ylabel("Empirical FDP")
            plt.legend(loc="best")

        plt.show()


    plot_results(fdps, fdr, n_subjects, n_clusters, rho)
    plot_results(powers, fdr, n_subjects, n_clusters, rho, power=True)


.. rst-class:: sphx-glr-horizontal


    *

      .. image-sg:: /auto_examples/images/sphx_glr_plot_knockoff_aggregation_001.png
         :alt: FDR = 0.1, n = 500, p = 500, rho = 0.7
         :srcset: /auto_examples/images/sphx_glr_plot_knockoff_aggregation_001.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_examples/images/sphx_glr_plot_knockoff_aggregation_002.png
         :alt: FDR = 0.1, n = 500, p = 500, rho = 0.7
         :srcset: /auto_examples/images/sphx_glr_plot_knockoff_aggregation_002.png
         :class: sphx-glr-multi-img


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (5 minutes 26.606 seconds)

**Estimated memory usage:**  353 MB


.. _sphx_glr_download_auto_examples_plot_knockoff_aggregation.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: plot_knockoff_aggregation.ipynb <plot_knockoff_aggregation.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: plot_knockoff_aggregation.py <plot_knockoff_aggregation.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: plot_knockoff_aggregation.zip <plot_knockoff_aggregation.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_