diff --git a/src/download_encode_eclip/create_gene_bed.py b/src/download_encode_eclip/create_gene_bed.py
new file mode 100644
index 0000000000000000000000000000000000000000..f695072dde4694a272714f73ab8c4237d7d85ace
--- /dev/null
+++ b/src/download_encode_eclip/create_gene_bed.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: The goal of this script is to create a bed gene file \
+without the first and the last exons.
+"""
+
+
+from .config import Config, ConfigEncodeClip
+import pandas as pd
+from pathlib import Path
+import doctest
+
+
+class BadIDError(Exception):
+    """
+    Class defining a bad exon id error.
+    """
+    pass
+
+
+def check_id(mserie: pd.Series) -> None:
+    """
+    Check the id column of a row in a dataframe containing exons.
+
+    :param mserie: A row of a dataframe containing exon in a bed format
+
+    >>> check_id(pd.Series({"chr": 1, "start": 0, "stop": 50, "id": "1_10",
+    ... "score": 0, "strand": '-'}))
+    >>> check_id(pd.Series({"chr": 1, "start": 0, "stop": 50, "id": "cd_10",
+    ... "score": 0, "strand": '-'}))
+    Traceback (most recent call last):
+    ...
+    BadIDError: The exon cd_10 as a bad id format.
+    It should have the form id_pos where
+        1. id: Is the fasterDB gene id of the exons
+        2. pos: Is the position of the exon within the gene
+    >>> check_id(pd.Series({"chr": 1, "start": 0, "stop": 50, "id": "17",
+    ... "score": 0, "strand": '-'}))
+    Traceback (most recent call last):
+    ...
+    BadIDError: The exon 17 as a bad id format.
+    It should have the form id_pos where
+        1. id: Is the fasterDB gene id of the exons
+        2. pos: Is the position of the exon within the gene
+    """
+    res = mserie["id"].split("_")
+    if len(res) != 2 or not res[0].isdigit() or not res[1].isdigit():
+        raise BadIDError(f"The exon {mserie['id']} as a bad id format.\n"
+                         f"It should have the form id_pos where\n"
+                         f"    1. id: Is the fasterDB gene id of the exons\n"
+                         f"    2. pos: Is the position of the exon within the "
+                         f"gene")
+
+
+def load_exon_bed(exon_bed: Path) -> pd.DataFrame:
+    """
+    Load a bed file containing exons. The columns containing the \
+    names of intervals must have the following structure: id_pos where:
+        1. id is the fasterDB id of a gene
+        2. pos: Is the exons position on the gene
+
+    :param exon_bed: A bed file containing exons
+    :return: The loaded exons bed file as a dataframe
+
+    >>> load_exon_bed(Config.tests_files / "exons.bed")
+       chr     start      stop   id  score strand
+    0   18  28681865  28682388  1_1      0      -
+    1   18  28681183  28681432  1_2      0      -
+    2   18  28673521  28673606  1_3      0      -
+    3   18  28672063  28672263  1_4      0      -
+    4   18  28671489  28671530  1_5      0      -
+    5   18  28670990  28671110  1_6      0      -
+    6   18  28669401  28669557  1_7      0      -
+    7   18  28667631  28667776  1_8      0      -
+    8   18  28666538  28666705  1_9      0      -
+    """
+    df = pd.read_csv(exon_bed, sep="\t", names=["chr", "start", "stop", "id",
+                                                "score", "strand"])
+    df.apply(check_id, axis=1)
+    return df
+
+
+def get_first_and_last_exons(df_exon: pd.DataFrame) -> pd.DataFrame:
+    """
+    Return a dataframe indicating for each gene, it's first or last exons.
+
+    :param df_exon: A dataframe containing exons in bed format.
+    :return: dataframe indicating for each gene, it's first or last exons.
+
+    >>> d = pd.DataFrame({
+    ... 'chr': {0: 18, 1: 18, 2: 18, 3: 18, 4: 18, 5: 18, 6: 18, 7: 18, 8: 18},
+    ... 'start': {0: 28681865, 1: 28681183, 2: 28673521, 3: 28672063,
+    ...           4: 28671489, 5: 28670990, 6: 28669401, 7: 28667631,
+    ...           8: 28666538},
+    ... 'stop': {0: 28682388, 1: 28681432, 2: 28673606, 3: 28672263,
+    ...          4: 28671530, 5: 28671110, 6: 28669557, 7: 28667776,
+    ...          8: 28666705},
+    ... 'id': {0: '1_1', 1: '1_2', 2: '1_3', 3: '1_4', 4: '1_5', 5: '1_6',
+    ...        6: '1_7', 7: '1_8', 8: '1_9'},
+    ... 'score': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0},
+    ... 'strand': {0: '-', 1: '-', 2: '-', 3: '-', 4: '-', 5: '-', 6: '-',
+    ...            7: '-', 8: '-'}})
+    >>> get_first_and_last_exons(d)
+       chr  start_first  stop_first  start_last  stop_last  gene_id strand
+    0   18     28681865    28682388    28666538   28666705        1      -
+    """
+    df_exon["gene_id"] = df_exon["id"].str.replace(r"_\d+", "", regex=True)\
+        .astype(int)
+    df_exon["pos"] = df_exon["id"].str.replace(r"\d+_", "", regex=True)\
+        .astype(int)
+    df_exon.sort_values(["gene_id", "pos"], ascending=True, inplace=True)
+    df_first = df_exon.drop_duplicates(subset="gene_id", keep="first")
+    df_last = df_exon.drop_duplicates(subset="gene_id", keep="last")
+    good_col = ["chr", "start_first", "stop_first", "start_last", "stop_last",
+                "gene_id", "strand"]
+    df = pd.merge(df_first, df_last, how="outer",
+                  on=["gene_id", "chr", "strand", "score"],
+                  suffixes=["_first", "_last"])
+    return df[df["pos_first"] != df["pos_last"]][good_col]
+
+
+def get_gene_row(row: pd.Series) -> pd.Series:
+    """
+    From a row containing the position of the first and the last exons of \
+    a gene, return another row containing the intervals of a gene without \
+    this first and last exons.
+
+    :param row: A  row containing the position of the first and the \
+    last exons of a gene
+    :return: another row containing the intervals of a gene without \
+    this first and last exons.
+
+    >>> get_gene_row(pd.Series(
+    ... {'chr': 18, 'start_first': 900, 'stop_first': 1000,
+    ...  'start_last': 0, 'stop_last': 100, 'gene_id': 1,
+    ...  'strand': '-'})).to_dict() == {'chr': 18, 'start': 100,
+    ... 'stop': 900, 'id': 1, 'score': 0, 'strand': '-'}
+    True
+    >>> get_gene_row(pd.Series(
+    ... {'chr': 18, 'start_first': 0, 'stop_first': 100,
+    ...  'start_last': 900, 'stop_last': 1000, 'gene_id': 1,
+    ...  'strand': '+'})).to_dict() == {'chr': 18, 'start': 100,
+    ... 'stop': 900, 'id': 1, 'score': 0, 'strand': '+'}
+    True
+    """
+    if row["strand"] == "+":
+        return pd.Series({"chr": row["chr"],
+                          "start": row["stop_first"],
+                          "stop": row["start_last"],
+                          "id": row["gene_id"],
+                          "score": 0,
+                          "strand": row["strand"]})
+    return pd.Series({"chr": row["chr"],
+                      "start": row["stop_last"],
+                      "stop": row["start_first"],
+                      "id": row["gene_id"],
+                      "score": 0,
+                      "strand": row["strand"]})
+
+
+def create_new_bed(df_exons: pd.DataFrame) -> pd.DataFrame:
+    """
+    From a dataframe containing the first and the last exons of \
+    each gene create dataframe of genes without the first and the last exons.
+
+    :param df_exons: a dataframe containing the first and the last exons of \
+    each gene create
+    :return: The dataframe in bed format of genes without it's first or last \
+    exons
+
+    >>> d = pd.DataFrame({'chr': [18, 18], 'start_first': [900, 0],
+    ... 'stop_first': [1000, 100],
+    ...  'start_last': [0, 900], 'stop_last': [100, 1000], 'gene_id': [1, 1],
+    ...  'strand': ['-', '+']})
+    >>> create_new_bed(d)
+       chr  start  stop  id  score strand
+    0   18    100   900   1      0      -
+    1   18    100   900   1      0      +
+    """
+    df_exons = df_exons.apply(get_gene_row, axis=1)
+    df_exons["chr"] = "chr" + df_exons["chr"]
+    return df_exons
+
+
+def create_gene_bed():
+    """
+    Create a gene files without first and last exons.
+    """
+    df_exon = load_exon_bed(Config.bed_exon)
+    df_fl = get_first_and_last_exons(df_exon)
+    bed_gene = create_new_bed(df_fl)
+    ConfigEncodeClip.output_gene.parent.mkdir(exist_ok=True)
+    bed_gene.to_csv(ConfigEncodeClip.output_gene, sep="\t", index=False,
+                    header=False)
+
+
+if __name__ == "__main__":
+    doctest.testmod()