fix fastq_parse for triplet

5a1e92b0 · Laurent Modolo · 1df1429c · 5a1e92b0
Verified Commit 5a1e92b0 authored 4 months ago by Laurent Modolo
--- a/fastq_parse.py
+++ b/fastq_parse.py
@@ -30,10 +30,9 @@ def plot_statistics(statistics, condition: str, path: str):
            np.abs(pl.col('true size') - pl.col('read size')
                        ).alias('abs(true - observed)')
        )
-    print(df.filter(np.abs(pl.col('true - observed')) > 50).get_column('name').to_list())
    df = df.with_columns(pl.col('true size').mean().alias('mean size'))
    plot = (
-        ggplot(df.sample(fraction=0.1), aes(x='true size', y='true - observed', color='factor(read)')) +
+        ggplot(df.sample(fraction=1), aes(x='true size', y='true - observed', color='factor(read)')) +
        geom_point(alpha = 0.5, size = 0.1) +
        facet_wrap('~mapping')

@@ -93,9 +92,9 @@ def parse_read_name(read):
    """
    data = list(chain(*list(map(lambda x: re.split("[:|-]", x), read[0].split("_")))))
    # ['@chr1', '5', '1005', 'chr1', '113485', '114485', '83', '2', '[0,0]', 'chr1', '5', '1005', 'chr1', '113485', '114485', '83', '2', 'S']
-    if len(data) < 9:
-        length = int(data[-2])
-        index = int(data[-1])
+    if len(data) < 10:
+        length = int(data[-3])
+        index = int(data[-2])
        mapping = "Not split"
        combinaison = None
    else:
@@ -122,23 +121,17 @@ def eval_reads(reads: list, read_size: int):
    index, length, combinaison, mapping = parse_read_name(reads[0])
    obs_size = len(reads[index - 1][1])
    if combinaison is None:
-            return [length, obs_size, False, index, mapping, reads[0][0]]
-    # if index == 1 and mapping == 'M':
-    #     obs_size = read_size - obs_size
-    # if index == 2 and mapping == 'S':
-    #     obs_size = read_size - obs_size
+        return [length, obs_size, False, index, mapping, reads[0][0]]
    if combinaison == [0, 1]:
        return [length, obs_size, True, index, mapping, reads[0][0]]
-    elif combinaison == [0, 2]:
-        if index == 2:
-            length = read_size - length
+    if combinaison == [2, 3] and index == 1:
        return [length, obs_size, True, index, mapping, reads[0][0]]
-    elif combinaison == [1, 2]:
-        length = read_size - length
+    if combinaison == [0, 3] and index == 2:
        return [length, obs_size, True, index, mapping, reads[0][0]]
    return None


+
 if __name__ == "__main__":
    import doctest
    doctest.testmod()