Skip to content
Snippets Groups Projects
Verified Commit 5a1e92b0 authored by Laurent Modolo
Browse files

fix fastq_parse for triplet

parent 1df1429c
No related branches found
No related tags found
No related merge requests found
......@@ -30,10 +30,9 @@ def plot_statistics(statistics, condition: str, path: str):
np.abs(pl.col('true size') - pl.col('read size')
).alias('abs(true - observed)')
)
print(df.filter(np.abs(pl.col('true - observed')) > 50).get_column('name').to_list())
df = df.with_columns(pl.col('true size').mean().alias('mean size'))
plot = (
ggplot(df.sample(fraction=0.1), aes(x='true size', y='true - observed', color='factor(read)')) +
ggplot(df.sample(fraction=1), aes(x='true size', y='true - observed', color='factor(read)')) +
geom_point(alpha = 0.5, size = 0.1) +
facet_wrap('~mapping')
......@@ -93,9 +92,9 @@ def parse_read_name(read):
"""
data = list(chain(*list(map(lambda x: re.split("[:|-]", x), read[0].split("_")))))
# ['@chr1', '5', '1005', 'chr1', '113485', '114485', '83', '2', '[0,0]', 'chr1', '5', '1005', 'chr1', '113485', '114485', '83', '2', 'S']
if len(data) < 9:
length = int(data[-2])
index = int(data[-1])
if len(data) < 10:
length = int(data[-3])
index = int(data[-2])
mapping = "Not split"
combinaison = None
else:
......@@ -122,23 +121,17 @@ def eval_reads(reads: list, read_size: int):
index, length, combinaison, mapping = parse_read_name(reads[0])
obs_size = len(reads[index - 1][1])
if combinaison is None:
return [length, obs_size, False, index, mapping, reads[0][0]]
# if index == 1 and mapping == 'M':
# obs_size = read_size - obs_size
# if index == 2 and mapping == 'S':
# obs_size = read_size - obs_size
return [length, obs_size, False, index, mapping, reads[0][0]]
if combinaison == [0, 1]:
return [length, obs_size, True, index, mapping, reads[0][0]]
elif combinaison == [0, 2]:
if index == 2:
length = read_size - length
if combinaison == [2, 3] and index == 1:
return [length, obs_size, True, index, mapping, reads[0][0]]
elif combinaison == [1, 2]:
length = read_size - length
if combinaison == [0, 3] and index == 2:
return [length, obs_size, True, index, mapping, reads[0][0]]
return None
if __name__ == "__main__":
    # When this file is executed directly as a script, run the doctests
    # embedded in the module's docstrings. Nothing runs on a plain import.
    import doctest

    doctest.testmod()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment