diff --git a/fastq_parse.py b/fastq_parse.py index 73606c77ca26d0154422073cf5727d6c71e5a929..3c43024c001721fe99e87cf33475e7fa0cef2848 100644 --- a/fastq_parse.py +++ b/fastq_parse.py @@ -30,10 +30,9 @@ def plot_statistics(statistics, condition: str, path: str): np.abs(pl.col('true size') - pl.col('read size') ).alias('abs(true - observed)') ) - print(df.filter(np.abs(pl.col('true - observed')) > 50).get_column('name').to_list()) df = df.with_columns(pl.col('true size').mean().alias('mean size')) plot = ( - ggplot(df.sample(fraction=0.1), aes(x='true size', y='true - observed', color='factor(read)')) + + ggplot(df.sample(fraction=1), aes(x='true size', y='true - observed', color='factor(read)')) + geom_point(alpha = 0.5, size = 0.1) + facet_wrap('~mapping') @@ -93,9 +92,9 @@ def parse_read_name(read): """ data = list(chain(*list(map(lambda x: re.split("[:|-]", x), read[0].split("_"))))) # ['@chr1', '5', '1005', 'chr1', '113485', '114485', '83', '2', '[0,0]', 'chr1', '5', '1005', 'chr1', '113485', '114485', '83', '2', 'S'] - if len(data) < 9: - length = int(data[-2]) - index = int(data[-1]) + if len(data) < 10: + length = int(data[-3]) + index = int(data[-2]) mapping = "Not split" combinaison = None else: @@ -122,23 +121,17 @@ def eval_reads(reads: list, read_size: int): index, length, combinaison, mapping = parse_read_name(reads[0]) obs_size = len(reads[index - 1][1]) if combinaison is None: - return [length, obs_size, False, index, mapping, reads[0][0]] - # if index == 1 and mapping == 'M': - # obs_size = read_size - obs_size - # if index == 2 and mapping == 'S': - # obs_size = read_size - obs_size + return [length, obs_size, False, index, mapping, reads[0][0]] if combinaison == [0, 1]: return [length, obs_size, True, index, mapping, reads[0][0]] - elif combinaison == [0, 2]: - if index == 2: - length = read_size - length + if combinaison == [2, 3] and index == 1: return [length, obs_size, True, index, mapping, reads[0][0]] - elif combinaison == [1, 2]: - length = read_size - length + if combinaison == [0, 3] and index == 2: return [length, obs_size, True, index, mapping, reads[0][0]] return None + if __name__ == "__main__": import doctest doctest.testmod()