fetch_mt_and_mate.py 1.32 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3

import pysam
import sys

def read_pair_generator(bam):
    read_pair = [None,None]
    for read in bam:
        if not read.is_proper_pair or read.is_secondary or read.is_supplementary:
            continue
        if read.is_read1:
            read_pair[0] = read
            read_pair[1] = None
        else:
            read_pair[1] = read
            if read_pair[0] is not None and read_pair[1] is not None:
                if read_pair[0].reference_name == 'MT' or read_pair[1].reference_name == 'MT':
                    if read_pair[0].query_name == read_pair[1].query_name:
                        yield read_pair[0], read_pair[1]
                    else:
                        sys.stderr.write("Pairs not paired (or file not sorted by name)")
                        sys.stderr.write(read_pair[0].query_name + "\n")
                        sys.stderr.write(read_pair[1].query_name + "\n")
                        sys.exit("Pairs not paired (or file not sorted by name)")
            read_pair[0] = None
            read_pair[1] = None

if (len(sys.argv)<2):
    sys.exit("Input bam file required!")
bam = pysam.AlignmentFile(sys.argv[1], 'rb')
outfile = pysam.AlignmentFile("-", 'w', template=bam)
for read1, read2 in read_pair_generator(bam):
    outfile.write(read1)
    outfile.write(read2)
outfile.close()