初步评估组装质量

用于计算组装结果的 N50、L50、最长/最短序列长度、序列数和总长度等统计指标。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os
import sys

def fasta_lengths(lines):
    """Return the length of each sequence record in FASTA-formatted lines.

    Args:
        lines: Iterable of text lines (an open file handle works).

    Returns:
        List of sequence lengths in file order. Records that carry no
        sequence characters are ignored, wherever they occur in the input.
    """
    lengths = []
    current = 0
    for raw in lines:
        # strip() also drops "\r", so CRLF files do not inflate the lengths.
        line = raw.strip()
        if not line:
            # Blank lines (commonly the trailing one) carry no sequence data.
            continue
        if line.startswith(">"):
            # A header closes the previous record, if it had any sequence.
            if current:
                lengths.append(current)
            current = 0
        else:
            # Only the length is needed, so the sequence itself is not stored.
            current += len(line)
    if current:
        lengths.append(current)
    return lengths


def n50_l50(lengths):
    """Compute N50 and L50 for a collection of sequence lengths.

    Sequences are visited from longest to shortest; N50 is the length of the
    sequence at which the running total first reaches half of the overall
    total, and L50 is how many sequences were needed to get there.

    Args:
        lengths: Iterable of non-negative integer sequence lengths.

    Returns:
        Tuple ``(n50, l50)``; ``(0, 0)`` when ``lengths`` is empty.
    """
    ordered = sorted(lengths, reverse=True)
    total = sum(ordered)
    running = 0
    for rank, size in enumerate(ordered, start=1):
        running += size
        # Integer comparison avoids float rounding on very large assemblies.
        if 2 * running >= total:
            return size, rank
    return 0, 0


def main(argv=None):
    """Print basic assembly statistics for the FASTA file named in argv[1].

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Raises:
        SystemExit: If no input path is given or the file has no sequences.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        prog = args[0] if args else "assembly_stats"
        sys.exit(f"usage: {prog} <assembly.fasta>")
    infasta = args[1]

    with open(infasta, "r") as handle:
        lenlist = fasta_lengths(handle)
    if not lenlist:
        # max()/min() are undefined on an empty list; fail with a clear message.
        sys.exit(f"no sequences found in {infasta}")

    n50, l50 = n50_l50(lenlist)
    print(f"N50 = {n50}")
    print(f"L50 = {l50}")
    print(f"max = {max(lenlist)}")
    print(f"min = {min(lenlist)}")
    print(f"seqnum = {len(lenlist)}")
    print(f"totallen = {sum(lenlist)}")


if __name__ == "__main__":
    main()