hmms_clean

用于获取结构域内以半胱氨酸（C）为标记的残基间隔模式：保留每个 C，并将相邻 C 之间的其他残基替换为 X[长度]，例如 ACDEC 转换为 X[1]CX[2]C。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import sys
import re

# Matches either a single uppercase cysteine or a maximal run of anything else.
_SEGMENT_RE = re.compile(r'C|[^C]+')


def cysteine_pattern(seq):
    """Return the cysteine-spacing pattern of *seq*.

    Every uppercase ``C`` is kept as-is; each maximal run of other
    characters is replaced by ``X[n]`` where ``n`` is the run length.
    An empty sequence yields an empty string.

    >>> cysteine_pattern("ACDEC")
    'X[1]CX[2]C'
    """
    # NOTE(review): only uppercase 'C' is treated as cysteine; lowercase 'c'
    # and any non-'-' gap characters are counted as ordinary residues --
    # confirm this matches the alignment format being fed in.
    return "".join(
        segment if segment == "C" else f"X[{len(segment)}]"
        for segment in _SEGMENT_RE.findall(seq)
    )


def read_domains(path):
    """Read ``(identifier, sequence)`` pairs from a whitespace-separated file.

    The first field of each line is the identifier and the second is the
    sequence, from which every ``-`` is removed. Any further fields are
    ignored. Blank lines are skipped. Records are returned in file order
    and duplicate identifiers are kept.

    Raises:
        ValueError: if a non-blank line has fewer than two fields.
    """
    records = []
    with open(path, 'r') as infile:
        for lineno, line in enumerate(infile, start=1):
            fields = line.split()
            if not fields:
                # Blank (or whitespace-only) line: nothing to convert.
                continue
            if len(fields) < 2:
                raise ValueError(
                    f"{path}: line {lineno}: expected '<id> <sequence>', "
                    f"got {line.rstrip()!r}"
                )
            records.append((fields[0], fields[1].replace('-', '')))
    return records


def main(argv=None):
    """Convert every sequence in ``argv[1]`` and write the patterns to ``argv[2]``.

    One pattern is written per input record, each followed by a newline,
    and the same pattern is echoed to standard output. *argv* defaults to
    ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else "hmms_clean"
        sys.exit(f"usage: {prog} <input_file> <output_file>")

    # Read the whole input before opening the output so that passing the
    # same path for both does not truncate the data before it is read.
    records = read_domains(argv[1])

    with open(argv[2], 'w') as outfile:
        for _seq_id, seq in records:
            domain = cysteine_pattern(seq)
            outfile.write(f"{domain}\n")
            print(domain)


if __name__ == "__main__":
    main()