-
Notifications
You must be signed in to change notification settings - Fork 1
/
EZbiocloud_16_ID.py
134 lines (107 loc) · 5.88 KB
/
EZbiocloud_16_ID.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import requests
import json
import time
import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from browsermobproxy import Server
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from Bio import SeqIO
import argparse
# Command-line interface: three mandatory options, stored on ``args`` as
# ``fasta``, ``account`` and ``passwd``.
parser = argparse.ArgumentParser(
    description="Use selenium to upload 16S rRNA gene sequences automatically!",
)
# (flag, destination attribute, help text) for every required option.
_REQUIRED_OPTIONS = (
    ('-i', 'fasta', '16S rRNA gene files!'),
    ('-a', 'account', 'EZBiocloud account, e.g. [email protected]'),
    ('-p', 'passwd', 'EZBiocloud password'),
)
for _flag, _dest, _help in _REQUIRED_OPTIONS:
    parser.add_argument(_flag, dest=_dest, help=_help, required=True)
args = parser.parse_args()
# Start a BrowserMob proxy and open a Chrome session configured to use it.
# NOTE: both filesystem paths below are machine-specific; set them to the
# local install location of browsermob-proxy and of chromedriver.
server = Server(r'D:\project\EZBio_16S_ID\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
server.start()
proxy = server.create_proxy()

chrome_options = Options()
# 'eager' lets navigation calls return once the DOM is ready instead of
# waiting for every sub-resource to finish loading.
chrome_options.page_load_strategy = 'eager'
chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))
# Skip certificate validation (presumably because the intercepting proxy
# presents its own certificates -- TODO confirm).
chrome_options.add_argument('--ignore-certificate-errors')

s = Service('C:/Users/lianz/Downloads/chromedriver.exe')
# The options must be handed to the driver explicitly; without ``options=``
# the proxy, page-load strategy and certificate flag above have no effect.
driver = webdriver.Chrome(service=s, options=chrome_options)
# Open the identification page and sign in with the supplied credentials.
base_url = "https://www.ezbiocloud.net/identify"
account = args.account
passwd = args.passwd

driver.get(base_url)
# WebDriverWait has to wrap the driver itself: driver.get() returns None, so
# wrapping its result produces a wait object that cannot poll anything.
wait = WebDriverWait(driver, 10)

# Poll for up to 10 seconds until the e-mail field exists, so the login does
# not fail when the form is not in the DOM yet right after navigation.
email_field = wait.until(
    lambda d: d.find_element(By.XPATH, '//*[@id="emailVal"]')
)
email_field.send_keys(account)
driver.find_element(By.XPATH, '//*[@id="passwordVal"]').send_keys(passwd)
driver.find_element(By.XPATH, '//*[@id="loginBtn"]').click()

# Two consecutive refreshes are kept from the original flow; presumably they
# let the signed-in page settle -- TODO confirm whether both are needed.
driver.refresh()
driver.refresh()
def _click(xpath):
    """Click the first element matching *xpath*."""
    driver.find_element(By.XPATH, xpath).click()


def _type_into(xpath, text):
    """Send *text* as keystrokes to the first element matching *xpath*."""
    driver.find_element(By.XPATH, xpath).send_keys(text)


def _force_display(element_id):
    """Set the CSS ``display`` of the element with *element_id* to ``block``."""
    target = driver.find_element(By.ID, element_id)
    driver.execute_script("arguments[0].style.display = 'block';", target)


# Submit every record of the FASTA file, one form submission per record.
# ``seq_count`` ends up holding the number of records submitted (0 if none).
seq_count = 0
for seq_count, record in enumerate(SeqIO.parse(args.fasta, "fasta"), start=1):
    _click('//span[@class="iconText"]')
    # The upload form lives in a container that is forced visible via script.
    _force_display('modalSingleStrainUp')
    _type_into('//*[@id="sequenceName"]', record.id)
    _type_into('//*[@id="ssurrnSeq"]', record.seq)
    _click('//*[@id="submitForReview"]')
    # Same treatment for the follow-up container before confirming.
    _force_display('modalMetaDataEdit')
    _click('//*[@id="completeSubmit"]')
    # Reload and pause briefly before the next submission.
    driver.refresh()
    time.sleep(3)

print('Upload finished and wait for identify!!!')
time.sleep(10)
driver.refresh()
import pandas as pd

# Column order of the exported table.
RESULT_COLUMNS = ['Name', 'Top-hit taxon', 'Top-hit strain', 'Similarity', 'Top-hit taxonomy', 'Phylum']
# Number of rows read from one page of the result table before paging on.
ROWS_PER_PAGE = 25
# XPath template for one cell of the result table (1-based row and column).
RESULT_CELL_XPATH = '//*[@id="idResultTable"]/tbody/tr[{0}]/td[{1}]'


def _parse_similarity(text):
    """Return *text* as a float, or the raw text when it is not numeric."""
    try:
        return float(text)
    except ValueError:
        # Keep whatever the cell showed rather than aborting the export.
        return text


def _phylum_of(taxonomy):
    """Return the second ';'-separated level of *taxonomy*.

    Falls back to "Unknown" when the string has fewer than two levels, so a
    short or empty taxonomy cell no longer raises IndexError.
    """
    levels = taxonomy.split(";")
    return levels[1] if len(levels) > 1 else "Unknown"


def _read_result_rows(row_total):
    """Read the first *row_total* rows of the result table currently shown.

    Returns a list of rows ordered like RESULT_COLUMNS. Table columns 3-7
    supply name, top-hit taxon, top-hit strain, similarity and taxonomy.
    """
    rows = []
    for row in range(1, row_total + 1):
        name, hits, strain, simi, taxon = (
            driver.find_element(By.XPATH, RESULT_CELL_XPATH.format(row, col)).text
            for col in range(3, 8)
        )
        print(hits)
        rows.append([name, hits, strain, _parse_similarity(simi), taxon, _phylum_of(taxon)])
    return rows


# Walk the result table page by page until ``seq_count`` rows were collected.
records = []
remaining = seq_count
spans_pages = seq_count > ROWS_PER_PAGE
while remaining > 0:
    if spans_pages:
        # Multi-page runs pause before reading each page.
        time.sleep(10)
    rows_here = min(ROWS_PER_PAGE, remaining)
    records.extend(_read_result_rows(rows_here))
    remaining -= rows_here
    if remaining > 0:
        # Advance exactly one page, and only when rows are still missing.
        # NOTE(review): the previous code clicked this link twice in a row
        # before reading the last partial page, which would skip a page;
        # confirm against the live site that a single click is correct.
        driver.find_element(By.LINK_TEXT, '›').click()

# Build the frame once instead of concatenating a one-row frame per result.
df1 = pd.DataFrame(records, columns=RESULT_COLUMNS)
df1.to_csv('user_data_ezbiocloud.csv')
import matplotlib.pyplot as plt

# Pie chart of the number of result rows per phylum, saved as pie.png.
g1 = df1.groupby('Phylum')
phylum_counts = g1.size()
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(phylum_counts, labels=phylum_counts.index)
fig.savefig('pie.png')
#plt.show()