-
Notifications
You must be signed in to change notification settings - Fork 10
/
count.py
74 lines (54 loc) · 1.39 KB
/
count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Use this script to count the rows with valid=1 in the `mainpages` table of every .db file in a directory.
#! /bin/python
import multiprocessing as mp
import sqlite3
import zlib
import sys,os,getopt
import hashlib
import time
def usage():
    """Print a one-line usage message naming this script."""
    # The format string needs a %s placeholder; without one the % operator
    # raises TypeError ("not all arguments converted") instead of printing.
    print("Usage: python %s [db_dir]" % sys.argv[0])
# worker is a single process for each cpu on each computer
def worker(queue, lock, cpuid):
    """Count the valid ``mainpages`` rows of every database taken from *queue*.

    Database paths are pulled from *queue* until it reports no remaining
    items. For each one ``select count(*) from mainpages where valid=1`` is
    run and the result is added to a running total, which is printed once at
    the end as ``T:<total>``.

    Args:
        queue: Queue of SQLite database paths shared by all workers; must
            provide ``qsize()`` and ``get(block, timeout)``.
        lock: Accepted for interface compatibility; not used here.
        cpuid: Accepted for interface compatibility; not used here.
    """
    # Function-scope import so the block works on both Python 2 and 3.
    try:
        from queue import Empty
    except ImportError:
        from Queue import Empty

    cnt = 0
    while queue.qsize() > 0:
        try:
            # Another worker may take the last item between the qsize()
            # check and this call; a bounded wait avoids blocking forever.
            db = queue.get(True, 1)
        except Empty:
            # Re-check qsize(): exit if drained, retry if data is pending.
            continue
        conn = sqlite3.connect(db)
        try:
            cur = conn.cursor()
            cur.execute("select count(*) from mainpages where valid=1")
            cnt += cur.fetchone()[0]
        finally:
            # Always release the database handle, even if the query fails.
            conn.close()
    print("T:%d" % cnt)
def manager(dbs):
    """Distribute the databases in *dbs* to worker processes and wait for them.

    Every path in *dbs* is placed on a shared queue, then one ``worker``
    process per available CPU (minus one) is started and joined.

    Args:
        dbs: Iterable of SQLite database paths to be counted.
    """
    # Leave one CPU free, but always start at least one worker: on a
    # single-core machine cpu_count() - 1 is 0 and nothing would be counted.
    tasks = max(1, mp.cpu_count() - 1)
    que = mp.Queue()
    for db in dbs:
        que.put(db)
    lock = mp.Lock()
    plist = []
    # range (rather than xrange) keeps this loop valid on Python 2 and 3.
    for i in range(tasks):
        p = mp.Process(target=worker, args=(que, lock, i + 1))
        p.start()
        plist.append(p)
    for p in plist:
        p.join()
def getdbs(path):
    """Return the names of the entries in *path* that end with ``.db``.

    As a side effect the process working directory is changed to *path*,
    so the returned bare names resolve relative to it.
    """
    entries = os.listdir(path)
    os.chdir(path)
    return [name for name in entries if name.endswith('.db')]
def main():
    """Count the databases in the directory named on the command line.

    The directory defaults to ``/work/db`` unless exactly one argument is
    supplied.
    """
    path = sys.argv[1] if len(sys.argv) == 2 else '/work/db'
    dbs = getdbs(path)
    manager(dbs)
# Run the counter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()