-
Notifications
You must be signed in to change notification settings - Fork 4
/
show_text_clusters.rb
executable file
·58 lines (45 loc) · 1.33 KB
/
show_text_clusters.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/ruby
$:.unshift File.dirname(__FILE__)
require 'cgi'
# require 'ripl'
# require 'exprlib'
# Read a partition (cluster label) file.
#
# The file is expected to have a short header terminated by a line containing
# a run of at least four dashes. The second line of the file (index 1) is
# parsed as the number of clusters, and every line after the separator is
# parsed as one integer label.
#
# @param pafn [String] path of the partition file
# @return [Array(Array<Integer>, Integer), nil] the pair [labels, k], or nil
#   when no dashed separator is found on lines 1..10 (0-based)
def read_pa(pafn)
  rows = File.readlines(pafn).map(&:strip)

  # Only rows 1..10 are inspected for the separator; when several of them
  # match, the last one is taken as the end of the header.
  separator_at = (1..10).select { |idx| /\----+/.match(rows[idx]) }.last
  return nil if separator_at.nil?

  label_values = rows.drop(separator_at + 1).map(&:to_i)
  cluster_count = rows[1].to_i
  [label_values, cluster_count]
end
# Print one HTML section per cluster to standard output: a heading with the
# 1-based cluster number and the cluster size, followed by a bulleted list of
# randomly sampled members.
#
# Member text is HTML-escaped (via CGI.escapeHTML, which needs `require 'cgi'`)
# so that items containing '<', '>' or '&' are displayed literally instead of
# being interpreted as markup.
#
# @param clusters [Array<Array>] clusters[i] holds the members of cluster i+1
# @param sample_size [Integer] maximum number of members listed per cluster;
#   clusters with fewer members are listed in full (in random order)
def show_cluster_samples(clusters, sample_size = 20)
  clusters.each_with_index do |members, i|
    items = members.sample(sample_size)
                   .map { |x| "<li>#{CGI.escapeHTML(x.to_s)}</li>" }
                   .join(" ")
    puts "<hr/><b>Cluster #{i+1} (size=#{members.size}</b>, #{sample_size} samples) <ul>#{items}</ul>"
  end
end
# Command-line entry point.
#
# Usage: show_text_clusters.rb DATASET_FILE PARTITION_FILE
#
# DATASET_FILE is read line by line (one item per line); PARTITION_FILE is
# parsed by read_pa and supplies one 1-based cluster label per dataset line.
# The result is an HTML page, written to standard output, listing sample
# members of every cluster.
dsfn = ARGV[0]
pafn = ARGV[1]
abort "Usage: #{$0} DATASET_FILE PARTITION_FILE" if dsfn.nil? || pafn.nil?

lines = File.readlines(dsfn).collect { |x| x.strip }

# read_pa returns nil when it cannot find the header separator; fail with a
# clear message instead of crashing later on a nil cluster count.
partition = read_pa(pafn)
abort "#{pafn}: no '----' header separator found in partition file" if partition.nil?
labels, k = partition

# Debugging hook: uncomment (together with `require 'ripl'`) to inspect the
# parsed data interactively.
# Ripl.start :binding => binding

clusters = (1..k).map { [] }

lines.each_with_index do |line, j|
  label = labels[j]
  # A missing label, or one outside 1..k, would otherwise raise on nil or
  # (for label 0) silently land in the last cluster via a negative index.
  if label.nil?
    abort "#{pafn}: no label for dataset line #{j + 1} (#{labels.size} labels, #{lines.size} lines)"
  end
  unless label.between?(1, k)
    abort "#{pafn}: label #{label} for dataset line #{j + 1} is outside 1..#{k}"
  end
  clusters[label - 1] << line
end

puts "<html><head></head><body>"
show_cluster_samples(clusters)
puts "</body></html>"