-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper_test.go
65 lines (55 loc) · 2.12 KB
/
scraper_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package main
import (
"net/url"
"testing"
)
// TestGetUrls_absolute verifies that GetUrls returns both absolute hrefs
// present in the page, in document order, each equal to the URL obtained by
// parsing the href directly.
func TestGetUrls_absolute(t *testing.T) {
	// mustParse aborts the test on a malformed fixture URL so that a bad
	// fixture is reported as such instead of as a nil dereference later on.
	mustParse := func(raw string) *url.URL {
		t.Helper()
		u, err := url.Parse(raw)
		if err != nil {
			t.Fatalf("url.Parse(%q): %v", raw, err)
		}
		return u
	}

	pageURL := mustParse("https://www.github.com/Deependra-Patel/")
	want := []*url.URL{
		mustParse("https://www.deependra.com/"),
		mustParse("http://www.xyz.com/"),
	}

	got := GetUrls(pageURL, "<html><a href=\"https://www.deependra.com/\"><div><a href=\"http://www.xyz.com/\"><div></html>")
	if len(got) != len(want) {
		t.Fatalf("GetUrls returned %d URLs %v, want %d URLs %v", len(got), got, len(want), want)
	}
	for i := range want {
		// Guard against nil entries before dereferencing for the struct comparison.
		if got[i] == nil || *got[i] != *want[i] {
			t.Errorf("GetUrls()[%d] = %v, want %v", i, got[i], want[i])
		}
	}
}
// TestGetUrls_relativeRoot verifies that a root-relative href
// ("/notifications") is returned as a single URL on the page's scheme and
// host, with the page's own path replaced by the href's path.
func TestGetUrls_relativeRoot(t *testing.T) {
	// mustParse aborts the test on a malformed fixture URL so that a bad
	// fixture is reported as such instead of as a nil dereference later on.
	mustParse := func(raw string) *url.URL {
		t.Helper()
		u, err := url.Parse(raw)
		if err != nil {
			t.Fatalf("url.Parse(%q): %v", raw, err)
		}
		return u
	}

	pageURL := mustParse("https://www.github.com/Deependra-Patel/")
	want := mustParse("https://www.github.com/notifications")

	got := GetUrls(pageURL, "<html><a href=\"/notifications\"></html>")
	if len(got) != 1 {
		t.Fatalf("GetUrls returned %d URLs %v, want exactly one: %v", len(got), got, want)
	}
	// Guard against a nil entry before dereferencing for the struct comparison.
	if got[0] == nil || *got[0] != *want {
		t.Errorf("GetUrls()[0] = %v, want %v", got[0], want)
	}
}
// TestGetUrls_relativeSibling verifies that a dot-relative href
// ("./notifications") is returned as a single URL resolved beneath the page
// URL's directory ("/Deependra-Patel/").
func TestGetUrls_relativeSibling(t *testing.T) {
	// mustParse aborts the test on a malformed fixture URL so that a bad
	// fixture is reported as such instead of as a nil dereference later on.
	mustParse := func(raw string) *url.URL {
		t.Helper()
		u, err := url.Parse(raw)
		if err != nil {
			t.Fatalf("url.Parse(%q): %v", raw, err)
		}
		return u
	}

	pageURL := mustParse("https://www.github.com/Deependra-Patel/")
	want := mustParse("https://www.github.com/Deependra-Patel/notifications")

	got := GetUrls(pageURL, "<html><a href=\"./notifications\"></html>")
	if len(got) != 1 {
		t.Fatalf("GetUrls returned %d URLs %v, want exactly one: %v", len(got), got, want)
	}
	// Guard against a nil entry before dereferencing for the struct comparison.
	if got[0] == nil || *got[0] != *want {
		t.Errorf("GetUrls()[0] = %v, want %v", got[0], want)
	}
}
// TestFilterToSameDomain verifies that, given one URL on host
// "www.github.com" and one on "www.xyz.com", FilterToSameDomain called with
// host "www.github.com" returns only the github.com URL, in string form.
func TestFilterToSameDomain(t *testing.T) {
	// mustParse aborts the test on a malformed fixture URL so that a bad
	// fixture is reported as such instead of silently passing nil along.
	mustParse := func(raw string) *url.URL {
		t.Helper()
		u, err := url.Parse(raw)
		if err != nil {
			t.Fatalf("url.Parse(%q): %v", raw, err)
		}
		return u
	}

	host := "www.github.com"
	want := "https://www.github.com/Deependra-Patel"
	candidates := []*url.URL{
		mustParse("https://www.github.com/Deependra-Patel"),
		mustParse("https://www.xyz.com/a"),
	}

	got := FilterToSameDomain(host, candidates)
	if len(got) != 1 || got[0] != want {
		t.Errorf("FilterToSameDomain(%q, %v) = %q, want [%q]", host, candidates, got, want)
	}
}
// TestScraper_GetSameDomainLinks builds a Scraper around a ClientMock that is
// constructed from a map keyed by the page URL (presumably the canned response
// body for that URL; confirm against ClientMock). It then checks that the
// result of GetSameDomainLinks reports the requested page as its link and
// lists exactly one same-domain link, the github.com one.
func TestScraper_GetSameDomainLinks(t *testing.T) {
	target := "https://www.github.com"
	responses := map[string]string{
		target: "<a href=\"https://www.github.com/notification\"><a href=\"https://www.xyz.com\">",
	}
	mock := ClientMock{responses}
	s := Scraper{&mock}

	result := s.GetSameDomainLinks(target)

	// The result must echo back the page that was requested.
	if result.link != target {
		t.Error(result, target)
	}

	// Only the link on the requested page's own domain may be kept.
	links := result.sameDomainLinks
	onlyExpected := len(links) == 1 && links[0] == "https://www.github.com/notification"
	if !onlyExpected {
		t.Error(result, responses)
	}
}