diff --git a/file_checker.go b/file_checker.go
index a338ad5..cb605a8 100644
--- a/file_checker.go
+++ b/file_checker.go
@@ -2,10 +2,8 @@ package main
import (
"bytes"
- "errors"
"io/ioutil"
"net/url"
- "path"
"strings"
"sync"
"time"
@@ -15,13 +13,12 @@ import (
)
type fileChecker struct {
- urlChecker urlChecker
- documentRoot string
- semaphore semaphore
+ urlChecker urlChecker
+ semaphore semaphore
}
-func newFileChecker(timeout time.Duration, r string, s semaphore) fileChecker {
- return fileChecker{newURLChecker(timeout, s), r, s}
+func newFileChecker(timeout time.Duration, d string, s semaphore) fileChecker {
+	return fileChecker{urlChecker: newURLChecker(timeout, d, s), semaphore: s}
}
func (c fileChecker) Check(f string) ([]urlResult, error) {
@@ -96,18 +93,6 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
us := make(map[string]bool)
ns := []*html.Node{n}
- addURL := func(u string) error {
- u, err := c.resolveURL(u)
-
- if err != nil {
- return err
- }
-
- us[u] = true
-
- return nil
- }
-
for len(ns) > 0 {
i := len(ns) - 1
n := ns[i]
@@ -118,20 +103,14 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
case "a":
for _, a := range n.Attr {
if a.Key == "href" && isURL(a.Val) {
- if err := addURL(a.Val); err != nil {
- return nil, err
- }
-
+ us[a.Val] = true
break
}
}
case "img":
for _, a := range n.Attr {
if a.Key == "src" && isURL(a.Val) {
- if err := addURL(a.Val); err != nil {
- return nil, err
- }
-
+ us[a.Val] = true
break
}
}
@@ -146,18 +125,6 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
return stringSetToSlice(us), nil
}
-func (c fileChecker) resolveURL(u string) (string, error) {
- abs := strings.HasPrefix(u, "/")
-
- if abs && c.documentRoot != "" {
- return path.Join(c.documentRoot, u), nil
- } else if abs {
- return "", errors.New("document root directory is not specified")
- }
-
- return u, nil
-}
-
func isURL(s string) bool {
if strings.HasPrefix(s, "#") {
return false
diff --git a/file_checker_test.go b/file_checker_test.go
index 31888aa..f2c54e7 100644
--- a/file_checker_test.go
+++ b/file_checker_test.go
@@ -144,62 +144,6 @@ func TestFileCheckerExtractURLs(t *testing.T) {
}
}
-func TestFileCheckerExtractURLsWithInvalidHTML(t *testing.T) {
- c := newFileChecker(0, "", newSemaphore(42))
-
- for _, s := range []string{
- `link`,
- `
`,
- } {
- n, err := html.Parse(strings.NewReader(s))
-
- assert.Equal(t, nil, err)
-
- us, err := c.extractURLs(n)
-
- assert.Equal(t, ([]string)(nil), us)
- assert.NotEqual(t, nil, err)
- }
-}
-
-func TestFileCheckerResolveURL(t *testing.T) {
- f := newFileChecker(0, "", newSemaphore(1024))
-
- for _, c := range []struct{ source, target string }{
- {"foo", "foo"},
- {"https://google.com", "https://google.com"},
- } {
- u, err := f.resolveURL(c.source)
-
- assert.Equal(t, nil, err)
- assert.Equal(t, c.target, u)
- }
-}
-
-func TestFileCheckerResolveURLWithAbsolutePath(t *testing.T) {
- f := newFileChecker(0, "", newSemaphore(1024))
-
- u, err := f.resolveURL("/foo")
-
- assert.NotEqual(t, nil, err)
- assert.Equal(t, "", u)
-}
-
-func TestFileCheckerResolveURLWithDocumentRoot(t *testing.T) {
- f := newFileChecker(0, "foo", newSemaphore(1024))
-
- for _, c := range []struct{ source, target string }{
- {"foo", "foo"},
- {"https://google.com", "https://google.com"},
- {"/foo", "foo/foo"},
- } {
- u, err := f.resolveURL(c.source)
-
- assert.Equal(t, nil, err)
- assert.Equal(t, c.target, u)
- }
-}
-
func TestURLParse(t *testing.T) {
u, err := url.Parse("file-path")
diff --git a/url_checker.go b/url_checker.go
index 8d897c6..7fab12d 100644
--- a/url_checker.go
+++ b/url_checker.go
@@ -1,9 +1,11 @@
package main
import (
+ "errors"
"net/url"
"os"
"path"
+ "strings"
"sync"
"time"
@@ -11,15 +13,22 @@ import (
)
type urlChecker struct {
- timeout time.Duration
- semaphore semaphore
+ timeout time.Duration
+ documentRoot string
+ semaphore semaphore
}
-func newURLChecker(t time.Duration, s semaphore) urlChecker {
- return urlChecker{t, s}
+func newURLChecker(t time.Duration, d string, s semaphore) urlChecker {
+	return urlChecker{timeout: t, documentRoot: d, semaphore: s}
}
func (c urlChecker) Check(u string, f string) error {
+ u, err := c.resolveURL(u)
+
+ if err != nil {
+ return err
+ }
+
uu, err := url.Parse(u)
if err != nil {
@@ -58,6 +67,18 @@ func (c urlChecker) CheckMany(us []string, f string, rc chan<- urlResult) {
close(rc)
}
+// resolveURL rewrites a URL that starts with "/" to a path under the
+// document root and fails if none is set; other URLs pass through unchanged.
+func (c urlChecker) resolveURL(u string) (string, error) {
+	if !strings.HasPrefix(u, "/") {
+		return u, nil
+	}
+	if c.documentRoot == "" {
+		return "", errors.New("document root directory is not specified")
+	}
+	return path.Join(c.documentRoot, u), nil
+}
+
func checkRelativePath(p string, f string) error {
_, err := os.Stat(path.Join(path.Dir(f), p))
return err
diff --git a/url_checker_test.go b/url_checker_test.go
index 3949262..8afd0c2 100644
--- a/url_checker_test.go
+++ b/url_checker_test.go
@@ -8,7 +8,7 @@ import (
)
func TestURLCheckerCheck(t *testing.T) {
- c := newURLChecker(0, newSemaphore(1024))
+ c := newURLChecker(0, "", newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md"))
@@ -20,7 +20,7 @@ func TestURLCheckerCheck(t *testing.T) {
}
func TestURLCheckerCheckWithTimeout(t *testing.T) {
- c := newURLChecker(30*time.Second, newSemaphore(1024))
+ c := newURLChecker(30*time.Second, "", newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md"))
@@ -32,7 +32,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) {
}
func TestURLCheckerCheckMany(t *testing.T) {
- c := newURLChecker(0, newSemaphore(1024))
+ c := newURLChecker(0, "", newSemaphore(1024))
for _, us := range [][]string{{}, {"https://google.com", "README.md"}} {
rc := make(chan urlResult, 1024)
@@ -44,3 +44,40 @@ func TestURLCheckerCheckMany(t *testing.T) {
}
}
}
+// TestURLCheckerResolveURL checks that, with an empty document root, a relative path and a full URL come back unchanged.
+func TestURLCheckerResolveURL(t *testing.T) {
+	checker := newURLChecker(0, "", newSemaphore(1024))
+	cases := []struct{ source, target string }{
+		{"foo", "foo"},
+		{"https://google.com", "https://google.com"},
+	}
+	for _, tc := range cases {
+		resolved, err := checker.resolveURL(tc.source)
+		assert.Equal(t, nil, err)
+		assert.Equal(t, tc.target, resolved)
+	}
+}
+
+// TestURLCheckerResolveURLWithAbsolutePath checks that "/foo" yields an error and an empty result when the document root is empty.
+func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
+	checker := newURLChecker(0, "", newSemaphore(1024))
+
+	resolved, err := checker.resolveURL("/foo")
+	assert.NotEqual(t, nil, err)
+	assert.Equal(t, "", resolved)
+}
+
+// TestURLCheckerResolveURLWithDocumentRoot checks that, with document root "foo", only the "/"-prefixed input is rewritten.
+func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) {
+	checker := newURLChecker(0, "foo", newSemaphore(1024))
+	cases := []struct{ source, target string }{
+		{"foo", "foo"},
+		{"https://google.com", "https://google.com"},
+		{"/foo", "foo/foo"},
+	}
+	for _, tc := range cases {
+		resolved, err := checker.resolveURL(tc.source)
+		assert.Equal(t, nil, err)
+		assert.Equal(t, tc.target, resolved)
+	}
+}