Browse Source

Resolve URLs while checking them

renovate/configure
Yota Toyama 8 years ago
parent
commit
522a2c18e7
  1. 45
      file_checker.go
  2. 56
      file_checker_test.go
  3. 29
      url_checker.go
  4. 43
      url_checker_test.go

45
file_checker.go

@ -2,10 +2,8 @@ package main
import ( import (
"bytes" "bytes"
"errors"
"io/ioutil" "io/ioutil"
"net/url" "net/url"
"path"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -15,13 +13,12 @@ import (
) )
type fileChecker struct { type fileChecker struct {
urlChecker urlChecker urlChecker urlChecker
documentRoot string semaphore semaphore
semaphore semaphore
} }
func newFileChecker(timeout time.Duration, r string, s semaphore) fileChecker { func newFileChecker(timeout time.Duration, d string, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, s), r, s} return fileChecker{newURLChecker(timeout, d, s), s}
} }
func (c fileChecker) Check(f string) ([]urlResult, error) { func (c fileChecker) Check(f string) ([]urlResult, error) {
@ -96,18 +93,6 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
us := make(map[string]bool) us := make(map[string]bool)
ns := []*html.Node{n} ns := []*html.Node{n}
addURL := func(u string) error {
u, err := c.resolveURL(u)
if err != nil {
return err
}
us[u] = true
return nil
}
for len(ns) > 0 { for len(ns) > 0 {
i := len(ns) - 1 i := len(ns) - 1
n := ns[i] n := ns[i]
@ -118,20 +103,14 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
case "a": case "a":
for _, a := range n.Attr { for _, a := range n.Attr {
if a.Key == "href" && isURL(a.Val) { if a.Key == "href" && isURL(a.Val) {
if err := addURL(a.Val); err != nil { us[a.Val] = true
return nil, err
}
break break
} }
} }
case "img": case "img":
for _, a := range n.Attr { for _, a := range n.Attr {
if a.Key == "src" && isURL(a.Val) { if a.Key == "src" && isURL(a.Val) {
if err := addURL(a.Val); err != nil { us[a.Val] = true
return nil, err
}
break break
} }
} }
@ -146,18 +125,6 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
return stringSetToSlice(us), nil return stringSetToSlice(us), nil
} }
// resolveURL rewrites a reference that begins with "/" into a path under the
// checker's document root; any other reference is returned untouched.
//
// An error is returned (together with an empty string) when u begins with "/"
// and the document root is empty.
func (c fileChecker) resolveURL(u string) (string, error) {
	switch {
	case !strings.HasPrefix(u, "/"):
		// Not root-relative: nothing to resolve.
		return u, nil
	case c.documentRoot == "":
		return "", errors.New("document root directory is not specified")
	default:
		return path.Join(c.documentRoot, u), nil
	}
}
func isURL(s string) bool { func isURL(s string) bool {
if strings.HasPrefix(s, "#") { if strings.HasPrefix(s, "#") {
return false return false

56
file_checker_test.go

@ -144,62 +144,6 @@ func TestFileCheckerExtractURLs(t *testing.T) {
} }
} }
// TestFileCheckerExtractURLsWithInvalidHTML asserts that extractURLs returns a
// nil slice and a non-nil error for documents whose link or image target
// begins with "/" when the checker is built with an empty document root.
func TestFileCheckerExtractURLsWithInvalidHTML(t *testing.T) {
	checker := newFileChecker(0, "", newSemaphore(42))

	sources := []string{
		`<a href="/foo.html">link</a>`,
		`<img src="/foo.png" />`,
	}

	for _, source := range sources {
		root, err := html.Parse(strings.NewReader(source))
		assert.Equal(t, nil, err)

		urls, err := checker.extractURLs(root)
		assert.Equal(t, ([]string)(nil), urls)
		assert.NotEqual(t, nil, err)
	}
}
// TestFileCheckerResolveURL asserts that, with an empty document root,
// resolveURL returns references that do not begin with "/" unchanged and
// without an error.
func TestFileCheckerResolveURL(t *testing.T) {
	checker := newFileChecker(0, "", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}
// TestFileCheckerResolveURLWithAbsolutePath asserts that resolveURL reports an
// error and yields an empty string for "/foo" when the checker is built with
// an empty document root.
func TestFileCheckerResolveURLWithAbsolutePath(t *testing.T) {
	checker := newFileChecker(0, "", newSemaphore(1024))

	resolved, err := checker.resolveURL("/foo")

	assert.NotEqual(t, nil, err)
	assert.Equal(t, "", resolved)
}
// TestFileCheckerResolveURLWithDocumentRoot asserts that, with the document
// root "foo", resolveURL leaves "foo" and "https://google.com" unchanged and
// turns "/foo" into "foo/foo", never returning an error.
func TestFileCheckerResolveURLWithDocumentRoot(t *testing.T) {
	checker := newFileChecker(0, "foo", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
		{"/foo", "foo/foo"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}
func TestURLParse(t *testing.T) { func TestURLParse(t *testing.T) {
u, err := url.Parse("file-path") u, err := url.Parse("file-path")

29
url_checker.go

@ -1,9 +1,11 @@
package main package main
import ( import (
"errors"
"net/url" "net/url"
"os" "os"
"path" "path"
"strings"
"sync" "sync"
"time" "time"
@ -11,15 +13,22 @@ import (
) )
type urlChecker struct { type urlChecker struct {
timeout time.Duration timeout time.Duration
semaphore semaphore documentRoot string
semaphore semaphore
} }
func newURLChecker(t time.Duration, s semaphore) urlChecker { func newURLChecker(t time.Duration, d string, s semaphore) urlChecker {
return urlChecker{t, s} return urlChecker{t, d, s}
} }
func (c urlChecker) Check(u string, f string) error { func (c urlChecker) Check(u string, f string) error {
u, err := c.resolveURL(u)
if err != nil {
return err
}
uu, err := url.Parse(u) uu, err := url.Parse(u)
if err != nil { if err != nil {
@ -58,6 +67,18 @@ func (c urlChecker) CheckMany(us []string, f string, rc chan<- urlResult) {
close(rc) close(rc)
} }
// resolveURL maps a reference beginning with "/" onto the checker's document
// root using path.Join and hands every other reference back unchanged.
//
// It returns an empty string and an error when u begins with "/" but the
// document root is empty.
//
// NOTE(review): a scheme-relative reference such as "//host/path" also begins
// with "/" and is therefore joined onto the document root here — confirm that
// this is intended.
func (c urlChecker) resolveURL(u string) (string, error) {
	if !strings.HasPrefix(u, "/") {
		return u, nil
	}

	if c.documentRoot == "" {
		return "", errors.New("document root directory is not specified")
	}

	return path.Join(c.documentRoot, u), nil
}
func checkRelativePath(p string, f string) error { func checkRelativePath(p string, f string) error {
_, err := os.Stat(path.Join(path.Dir(f), p)) _, err := os.Stat(path.Join(path.Dir(f), p))
return err return err

43
url_checker_test.go

@ -8,7 +8,7 @@ import (
) )
func TestURLCheckerCheck(t *testing.T) { func TestURLCheckerCheck(t *testing.T) {
c := newURLChecker(0, newSemaphore(1024)) c := newURLChecker(0, "", newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} { for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md")) assert.Equal(t, nil, c.Check(u, "README.md"))
@ -20,7 +20,7 @@ func TestURLCheckerCheck(t *testing.T) {
} }
func TestURLCheckerCheckWithTimeout(t *testing.T) { func TestURLCheckerCheckWithTimeout(t *testing.T) {
c := newURLChecker(30*time.Second, newSemaphore(1024)) c := newURLChecker(30*time.Second, "", newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} { for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md")) assert.Equal(t, nil, c.Check(u, "README.md"))
@ -32,7 +32,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) {
} }
func TestURLCheckerCheckMany(t *testing.T) { func TestURLCheckerCheckMany(t *testing.T) {
c := newURLChecker(0, newSemaphore(1024)) c := newURLChecker(0, "", newSemaphore(1024))
for _, us := range [][]string{{}, {"https://google.com", "README.md"}} { for _, us := range [][]string{{}, {"https://google.com", "README.md"}} {
rc := make(chan urlResult, 1024) rc := make(chan urlResult, 1024)
@ -44,3 +44,40 @@ func TestURLCheckerCheckMany(t *testing.T) {
} }
} }
} }
// TestURLCheckerResolveURL asserts that, with an empty document root,
// resolveURL returns references that do not begin with "/" unchanged and
// without an error.
func TestURLCheckerResolveURL(t *testing.T) {
	checker := newURLChecker(0, "", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}
// TestURLCheckerResolveURLWithAbsolutePath asserts that resolveURL reports an
// error and yields an empty string for "/foo" when the checker is built with
// an empty document root.
func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
	checker := newURLChecker(0, "", newSemaphore(1024))

	resolved, err := checker.resolveURL("/foo")

	assert.NotEqual(t, nil, err)
	assert.Equal(t, "", resolved)
}
// TestURLCheckerResolveURLWithDocumentRoot asserts that, with the document
// root "foo", resolveURL leaves "foo" and "https://google.com" unchanged and
// turns "/foo" into "foo/foo", never returning an error.
func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) {
	checker := newURLChecker(0, "foo", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
		{"/foo", "foo/foo"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}

Loading…
Cancel
Save