Browse Source

Resolve URLs while checking them

renovate/configure
Yota Toyama 8 years ago
parent
commit
522a2c18e7
  1. 45
      file_checker.go
  2. 56
      file_checker_test.go
  3. 29
      url_checker.go
  4. 43
      url_checker_test.go

45
file_checker.go

@ -2,10 +2,8 @@ package main
import (
"bytes"
"errors"
"io/ioutil"
"net/url"
"path"
"strings"
"sync"
"time"
@ -15,13 +13,12 @@ import (
)
type fileChecker struct {
urlChecker urlChecker
documentRoot string
semaphore semaphore
urlChecker urlChecker
semaphore semaphore
}
func newFileChecker(timeout time.Duration, r string, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, s), r, s}
func newFileChecker(timeout time.Duration, d string, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, d, s), s}
}
func (c fileChecker) Check(f string) ([]urlResult, error) {
@ -96,18 +93,6 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
us := make(map[string]bool)
ns := []*html.Node{n}
addURL := func(u string) error {
u, err := c.resolveURL(u)
if err != nil {
return err
}
us[u] = true
return nil
}
for len(ns) > 0 {
i := len(ns) - 1
n := ns[i]
@ -118,20 +103,14 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
case "a":
for _, a := range n.Attr {
if a.Key == "href" && isURL(a.Val) {
if err := addURL(a.Val); err != nil {
return nil, err
}
us[a.Val] = true
break
}
}
case "img":
for _, a := range n.Attr {
if a.Key == "src" && isURL(a.Val) {
if err := addURL(a.Val); err != nil {
return nil, err
}
us[a.Val] = true
break
}
}
@ -146,18 +125,6 @@ func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
return stringSetToSlice(us), nil
}
// resolveURL maps a URL that begins with "/" onto the checker's document
// root and hands every other URL back untouched.
//
// It returns an error when u begins with "/" but the document root is empty.
func (c fileChecker) resolveURL(u string) (string, error) {
	switch rooted := strings.HasPrefix(u, "/"); {
	case !rooted:
		// Nothing to resolve: the caller gets the URL as written.
		return u, nil
	case c.documentRoot == "":
		return "", errors.New("document root directory is not specified")
	default:
		return path.Join(c.documentRoot, u), nil
	}
}
func isURL(s string) bool {
if strings.HasPrefix(s, "#") {
return false

56
file_checker_test.go

@ -144,62 +144,6 @@ func TestFileCheckerExtractURLs(t *testing.T) {
}
}
// TestFileCheckerExtractURLsWithInvalidHTML parses snippets whose href/src
// values begin with "/" and, with a checker built with an empty document
// root, asserts that extractURLs yields a nil slice together with a non-nil
// error.
func TestFileCheckerExtractURLsWithInvalidHTML(t *testing.T) {
	snippets := []string{
		`<a href="/foo.html">link</a>`,
		`<img src="/foo.png" />`,
	}
	checker := newFileChecker(0, "", newSemaphore(42))

	for _, snippet := range snippets {
		// The markup itself must parse cleanly; only extraction should fail.
		root, parseErr := html.Parse(strings.NewReader(snippet))
		assert.Equal(t, nil, parseErr)

		urls, extractErr := checker.extractURLs(root)
		assert.Equal(t, ([]string)(nil), urls)
		assert.NotEqual(t, nil, extractErr)
	}
}
// TestFileCheckerResolveURL asserts that, with an empty document root,
// resolveURL returns a relative path and a full https URL unchanged and
// without an error.
func TestFileCheckerResolveURL(t *testing.T) {
	checker := newFileChecker(0, "", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}
// TestFileCheckerResolveURLWithAbsolutePath asserts that resolving "/foo"
// with an empty document root produces a non-nil error and an empty string.
func TestFileCheckerResolveURLWithAbsolutePath(t *testing.T) {
	checker := newFileChecker(0, "", newSemaphore(1024))

	resolved, err := checker.resolveURL("/foo")

	assert.NotEqual(t, nil, err)
	assert.Equal(t, "", resolved)
}
// TestFileCheckerResolveURLWithDocumentRoot asserts that, with the document
// root set to "foo", resolveURL leaves a relative path and a full https URL
// unchanged and rewrites "/foo" to "foo/foo", all without an error.
func TestFileCheckerResolveURLWithDocumentRoot(t *testing.T) {
	checker := newFileChecker(0, "foo", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
		{"/foo", "foo/foo"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}
func TestURLParse(t *testing.T) {
u, err := url.Parse("file-path")

29
url_checker.go

@ -1,9 +1,11 @@
package main
import (
"errors"
"net/url"
"os"
"path"
"strings"
"sync"
"time"
@ -11,15 +13,22 @@ import (
)
type urlChecker struct {
timeout time.Duration
semaphore semaphore
timeout time.Duration
documentRoot string
semaphore semaphore
}
func newURLChecker(t time.Duration, s semaphore) urlChecker {
return urlChecker{t, s}
func newURLChecker(t time.Duration, d string, s semaphore) urlChecker {
return urlChecker{t, d, s}
}
func (c urlChecker) Check(u string, f string) error {
u, err := c.resolveURL(u)
if err != nil {
return err
}
uu, err := url.Parse(u)
if err != nil {
@ -58,6 +67,18 @@ func (c urlChecker) CheckMany(us []string, f string, rc chan<- urlResult) {
close(rc)
}
// resolveURL rewrites a URL that begins with "/" so that it lives under the
// checker's document root; any other URL is returned exactly as given.
//
// An error is returned when u begins with "/" and no document root is set.
func (c urlChecker) resolveURL(u string) (string, error) {
	// URLs that do not start at the root need no rewriting.
	if !strings.HasPrefix(u, "/") {
		return u, nil
	}

	if c.documentRoot == "" {
		return "", errors.New("document root directory is not specified")
	}

	return path.Join(c.documentRoot, u), nil
}
func checkRelativePath(p string, f string) error {
_, err := os.Stat(path.Join(path.Dir(f), p))
return err

43
url_checker_test.go

@ -8,7 +8,7 @@ import (
)
func TestURLCheckerCheck(t *testing.T) {
c := newURLChecker(0, newSemaphore(1024))
c := newURLChecker(0, "", newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md"))
@ -20,7 +20,7 @@ func TestURLCheckerCheck(t *testing.T) {
}
func TestURLCheckerCheckWithTimeout(t *testing.T) {
c := newURLChecker(30*time.Second, newSemaphore(1024))
c := newURLChecker(30*time.Second, "", newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md"))
@ -32,7 +32,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) {
}
func TestURLCheckerCheckMany(t *testing.T) {
c := newURLChecker(0, newSemaphore(1024))
c := newURLChecker(0, "", newSemaphore(1024))
for _, us := range [][]string{{}, {"https://google.com", "README.md"}} {
rc := make(chan urlResult, 1024)
@ -44,3 +44,40 @@ func TestURLCheckerCheckMany(t *testing.T) {
}
}
}
// TestURLCheckerResolveURL asserts that, with an empty document root,
// resolveURL returns a relative path and a full https URL unchanged and
// without an error.
func TestURLCheckerResolveURL(t *testing.T) {
	checker := newURLChecker(0, "", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}
// TestURLCheckerResolveURLWithAbsolutePath asserts that resolving "/foo"
// with an empty document root produces a non-nil error and an empty string.
func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
	checker := newURLChecker(0, "", newSemaphore(1024))

	resolved, err := checker.resolveURL("/foo")

	assert.NotEqual(t, nil, err)
	assert.Equal(t, "", resolved)
}
// TestURLCheckerResolveURLWithDocumentRoot asserts that, with the document
// root set to "foo", resolveURL leaves a relative path and a full https URL
// unchanged and rewrites "/foo" to "foo/foo", all without an error.
func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) {
	checker := newURLChecker(0, "foo", newSemaphore(1024))

	cases := []struct{ source, target string }{
		{"foo", "foo"},
		{"https://google.com", "https://google.com"},
		{"/foo", "foo/foo"},
	}

	for _, tc := range cases {
		resolved, err := checker.resolveURL(tc.source)

		assert.Equal(t, nil, err)
		assert.Equal(t, tc.target, resolved)
	}
}

Loading…
Cancel
Save