diff --git a/arguments.go b/arguments.go
index 3b74435..1d00cea 100644
--- a/arguments.go
+++ b/arguments.go
@@ -23,20 +23,22 @@ var defaultConcurrency = func() int {
 const usage = `Link checker for Markdown and HTML
 
 Usage:
-	liche [-c <num-requests>] [-r] [-t <timeout>] [-v] <filenames>...
+	liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>...
 
 Options:
 	-c, --concurrency <num-requests>	Set max number of concurrent HTTP requests. [default: %v]
+	-d, --document-root <directory>	Set document root directory for absolute paths.
 	-r, --recursive	Search Markdown and HTML files recursively
 	-t, --timeout <timeout>	Set timeout for HTTP requests in seconds. Disabled by default.
 	-v, --verbose	Be verbose.`
 
 type arguments struct {
-	filenames   []string
-	concurrency int
-	timeout     time.Duration
-	recursive   bool
-	verbose     bool
+	filenames    []string
+	documentRoot string
+	concurrency  int
+	timeout      time.Duration
+	recursive    bool
+	verbose      bool
 }
 
 func getArgs() (arguments, error) {
@@ -54,6 +56,10 @@
 	t := 0.0
 
+	if args["--document-root"] == nil {
+		args["--document-root"] = ""
+	}
+
 	if args["--timeout"] != nil {
 		t, err = strconv.ParseFloat(args["--timeout"].(string), 64)
 
		if err != nil {
@@ -64,6 +70,7 @@
 	return arguments{
 		args["<filenames>"].([]string),
+		args["--document-root"].(string),
 		int(c),
 		time.Duration(t) * time.Second,
 		args["--recursive"].(bool),
 		args["--verbose"].(bool),
diff --git a/file_checker.go b/file_checker.go
index c2a1720..bcd9cea 100644
--- a/file_checker.go
+++ b/file_checker.go
@@ -2,8 +2,10 @@ package main
 
 import (
 	"bytes"
+	"errors"
 	"io/ioutil"
 	"net/url"
+	"path"
 	"strings"
 	"sync"
 	"time"
@@ -13,11 +15,12 @@ import (
 )
 
 type fileChecker struct {
-	urlChecker urlChecker
+	urlChecker   urlChecker
+	documentRoot string
 }
 
-func newFileChecker(timeout time.Duration, s semaphore) fileChecker {
-	return fileChecker{newURLChecker(timeout, s)}
+func newFileChecker(timeout time.Duration, r string, s semaphore) fileChecker {
+	return fileChecker{newURLChecker(timeout, s), r}
 }
 
 func (c fileChecker) Check(f string) ([]urlResult, error) {
@@ -27,7 +30,12 @@ func (c fileChecker) Check(f string) ([]urlResult, error) {
 		return nil, err
 	}
 
-	us := extractURLs(n)
+	us, err := c.extractURLs(n)
+
+	if err != nil {
+		return nil, err
+	}
+
 	rc := make(chan urlResult, len(us))
 	rs := make([]urlResult, 0, len(us))
@@ -81,11 +89,23 @@ func parseFile(f string) (*html.Node, error) {
 	return n, nil
 }
 
-func extractURLs(n *html.Node) []string {
+func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
 	us := make(map[string]bool)
 	ns := make([]*html.Node, 0, 1024)
 	ns = append(ns, n)
 
+	addURL := func(u string) error {
+		u, err := c.resolveURL(u)
+
+		if err != nil {
+			return err
+		}
+
+		us[u] = true
+
+		return nil
+	}
+
 	for len(ns) > 0 {
 		i := len(ns) - 1
 		n := ns[i]
@@ -96,14 +116,20 @@
 		case "a":
 			for _, a := range n.Attr {
 				if a.Key == "href" && isURL(a.Val) {
-					us[a.Val] = true
+					if err := addURL(a.Val); err != nil {
+						return nil, err
+					}
+
 					break
 				}
 			}
 		case "img":
 			for _, a := range n.Attr {
 				if a.Key == "src" && isURL(a.Val) {
-					us[a.Val] = true
+					if err := addURL(a.Val); err != nil {
+						return nil, err
+					}
+
 					break
 				}
 			}
@@ -115,7 +141,19 @@
 		}
 	}
 
-	return stringSetToSlice(us)
+	return stringSetToSlice(us), nil
+}
+
+func (c fileChecker) resolveURL(u string) (string, error) {
+	abs := strings.HasPrefix(u, "/")
+
+	if abs && c.documentRoot != "" {
+		return path.Join(c.documentRoot, u), nil
+	} else if abs {
+		return "", errors.New("document root directory is not specified")
+	}
+
+	return u, nil
 }
 
 func isURL(s string) bool {
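The heart of the patch is the new resolveURL method: hrefs that start with "/" are joined onto the configured document root, and extraction fails fast when no root was given. Below is a minimal standalone sketch of that logic for reference; the function name resolveAgainstRoot and the /var/www root are illustrative, not part of the patch.

    package main

    import (
    	"errors"
    	"fmt"
    	"path"
    	"strings"
    )

    // resolveAgainstRoot mirrors fileChecker.resolveURL above: root-relative
    // paths are joined onto the document root; other URLs pass through.
    func resolveAgainstRoot(documentRoot, u string) (string, error) {
    	abs := strings.HasPrefix(u, "/")

    	if abs && documentRoot != "" {
    		// path.Join also cleans the joined path, so no double slashes.
    		return path.Join(documentRoot, u), nil
    	} else if abs {
    		return "", errors.New("document root directory is not specified")
    	}

    	return u, nil
    }

    func main() {
    	fmt.Println(resolveAgainstRoot("/var/www", "/images/logo.png"))
    	// /var/www/images/logo.png <nil>

    	fmt.Println(resolveAgainstRoot("/var/www", "https://google.com"))
    	// https://google.com <nil>

    	fmt.Println(resolveAgainstRoot("", "/images/logo.png"))
    	// empty string plus the "not specified" error
    }

Note that path.Join is the slash-only variant (unlike filepath.Join), so resolved links keep forward slashes on every platform.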
diff --git a/file_checker_test.go b/file_checker_test.go
index 13d8ed7..bad2d25 100644
--- a/file_checker_test.go
+++ b/file_checker_test.go
@@ -9,35 +9,41 @@ import (
 	"golang.org/x/net/html"
 )
 
-func TestExtractURLs(t *testing.T) {
-	for _, c := range []struct {
+func TestFileCheckerExtractURLs(t *testing.T) {
+	c := newFileChecker(0, "", newSemaphore(42))
+
+	for _, x := range []struct {
 		html    string
 		numURLs int
 	}{
 		{`<a href="https://google.com">Google</a>`, 1},
 		{
 			`
-		<div>
-			<a href="https://google.com">Google</a>
-			<a href="https://google.com">Google</a>
-		</div>
+			<div>
+				<a href="https://google.com">Google</a>
+				<a href="https://google.com">Google</a>
+			</div>
 			`,
 			1,
 		},
 		{
 			`
-		<div>
-			<a href="https://google.com">Google</a>
-			<a href="https://yahoo.com">Yahoo!</a>
-		</div>
+			<div>
+				<a href="https://google.com">Google</a>
+				<a href="https://yahoo.com">Yahoo!</a>
+			</div>
 			`,
 			2,
 		},
 	} {
-		n, err := html.Parse(strings.NewReader(c.html))
+		n, err := html.Parse(strings.NewReader(x.html))
+
+		assert.Equal(t, nil, err)
+
+		us, err := c.extractURLs(n)
 
 		assert.Equal(t, nil, err)
-		assert.Equal(t, c.numURLs, len(extractURLs(n)))
+		assert.Equal(t, x.numURLs, len(us))
 	}
 }
diff --git a/main.go b/main.go
index 7bf8806..ef29805 100644
--- a/main.go
+++ b/main.go
@@ -16,7 +16,7 @@ func main() {
 	rc := make(chan fileResult, 1024)
 	s := newSemaphore(args.concurrency)
-	c := newFileChecker(args.timeout, s)
+	c := newFileChecker(args.timeout, args.documentRoot, s)
 
 	go c.CheckMany(fc, rc)
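A natural follow-up test for the new behavior (not included in this diff) could sit next to the cases above in file_checker_test.go. The sketch assumes isURL accepts root-relative hrefs and that stringSetToSlice returns the single resolved path; /var/www is an illustrative root.

    // Hypothetical test, not part of the patch.
    func TestFileCheckerExtractURLsWithDocumentRoot(t *testing.T) {
    	c := newFileChecker(0, "/var/www", newSemaphore(42))

    	n, err := html.Parse(strings.NewReader(`<a href="/index.html">Home</a>`))
    	assert.Equal(t, nil, err)

    	us, err := c.extractURLs(n)
    	assert.Equal(t, nil, err)
    	assert.Equal(t, []string{"/var/www/index.html"}, us)
    }

On the command line, the same path is exercised with an invocation like liche -d /var/www -r docs/ (directories illustrative).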