Browse Source

Add document root option

renovate/configure
Yota Toyama 8 years ago
parent
commit
e37030653f
  1. 9
      arguments.go
  2. 52
      file_checker.go
  3. 14
      file_checker_test.go
  4. 2
      main.go

9
arguments.go

@ -23,16 +23,18 @@ var defaultConcurrency = func() int {
const usage = `Link checker for Markdown and HTML const usage = `Link checker for Markdown and HTML
Usage: Usage:
liche [-c <num-requests>] [-r] [-t <timeout>] [-v] <filenames>... liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>...
Options: Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: %v] -c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: %v]
-d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively -r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default. -t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-v, --verbose Be verbose.` -v, --verbose Be verbose.`
type arguments struct { type arguments struct {
filenames []string filenames []string
documentRoot string
concurrency int concurrency int
timeout time.Duration timeout time.Duration
recursive bool recursive bool
@ -54,6 +56,10 @@ func getArgs() (arguments, error) {
t := 0.0 t := 0.0
if args["--document-root"] == nil {
args["--document-root"] = ""
}
if args["--timeout"] != nil { if args["--timeout"] != nil {
t, err = strconv.ParseFloat(args["--timeout"].(string), 64) t, err = strconv.ParseFloat(args["--timeout"].(string), 64)
@ -64,6 +70,7 @@ func getArgs() (arguments, error) {
return arguments{ return arguments{
args["<filenames>"].([]string), args["<filenames>"].([]string),
args["--document-root"].(string),
int(c), int(c),
time.Duration(t) * time.Second, time.Duration(t) * time.Second,
args["--recursive"].(bool), args["--recursive"].(bool),

52
file_checker.go

@ -2,8 +2,10 @@ package main
import ( import (
"bytes" "bytes"
"errors"
"io/ioutil" "io/ioutil"
"net/url" "net/url"
"path"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -14,10 +16,11 @@ import (
type fileChecker struct { type fileChecker struct {
urlChecker urlChecker urlChecker urlChecker
documentRoot string
} }
func newFileChecker(timeout time.Duration, s semaphore) fileChecker { func newFileChecker(timeout time.Duration, r string, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, s)} return fileChecker{newURLChecker(timeout, s), r}
} }
func (c fileChecker) Check(f string) ([]urlResult, error) { func (c fileChecker) Check(f string) ([]urlResult, error) {
@ -27,7 +30,12 @@ func (c fileChecker) Check(f string) ([]urlResult, error) {
return nil, err return nil, err
} }
us := extractURLs(n) us, err := c.extractURLs(n)
if err != nil {
return nil, err
}
rc := make(chan urlResult, len(us)) rc := make(chan urlResult, len(us))
rs := make([]urlResult, 0, len(us)) rs := make([]urlResult, 0, len(us))
@ -81,11 +89,23 @@ func parseFile(f string) (*html.Node, error) {
return n, nil return n, nil
} }
func extractURLs(n *html.Node) []string { func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
us := make(map[string]bool) us := make(map[string]bool)
ns := make([]*html.Node, 0, 1024) ns := make([]*html.Node, 0, 1024)
ns = append(ns, n) ns = append(ns, n)
addURL := func(u string) error {
u, err := c.resolveURL(u)
if err != nil {
return err
}
us[u] = true
return nil
}
for len(ns) > 0 { for len(ns) > 0 {
i := len(ns) - 1 i := len(ns) - 1
n := ns[i] n := ns[i]
@ -96,14 +116,20 @@ func extractURLs(n *html.Node) []string {
case "a": case "a":
for _, a := range n.Attr { for _, a := range n.Attr {
if a.Key == "href" && isURL(a.Val) { if a.Key == "href" && isURL(a.Val) {
us[a.Val] = true if err := addURL(a.Val); err != nil {
return nil, err
}
break break
} }
} }
case "img": case "img":
for _, a := range n.Attr { for _, a := range n.Attr {
if a.Key == "src" && isURL(a.Val) { if a.Key == "src" && isURL(a.Val) {
us[a.Val] = true if err := addURL(a.Val); err != nil {
return nil, err
}
break break
} }
} }
@ -115,7 +141,19 @@ func extractURLs(n *html.Node) []string {
} }
} }
return stringSetToSlice(us) return stringSetToSlice(us), nil
}
func (c fileChecker) resolveURL(u string) (string, error) {
abs := strings.HasPrefix(u, "/")
if abs && c.documentRoot != "" {
return path.Join(c.documentRoot, u), nil
} else if abs {
return "", errors.New("document root directory is not specified")
}
return u, nil
} }
func isURL(s string) bool { func isURL(s string) bool {

14
file_checker_test.go

@ -9,8 +9,10 @@ import (
"golang.org/x/net/html" "golang.org/x/net/html"
) )
func TestExtractURLs(t *testing.T) { func TestFileCheckerExtractURLs(t *testing.T) {
for _, c := range []struct { c := newFileChecker(0, "", newSemaphore(42))
for _, x := range []struct {
html string html string
numURLs int numURLs int
}{ }{
@ -34,10 +36,14 @@ func TestExtractURLs(t *testing.T) {
2, 2,
}, },
} { } {
n, err := html.Parse(strings.NewReader(c.html)) n, err := html.Parse(strings.NewReader(x.html))
assert.Equal(t, nil, err)
us, err := c.extractURLs(n)
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
assert.Equal(t, c.numURLs, len(extractURLs(n))) assert.Equal(t, x.numURLs, len(us))
} }
} }

2
main.go

@ -16,7 +16,7 @@ func main() {
rc := make(chan fileResult, 1024) rc := make(chan fileResult, 1024)
s := newSemaphore(args.concurrency) s := newSemaphore(args.concurrency)
c := newFileChecker(args.timeout, s) c := newFileChecker(args.timeout, args.documentRoot, s)
go c.CheckMany(fc, rc) go c.CheckMany(fc, rc)

Loading…
Cancel
Save