Browse Source

Add document root option

renovate/configure
Yota Toyama 8 years ago
parent
commit
e37030653f
  1. 19
      arguments.go
  2. 54
      file_checker.go
  3. 30
      file_checker_test.go
  4. 2
      main.go

19
arguments.go

@ -23,20 +23,22 @@ var defaultConcurrency = func() int {
const usage = `Link checker for Markdown and HTML
Usage:
liche [-c <num-requests>] [-r] [-t <timeout>] [-v] <filenames>...
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>...
Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: %v]
-d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-v, --verbose Be verbose.`
type arguments struct {
filenames []string
concurrency int
timeout time.Duration
recursive bool
verbose bool
filenames []string
documentRoot string
concurrency int
timeout time.Duration
recursive bool
verbose bool
}
func getArgs() (arguments, error) {
@ -54,6 +56,10 @@ func getArgs() (arguments, error) {
t := 0.0
if args["--document-root"] == nil {
args["--document-root"] = ""
}
if args["--timeout"] != nil {
t, err = strconv.ParseFloat(args["--timeout"].(string), 64)
@ -64,6 +70,7 @@ func getArgs() (arguments, error) {
return arguments{
args["<filenames>"].([]string),
args["--document-root"].(string),
int(c),
time.Duration(t) * time.Second,
args["--recursive"].(bool),

54
file_checker.go

@ -2,8 +2,10 @@ package main
import (
"bytes"
"errors"
"io/ioutil"
"net/url"
"path"
"strings"
"sync"
"time"
@ -13,11 +15,12 @@ import (
)
type fileChecker struct {
urlChecker urlChecker
urlChecker urlChecker
documentRoot string
}
func newFileChecker(timeout time.Duration, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, s)}
func newFileChecker(timeout time.Duration, r string, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, s), r}
}
func (c fileChecker) Check(f string) ([]urlResult, error) {
@ -27,7 +30,12 @@ func (c fileChecker) Check(f string) ([]urlResult, error) {
return nil, err
}
us := extractURLs(n)
us, err := c.extractURLs(n)
if err != nil {
return nil, err
}
rc := make(chan urlResult, len(us))
rs := make([]urlResult, 0, len(us))
@ -81,11 +89,23 @@ func parseFile(f string) (*html.Node, error) {
return n, nil
}
func extractURLs(n *html.Node) []string {
func (c fileChecker) extractURLs(n *html.Node) ([]string, error) {
us := make(map[string]bool)
ns := make([]*html.Node, 0, 1024)
ns = append(ns, n)
addURL := func(u string) error {
u, err := c.resolveURL(u)
if err != nil {
return err
}
us[u] = true
return nil
}
for len(ns) > 0 {
i := len(ns) - 1
n := ns[i]
@ -96,14 +116,20 @@ func extractURLs(n *html.Node) []string {
case "a":
for _, a := range n.Attr {
if a.Key == "href" && isURL(a.Val) {
us[a.Val] = true
if err := addURL(a.Val); err != nil {
return nil, err
}
break
}
}
case "img":
for _, a := range n.Attr {
if a.Key == "src" && isURL(a.Val) {
us[a.Val] = true
if err := addURL(a.Val); err != nil {
return nil, err
}
break
}
}
@ -115,7 +141,19 @@ func extractURLs(n *html.Node) []string {
}
}
return stringSetToSlice(us)
return stringSetToSlice(us), nil
}
// resolveURL maps a URL extracted from a document to the target that
// should actually be checked. Absolute paths (those starting with "/")
// are joined onto the configured document root; if no document root was
// configured, such paths cannot be resolved and an error is returned.
// Every other URL is passed through unchanged.
//
// NOTE(review): path.Join is used here (slash-separated paths), not
// filepath.Join — presumably fine since inputs come from href/src
// attributes, but confirm on Windows document roots.
func (c fileChecker) resolveURL(u string) (string, error) {
	if !strings.HasPrefix(u, "/") {
		// Relative path or full URL: nothing to resolve.
		return u, nil
	}
	if c.documentRoot == "" {
		return "", errors.New("document root directory is not specified")
	}
	return path.Join(c.documentRoot, u), nil
}
func isURL(s string) bool {

30
file_checker_test.go

@ -9,35 +9,41 @@ import (
"golang.org/x/net/html"
)
func TestExtractURLs(t *testing.T) {
for _, c := range []struct {
func TestFileCheckerExtractURLs(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(42))
for _, x := range []struct {
html string
numURLs int
}{
{`<a href="https://google.com">Google</a>`, 1},
{
`
<div>
<a href="https://google.com">Google</a>
<a href="https://google.com">Google</a>
</div>
<div>
<a href="https://google.com">Google</a>
<a href="https://google.com">Google</a>
</div>
`,
1,
},
{
`
<div>
<a href="https://google.com">Google</a>
<a href="https://yahoo.com">Yahoo!</a>
</div>
<div>
<a href="https://google.com">Google</a>
<a href="https://yahoo.com">Yahoo!</a>
</div>
`,
2,
},
} {
n, err := html.Parse(strings.NewReader(c.html))
n, err := html.Parse(strings.NewReader(x.html))
assert.Equal(t, nil, err)
us, err := c.extractURLs(n)
assert.Equal(t, nil, err)
assert.Equal(t, c.numURLs, len(extractURLs(n)))
assert.Equal(t, x.numURLs, len(us))
}
}

2
main.go

@ -16,7 +16,7 @@ func main() {
rc := make(chan fileResult, 1024)
s := newSemaphore(args.concurrency)
c := newFileChecker(args.timeout, s)
c := newFileChecker(args.timeout, args.documentRoot, s)
go c.CheckMany(fc, rc)

Loading…
Cancel
Save