diff --git a/arguments.go b/arguments.go index a97bb99..626308a 100644 --- a/arguments.go +++ b/arguments.go @@ -14,12 +14,13 @@ const defaultConcurrency = maxOpenFiles / 2 const usage = `Link checker for Markdown and HTML Usage: - liche [-c ] [-d ] [-r] [-t ] [-x ] [-v] ... + liche [-c ] [-d ] [-r] [-l] [-t ] [-x ] [-v] ... Options: -c, --concurrency Set max number of concurrent HTTP requests. [default: %v] -d, --document-root Set document root directory for absolute paths. -r, --recursive Search Markdown and HTML files recursively + -l, --local-only Validate only local links (file:// and bare links) -t, --timeout Set timeout for HTTP requests in seconds. Disabled by default. -x, --exclude Regex of links to exclude from checking. -v, --verbose Be verbose.` @@ -32,6 +33,7 @@ type arguments struct { excludedPattern *regexp.Regexp recursive bool verbose bool + localOnly bool } func getArguments(argv []string) (arguments, error) { @@ -79,5 +81,6 @@ func getArguments(argv []string) (arguments, error) { r, args["--recursive"].(bool), args["--verbose"].(bool), + args["--local-only"].(bool), }, nil } diff --git a/arguments_test.go b/arguments_test.go index d56fcce..79750c8 100644 --- a/arguments_test.go +++ b/arguments_test.go @@ -15,55 +15,63 @@ func TestGetArguments(t *testing.T) { }{ { argv: []string{"file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false}, }, { argv: []string{"-c", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, nil, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false}, }, { argv: []string{"--concurrency", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, nil, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false}, }, { argv: []string{"-d", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false}, }, { argv: []string{"--document-root", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false}, }, { argv: []string{"-r", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false}, }, { argv: []string{"--recursive", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false}, }, { argv: []string{"-t", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false}, }, { argv: []string{"--timeout", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false}, }, { argv: []string{"-x", "^.*$", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false}, }, { argv: []string{"--exclude", "^.*$", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false}, }, { argv: []string{"-v", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false}, }, { argv: []string{"--verbose", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false}, + }, + { + argv: []string{"--local-only", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true}, + }, + { + argv: []string{"-l", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true}, }, } { args, err := getArguments(c.argv) diff --git a/file_checker.go b/file_checker.go index b60db75..935d55e 100644 --- a/file_checker.go +++ b/file_checker.go @@ -18,8 +18,8 @@ type fileChecker struct { semaphore semaphore } -func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, s semaphore) fileChecker { - return fileChecker{newURLChecker(timeout, d, r, s), s} +func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, s semaphore, l bool) fileChecker { + return fileChecker{newURLChecker(timeout, d, r, s, l), s} } func (c fileChecker) Check(f string) ([]urlResult, error) { @@ -132,5 +132,5 @@ func isURL(s string) bool { } u, err := url.Parse(s) - return err == nil && (u.Scheme == "" || u.Scheme == "http" || u.Scheme == "https") + return err == nil && (u.Scheme == "" || u.Scheme == "http" || u.Scheme == "https" || u.Scheme == "file") } diff --git a/file_checker_test.go b/file_checker_test.go index 06a70c5..8f5f34e 100644 --- a/file_checker_test.go +++ b/file_checker_test.go @@ -10,7 +10,7 @@ import ( ) func TestFileCheckerCheck(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(1024)) + c := newFileChecker(0, "", nil, newSemaphore(1024), false) for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} { rs, err := c.Check(f) @@ -47,8 +47,22 @@ func TestFileCheckerCheck(t *testing.T) { } } +func TestFileCheckerLocal(t *testing.T) { + c := newFileChecker(0, "", nil, newSemaphore(1024), true) + + for _, f := range []string{"test/remote.md", "test/remote.html"} { + rs, err := c.Check(f) + + assert.Equal(t, nil, err) + + for _, r := range rs { + assert.Equal(t, errSkipped, r.err) + } + } +} + func TestFileCheckerCheckMany(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles), false) for _, fs := range [][]string{ {"README.md"}, @@ -77,7 +91,7 @@ func TestFileCheckerCheckMany(t *testing.T) { } func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles), false) for _, fs := range [][]string{ {"test/absolute_path.md"}, @@ -107,7 +121,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { } func TestFileCheckerExtractURLs(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(42)) + c := newFileChecker(0, "", nil, newSemaphore(42), false) for _, x := range []struct { html string @@ -152,11 +166,11 @@ func TestURLParse(t *testing.T) { } func TestIsURL(t *testing.T) { - for _, s := range []string{"http://google.com", "https://google.com", "file-path"} { + for _, s := range []string{"http://google.com", "https://google.com", "file://file-path", "file-path"} { assert.True(t, isURL(s)) } - for _, s := range []string{"ftp://foo.com", "file://file-path", "#foo"} { + for _, s := range []string{"ftp://foo.com", "#foo"} { assert.False(t, isURL(s)) } } diff --git a/file_result.go b/file_result.go index 322db90..b03c262 100644 --- a/file_result.go +++ b/file_result.go @@ -45,7 +45,7 @@ func (r fileResult) Ok() bool { } for _, r := range r.urlResults { - if r.err != nil { + if r.err != nil && r.err != errSkipped { return false } } diff --git a/main.go b/main.go index 920f23f..2a25249 100644 --- a/main.go +++ b/main.go @@ -34,7 +34,8 @@ func main() { args.timeout, args.documentRoot, args.excludedPattern, - newSemaphore(args.concurrency)) + newSemaphore(args.concurrency), + args.localOnly) go c.CheckMany(m.Filenames(), rc) diff --git a/test.md b/test.md new file mode 100644 index 0000000..e783e79 --- /dev/null +++ b/test.md @@ -0,0 +1,15 @@ +# Markdown test file + +## Remote links + +http://should.be/ignored + +https://should.be/ignored + +[ignored](http://should.be/ignored) + +## Local links + +[self](test.md) +[self](./test.md) +[self](file://test.md) diff --git a/test/remote.html b/test/remote.html new file mode 100644 index 0000000..5ebd12b --- /dev/null +++ b/test/remote.html @@ -0,0 +1,14 @@ + + + + + My title + + + +
+ Google +
+ + + diff --git a/test/remote.md b/test/remote.md new file mode 100644 index 0000000..5538d8d --- /dev/null +++ b/test/remote.md @@ -0,0 +1 @@ +[google](https://www.google.com/) diff --git a/url_checker.go b/url_checker.go index 3104ce2..8f40948 100644 --- a/url_checker.go +++ b/url_checker.go @@ -1,6 +1,7 @@ package main import ( + "errors" "fmt" "net/http" "net/url" @@ -19,10 +20,13 @@ type urlChecker struct { documentRoot string excludedPattern *regexp.Regexp semaphore semaphore + localOnly bool } -func newURLChecker(t time.Duration, d string, r *regexp.Regexp, s semaphore) urlChecker { - return urlChecker{t, d, r, s} +var errSkipped = errors.New("skipped as instructed") + +func newURLChecker(t time.Duration, d string, r *regexp.Regexp, s semaphore, l bool) urlChecker { + return urlChecker{t, d, r, s, l} } func (c urlChecker) Check(u string, f string) error { @@ -38,6 +42,8 @@ func (c urlChecker) Check(u string, f string) error { if local { _, err := os.Stat(u) return err + } else if c.localOnly { + return errSkipped } c.semaphore.Request() @@ -83,7 +89,7 @@ func (c urlChecker) resolveURL(u string, f string) (string, bool, error) { return "", false, err } - if uu.Scheme != "" { + if uu.Scheme != "" && uu.Scheme != "file" { return u, false, nil } diff --git a/url_checker_test.go b/url_checker_test.go index 5d53912..cc3e861 100644 --- a/url_checker_test.go +++ b/url_checker_test.go @@ -9,7 +9,7 @@ import ( ) func TestURLCheckerCheck(t *testing.T) { - c := newURLChecker(0, "", nil, newSemaphore(1024)) + c := newURLChecker(0, "", nil, newSemaphore(1024), false) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -23,7 +23,7 @@ func TestURLCheckerCheck(t *testing.T) { } func TestURLCheckerCheckWithExclude(t *testing.T) { - c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), newSemaphore(1024)) + c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), newSemaphore(1024), false) for _, u := range []string{"http://localhost:1", "http://localhost:3", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -34,8 +34,20 @@ func TestURLCheckerCheckWithExclude(t *testing.T) { } } +func TestURLCheckerCheckLocal(t *testing.T) { + c := newURLChecker(0, "", nil, newSemaphore(1024), true) + + for _, u := range []string{"https://www.google.com"} { + assert.Equal(t, errSkipped, c.Check(u, "README.md")) + } + + for _, u := range []string{"README.md"} { + assert.Equal(t, nil, c.Check(u, "README.md")) + } +} + func TestURLCheckerCheckWithTimeout(t *testing.T) { - c := newURLChecker(30*time.Second, "", nil, newSemaphore(1024)) + c := newURLChecker(30*time.Second, "", nil, newSemaphore(1024), false) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -47,7 +59,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) { } func TestURLCheckerCheckMany(t *testing.T) { - c := newURLChecker(0, "", nil, newSemaphore(1024)) + c := newURLChecker(0, "", nil, newSemaphore(1024), false) for _, us := range [][]string{{}, {"https://google.com", "README.md"}} { rc := make(chan urlResult, 1024) @@ -60,7 +72,7 @@ func TestURLCheckerCheckMany(t *testing.T) { } } func TestURLCheckerResolveURL(t *testing.T) { - f := newURLChecker(0, "", nil, newSemaphore(1024)) + f := newURLChecker(0, "", nil, newSemaphore(1024), false) for _, c := range []struct { source, target string @@ -78,7 +90,7 @@ func TestURLCheckerResolveURL(t *testing.T) { } func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { - f := newURLChecker(0, "", nil, newSemaphore(1024)) + f := newURLChecker(0, "", nil, newSemaphore(1024), false) u, _, err := f.resolveURL("/foo", "foo.md") @@ -87,7 +99,7 @@ func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { } func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) { - f := newURLChecker(0, "foo", nil, newSemaphore(1024)) + f := newURLChecker(0, "foo", nil, newSemaphore(1024), false) for _, c := range []struct { source, target string diff --git a/url_result.go b/url_result.go index f7ded01..c9c2217 100644 --- a/url_result.go +++ b/url_result.go @@ -16,6 +16,10 @@ func (r urlResult) String() string { return color.GreenString("OK") + "\t" + r.url } + if r.err == errSkipped { + return color.YellowString("SKIPPED") + "\t" + r.url + } + s := r.err.Error() return color.RedString("ERROR") + "\t" + r.url + "\n\t" +