diff --git a/README.md b/README.md index 74c1e03..6f0dfc7 100644 --- a/README.md +++ b/README.md @@ -23,13 +23,14 @@ go get -u github.com/raviqqe/liche Link checker for Markdown and HTML Usage: - liche [-c ] [-d ] [-r] [-t ] [-v] ... + liche [-c ] [-d ] [-r] [-t ] [-x ] [-v] ... Options: -c, --concurrency Set max number of concurrent HTTP requests. [default: 512] -d, --document-root Set document root directory for absolute paths. -r, --recursive Search Markdown and HTML files recursively -t, --timeout Set timeout for HTTP requests in seconds. Disabled by default. + -x, --exclude Regex of links to exclude from checking. -v, --verbose Be verbose. ``` diff --git a/arguments.go b/arguments.go index dd945de..735278f 100644 --- a/arguments.go +++ b/arguments.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "regexp" "strconv" "time" @@ -13,13 +14,14 @@ const defaultConcurrency = maxOpenFiles / 2 const usage = `Link checker for Markdown and HTML Usage: - liche [-c ] [-d ] [-r] [-t ] [-v] ... + liche [-c ] [-d ] [-r] [-t ] [-x ] [-v] ... Options: -c, --concurrency Set max number of concurrent HTTP requests. [default: %v] -d, --document-root Set document root directory for absolute paths. -r, --recursive Search Markdown and HTML files recursively -t, --timeout Set timeout for HTTP requests in seconds. Disabled by default. + -x, --exclude Regex of links to exclude from checking. -v, --verbose Be verbose.` type arguments struct { @@ -28,6 +30,7 @@ type arguments struct { concurrency int timeout time.Duration recursive bool + exclude *regexp.Regexp verbose bool } @@ -58,12 +61,21 @@ func getArguments(argv []string) (arguments, error) { } } + var exclude *regexp.Regexp + if args["--exclude"] != nil { + exclude, err = regexp.Compile(args["--exclude"].(string)) + if err != nil { + return arguments{}, err + } + } + return arguments{ args[""].([]string), args["--document-root"].(string), int(c), time.Duration(t) * time.Second, args["--recursive"].(bool), + exclude, args["--verbose"].(bool), }, nil } diff --git a/arguments_test.go b/arguments_test.go index eae6f38..76c4098 100644 --- a/arguments_test.go +++ b/arguments_test.go @@ -1,6 +1,7 @@ package main import ( + "regexp" "testing" "time" @@ -14,47 +15,55 @@ func TestGetArguments(t *testing.T) { }{ { argv: []string{"file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, false}, }, { argv: []string{"-c", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, false, nil, false}, }, { argv: []string{"--concurrency", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, false, nil, false}, }, { argv: []string{"-d", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false}, }, { argv: []string{"--document-root", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false}, }, { argv: []string{"-r", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false}, }, { argv: []string{"--recursive", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false}, }, { argv: []string{"-t", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false}, }, { argv: []string{"--timeout", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false}, + }, + { + argv: []string{"-x", "^.*$", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false}, + }, + { + argv: []string{"--exclude", "^.*$", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false}, }, { argv: []string{"-v", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true}, }, { argv: []string{"--verbose", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true}, }, } { args, err := getArguments(c.argv) diff --git a/file_checker.go b/file_checker.go index cb605a8..7c456be 100644 --- a/file_checker.go +++ b/file_checker.go @@ -4,6 +4,7 @@ import ( "bytes" "io/ioutil" "net/url" + "regexp" "strings" "sync" "time" @@ -17,8 +18,8 @@ type fileChecker struct { semaphore semaphore } -func newFileChecker(timeout time.Duration, d string, s semaphore) fileChecker { - return fileChecker{newURLChecker(timeout, d, s), s} +func newFileChecker(timeout time.Duration, d string, x *regexp.Regexp, s semaphore) fileChecker { + return fileChecker{newURLChecker(timeout, d, x, s), s} } func (c fileChecker) Check(f string) ([]urlResult, error) { diff --git a/file_checker_test.go b/file_checker_test.go index f2c54e7..06a70c5 100644 --- a/file_checker_test.go +++ b/file_checker_test.go @@ -10,7 +10,7 @@ import ( ) func TestFileCheckerCheck(t *testing.T) { - c := newFileChecker(0, "", newSemaphore(1024)) + c := newFileChecker(0, "", nil, newSemaphore(1024)) for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} { rs, err := c.Check(f) @@ -48,7 +48,7 @@ func TestFileCheckerCheck(t *testing.T) { } func TestFileCheckerCheckMany(t *testing.T) { - c := newFileChecker(0, "", newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles)) for _, fs := range [][]string{ {"README.md"}, @@ -77,7 +77,7 @@ func TestFileCheckerCheckMany(t *testing.T) { } func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { - c := newFileChecker(0, "", newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles)) for _, fs := range [][]string{ {"test/absolute_path.md"}, @@ -107,7 +107,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { } func TestFileCheckerExtractURLs(t *testing.T) { - c := newFileChecker(0, "", newSemaphore(42)) + c := newFileChecker(0, "", nil, newSemaphore(42)) for _, x := range []struct { html string diff --git a/main.go b/main.go index d4154b4..a2f8d4b 100644 --- a/main.go +++ b/main.go @@ -30,7 +30,7 @@ func main() { }() rc := make(chan fileResult, maxOpenFiles) - c := newFileChecker(args.timeout, args.documentRoot, newSemaphore(args.concurrency)) + c := newFileChecker(args.timeout, args.documentRoot, args.exclude, newSemaphore(args.concurrency)) go c.CheckMany(m.Filenames(), rc) diff --git a/url_checker.go b/url_checker.go index c570975..5e4713a 100644 --- a/url_checker.go +++ b/url_checker.go @@ -5,6 +5,7 @@ import ( "net/url" "os" "path" + "regexp" "sync" "time" @@ -14,20 +15,24 @@ import ( type urlChecker struct { timeout time.Duration documentRoot string + exclude *regexp.Regexp semaphore semaphore } -func newURLChecker(t time.Duration, d string, s semaphore) urlChecker { - return urlChecker{t, d, s} +func newURLChecker(t time.Duration, d string, x *regexp.Regexp, s semaphore) urlChecker { + return urlChecker{t, d, x, s} } func (c urlChecker) Check(u string, f string) error { u, local, err := c.resolveURL(u, f) - if err != nil { return err } + if c.exclude != nil && c.exclude.MatchString(u) { + return nil + } + if local { _, err := os.Stat(u) return err diff --git a/url_checker_test.go b/url_checker_test.go index 03b2d89..435976b 100644 --- a/url_checker_test.go +++ b/url_checker_test.go @@ -1,6 +1,7 @@ package main import ( + "regexp" "testing" "time" @@ -8,7 +9,7 @@ import ( ) func TestURLCheckerCheck(t *testing.T) { - c := newURLChecker(0, "", newSemaphore(1024)) + c := newURLChecker(0, "", nil, newSemaphore(1024)) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -19,8 +20,20 @@ func TestURLCheckerCheck(t *testing.T) { } } +func TestURLCheckerCheckWithExclude(t *testing.T) { + c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), newSemaphore(1024)) + + for _, u := range []string{"http://localhost:1", "http://localhost:3", "README.md"} { + assert.Equal(t, nil, c.Check(u, "README.md")) + } + + for _, u := range []string{"http://localhost:2", "READYOU.md"} { + assert.NotEqual(t, nil, c.Check(u, "README.md")) + } +} + func TestURLCheckerCheckWithTimeout(t *testing.T) { - c := newURLChecker(30*time.Second, "", newSemaphore(1024)) + c := newURLChecker(30*time.Second, "", nil, newSemaphore(1024)) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -32,7 +45,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) { } func TestURLCheckerCheckMany(t *testing.T) { - c := newURLChecker(0, "", newSemaphore(1024)) + c := newURLChecker(0, "", nil, newSemaphore(1024)) for _, us := range [][]string{{}, {"https://google.com", "README.md"}} { rc := make(chan urlResult, 1024) @@ -45,7 +58,7 @@ func TestURLCheckerCheckMany(t *testing.T) { } } func TestURLCheckerResolveURL(t *testing.T) { - f := newURLChecker(0, "", newSemaphore(1024)) + f := newURLChecker(0, "", nil, newSemaphore(1024)) for _, c := range []struct { source, target string @@ -63,7 +76,7 @@ func TestURLCheckerResolveURL(t *testing.T) { } func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { - f := newURLChecker(0, "", newSemaphore(1024)) + f := newURLChecker(0, "", nil, newSemaphore(1024)) u, _, err := f.resolveURL("/foo", "foo.md") @@ -72,7 +85,7 @@ func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { } func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) { - f := newURLChecker(0, "foo", newSemaphore(1024)) + f := newURLChecker(0, "foo", nil, newSemaphore(1024)) for _, c := range []struct { source, target string