
Implement --exclude <regex> argument

So that you can pass a regex of links (paths or URLs) to exclude from
checking. My use case for this is legitimate links in documentation
that may not function at the time of checking, for example:

- development servers
- private repos

I've implemented this all the way down in `urlChecker`, rather than
`fileChecker`, so that we can compare the fully resolved path/URL.
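
For example (the pattern and file names below are illustrative only, not part of
this change), a run that skips a local development server and a private repo
might look like this; any link whose fully resolved URL or path matches the
regex is skipped without being requested:

    liche -x 'https?://localhost(:[0-9]+)?|github\.com/example/private-repo' README.md docs/setup.md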

I would have found some of the codebase easier to read if longer
variable names had been used and struct fields specified, but I've
matched the house-style for now.
Dan Carley, 7 years ago · branch renovate/configure · commit 1f08c70009
8 changed files:

 README.md            |  3
 arguments.go         | 14
 arguments_test.go    | 31
 file_checker.go      |  5
 file_checker_test.go |  8
 main.go              |  2
 url_checker.go       | 11
 url_checker_test.go  | 25

README.md

@@ -23,13 +23,14 @@ go get -u github.com/raviqqe/liche
 Link checker for Markdown and HTML
 
 Usage:
-	liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>...
+	liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
 
 Options:
 	-c, --concurrency <num-requests>  Set max number of concurrent HTTP requests. [default: 512]
 	-d, --document-root <directory>   Set document root directory for absolute paths.
 	-r, --recursive                   Search Markdown and HTML files recursively
 	-t, --timeout <timeout>           Set timeout for HTTP requests in seconds. Disabled by default.
+	-x, --exclude <regex>             Regex of links to exclude from checking.
 	-v, --verbose                     Be verbose.
 ```

arguments.go

@@ -2,6 +2,7 @@ package main
 
 import (
 	"fmt"
+	"regexp"
 	"strconv"
 	"time"
@@ -13,13 +14,14 @@ const defaultConcurrency = maxOpenFiles / 2
 const usage = `Link checker for Markdown and HTML
 
 Usage:
-	liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>...
+	liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
 
 Options:
 	-c, --concurrency <num-requests>  Set max number of concurrent HTTP requests. [default: %v]
 	-d, --document-root <directory>   Set document root directory for absolute paths.
 	-r, --recursive                   Search Markdown and HTML files recursively
 	-t, --timeout <timeout>           Set timeout for HTTP requests in seconds. Disabled by default.
+	-x, --exclude <regex>             Regex of links to exclude from checking.
 	-v, --verbose                     Be verbose.`
@@ -28,6 +30,7 @@ type arguments struct {
 	concurrency  int
 	timeout      time.Duration
 	recursive    bool
+	exclude      *regexp.Regexp
 	verbose      bool
 }
@@ -58,12 +61,21 @@ func getArguments(argv []string) (arguments, error) {
 		}
 	}
 
+	var exclude *regexp.Regexp
+	if args["--exclude"] != nil {
+		exclude, err = regexp.Compile(args["--exclude"].(string))
+
+		if err != nil {
+			return arguments{}, err
+		}
+	}
+
 	return arguments{
 		args["<filenames>"].([]string),
 		args["--document-root"].(string),
 		int(c),
 		time.Duration(t) * time.Second,
 		args["--recursive"].(bool),
+		exclude,
 		args["--verbose"].(bool),
 	}, nil
 }

arguments_test.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"regexp"
 	"testing"
 	"time"
@@ -14,47 +15,55 @@ func TestGetArguments(t *testing.T) {
 	}{
 		{
 			argv: []string{"file"},
-			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, false},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, false},
 		},
 		{
 			argv: []string{"-c", "42", "file"},
-			args: arguments{[]string{"file"}, "", 42, 0, false, false},
+			args: arguments{[]string{"file"}, "", 42, 0, false, nil, false},
 		},
 		{
 			argv: []string{"--concurrency", "42", "file"},
-			args: arguments{[]string{"file"}, "", 42, 0, false, false},
+			args: arguments{[]string{"file"}, "", 42, 0, false, nil, false},
 		},
 		{
 			argv: []string{"-d", "directory", "file"},
-			args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false},
+			args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false},
 		},
 		{
 			argv: []string{"--document-root", "directory", "file"},
-			args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false},
+			args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false},
 		},
 		{
 			argv: []string{"-r", "file"},
-			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false},
 		},
 		{
 			argv: []string{"--recursive", "file"},
-			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false},
 		},
 		{
 			argv: []string{"-t", "42", "file"},
-			args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false},
 		},
 		{
 			argv: []string{"--timeout", "42", "file"},
-			args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false},
 		},
+		{
+			argv: []string{"-x", "^.*$", "file"},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false},
+		},
+		{
+			argv: []string{"--exclude", "^.*$", "file"},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false},
+		},
 		{
 			argv: []string{"-v", "file"},
-			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true},
 		},
 		{
 			argv: []string{"--verbose", "file"},
-			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true},
+			args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true},
 		},
 	} {
 		args, err := getArguments(c.argv)

file_checker.go

@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"io/ioutil"
 	"net/url"
+	"regexp"
 	"strings"
 	"sync"
 	"time"
@@ -17,8 +18,8 @@ type fileChecker struct {
 	semaphore  semaphore
 }
 
-func newFileChecker(timeout time.Duration, d string, s semaphore) fileChecker {
-	return fileChecker{newURLChecker(timeout, d, s), s}
+func newFileChecker(timeout time.Duration, d string, x *regexp.Regexp, s semaphore) fileChecker {
+	return fileChecker{newURLChecker(timeout, d, x, s), s}
 }
 
 func (c fileChecker) Check(f string) ([]urlResult, error) {

file_checker_test.go

@@ -10,7 +10,7 @@ import (
 )
 
 func TestFileCheckerCheck(t *testing.T) {
-	c := newFileChecker(0, "", newSemaphore(1024))
+	c := newFileChecker(0, "", nil, newSemaphore(1024))
 
 	for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} {
 		rs, err := c.Check(f)
@@ -48,7 +48,7 @@ func TestFileCheckerCheck(t *testing.T) {
 }
 
 func TestFileCheckerCheckMany(t *testing.T) {
-	c := newFileChecker(0, "", newSemaphore(maxOpenFiles))
+	c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
 
 	for _, fs := range [][]string{
 		{"README.md"},
@@ -77,7 +77,7 @@ func TestFileCheckerCheckMany(t *testing.T) {
 }
 
 func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
-	c := newFileChecker(0, "", newSemaphore(maxOpenFiles))
+	c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
 
 	for _, fs := range [][]string{
 		{"test/absolute_path.md"},
@@ -107,7 +107,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
 }
 
 func TestFileCheckerExtractURLs(t *testing.T) {
-	c := newFileChecker(0, "", newSemaphore(42))
+	c := newFileChecker(0, "", nil, newSemaphore(42))
 
 	for _, x := range []struct {
 		html string

main.go

@@ -30,7 +30,7 @@ func main() {
 	}()
 
 	rc := make(chan fileResult, maxOpenFiles)
-	c := newFileChecker(args.timeout, args.documentRoot, newSemaphore(args.concurrency))
+	c := newFileChecker(args.timeout, args.documentRoot, args.exclude, newSemaphore(args.concurrency))
 
 	go c.CheckMany(m.Filenames(), rc)

url_checker.go

@@ -5,6 +5,7 @@ import (
 	"net/url"
 	"os"
 	"path"
+	"regexp"
 	"sync"
 	"time"
@@ -14,20 +15,24 @@ import (
 type urlChecker struct {
 	timeout      time.Duration
 	documentRoot string
+	exclude      *regexp.Regexp
 	semaphore    semaphore
 }
 
-func newURLChecker(t time.Duration, d string, s semaphore) urlChecker {
-	return urlChecker{t, d, s}
+func newURLChecker(t time.Duration, d string, x *regexp.Regexp, s semaphore) urlChecker {
+	return urlChecker{t, d, x, s}
 }
 
 func (c urlChecker) Check(u string, f string) error {
 	u, local, err := c.resolveURL(u, f)
 
 	if err != nil {
 		return err
 	}
 
+	if c.exclude != nil && c.exclude.MatchString(u) {
+		return nil
+	}
+
 	if local {
 		_, err := os.Stat(u)
 		return err
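
As a standalone sketch (not part of this commit; the pattern and links are made
up), the guard above behaves like this: because it runs after resolveURL, one
pre-compiled regex can match both remote URLs and resolved local paths.

    package main

    import (
    	"fmt"
    	"regexp"
    )

    // shouldSkip mirrors the guard added to urlChecker.Check: a nil pattern
    // means --exclude was not given; otherwise the resolved link is matched.
    func shouldSkip(exclude *regexp.Regexp, resolved string) bool {
    	return exclude != nil && exclude.MatchString(resolved)
    }

    func main() {
    	exclude := regexp.MustCompile(`localhost:[0-9]+|drafts/`)

    	for _, link := range []string{
    		"http://localhost:3000/api", // remote development server
    		"docs/drafts/roadmap.md",    // resolved local path
    		"https://example.com/",      // not excluded
    	} {
    		fmt.Printf("%-30s skip=%v\n", link, shouldSkip(exclude, link))
    	}
    }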

url_checker_test.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"regexp"
 	"testing"
 	"time"
@@ -8,7 +9,7 @@ import (
 )
 
 func TestURLCheckerCheck(t *testing.T) {
-	c := newURLChecker(0, "", newSemaphore(1024))
+	c := newURLChecker(0, "", nil, newSemaphore(1024))
 
 	for _, u := range []string{"https://google.com", "README.md"} {
 		assert.Equal(t, nil, c.Check(u, "README.md"))
@@ -19,8 +20,20 @@ func TestURLCheckerCheck(t *testing.T) {
 	}
 }
 
+func TestURLCheckerCheckWithExclude(t *testing.T) {
+	c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), newSemaphore(1024))
+
+	for _, u := range []string{"http://localhost:1", "http://localhost:3", "README.md"} {
+		assert.Equal(t, nil, c.Check(u, "README.md"))
+	}
+
+	for _, u := range []string{"http://localhost:2", "READYOU.md"} {
+		assert.NotEqual(t, nil, c.Check(u, "README.md"))
+	}
+}
+
 func TestURLCheckerCheckWithTimeout(t *testing.T) {
-	c := newURLChecker(30*time.Second, "", newSemaphore(1024))
+	c := newURLChecker(30*time.Second, "", nil, newSemaphore(1024))
 
 	for _, u := range []string{"https://google.com", "README.md"} {
 		assert.Equal(t, nil, c.Check(u, "README.md"))
@@ -32,7 +45,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) {
 }
 
 func TestURLCheckerCheckMany(t *testing.T) {
-	c := newURLChecker(0, "", newSemaphore(1024))
+	c := newURLChecker(0, "", nil, newSemaphore(1024))
 
 	for _, us := range [][]string{{}, {"https://google.com", "README.md"}} {
 		rc := make(chan urlResult, 1024)
@@ -45,7 +58,7 @@ func TestURLCheckerCheckMany(t *testing.T) {
 	}
 }
 
 func TestURLCheckerResolveURL(t *testing.T) {
-	f := newURLChecker(0, "", newSemaphore(1024))
+	f := newURLChecker(0, "", nil, newSemaphore(1024))
 
 	for _, c := range []struct {
 		source, target string
@@ -63,7 +76,7 @@ func TestURLCheckerResolveURL(t *testing.T) {
 }
 
 func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
-	f := newURLChecker(0, "", newSemaphore(1024))
+	f := newURLChecker(0, "", nil, newSemaphore(1024))
 
 	u, _, err := f.resolveURL("/foo", "foo.md")
@@ -72,7 +85,7 @@ func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
 }
 
 func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) {
-	f := newURLChecker(0, "foo", newSemaphore(1024))
+	f := newURLChecker(0, "foo", nil, newSemaphore(1024))
 
 	for _, c := range []struct {
 		source, target string
