Browse Source

Implement --exclude <regex> argument

This lets you pass a regex matching links (paths or URLs) to exclude
from checking. My use case is legitimate links in documentation that
may not work at the time of checking, for example:

- development servers
- private repos

I've implemented this all the way down in `urlChecker`, rather than
`fileChecker`, so that we can compare the fully resolved path/URL.

I would have found some of the codebase easier to read if longer
variable names had been used and struct fields named explicitly in
literals, but I've matched the house style for now.
renovate/configure
Dan Carley 7 years ago
parent
commit
1f08c70009
  1. 3
      README.md
  2. 14
      arguments.go
  3. 31
      arguments_test.go
  4. 5
      file_checker.go
  5. 8
      file_checker_test.go
  6. 2
      main.go
  7. 11
      url_checker.go
  8. 25
      url_checker_test.go

3
README.md

@ -23,13 +23,14 @@ go get -u github.com/raviqqe/liche
Link checker for Markdown and HTML
Usage:
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>...
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: 512]
-d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-x, --exclude <regex> Regex of links to exclude from checking.
-v, --verbose Be verbose.
```

14
arguments.go

@ -2,6 +2,7 @@ package main
import (
"fmt"
"regexp"
"strconv"
"time"
@ -13,13 +14,14 @@ const defaultConcurrency = maxOpenFiles / 2
const usage = `Link checker for Markdown and HTML
Usage:
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>...
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: %v]
-d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-x, --exclude <regex> Regex of links to exclude from checking.
-v, --verbose Be verbose.`
type arguments struct {
@ -28,6 +30,7 @@ type arguments struct {
concurrency int
timeout time.Duration
recursive bool
exclude *regexp.Regexp
verbose bool
}
@ -58,12 +61,21 @@ func getArguments(argv []string) (arguments, error) {
}
}
var exclude *regexp.Regexp
if args["--exclude"] != nil {
exclude, err = regexp.Compile(args["--exclude"].(string))
if err != nil {
return arguments{}, err
}
}
return arguments{
args["<filenames>"].([]string),
args["--document-root"].(string),
int(c),
time.Duration(t) * time.Second,
args["--recursive"].(bool),
exclude,
args["--verbose"].(bool),
}, nil
}

31
arguments_test.go

@ -1,6 +1,7 @@
package main
import (
"regexp"
"testing"
"time"
@ -14,47 +15,55 @@ func TestGetArguments(t *testing.T) {
}{
{
argv: []string{"file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, false},
},
{
argv: []string{"-c", "42", "file"},
args: arguments{[]string{"file"}, "", 42, 0, false, false},
args: arguments{[]string{"file"}, "", 42, 0, false, nil, false},
},
{
argv: []string{"--concurrency", "42", "file"},
args: arguments{[]string{"file"}, "", 42, 0, false, false},
args: arguments{[]string{"file"}, "", 42, 0, false, nil, false},
},
{
argv: []string{"-d", "directory", "file"},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false},
},
{
argv: []string{"--document-root", "directory", "file"},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false},
},
{
argv: []string{"-r", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false},
},
{
argv: []string{"--recursive", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false},
},
{
argv: []string{"-t", "42", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false},
},
{
argv: []string{"--timeout", "42", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false},
},
{
argv: []string{"-x", "^.*$", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false},
},
{
argv: []string{"--exclude", "^.*$", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false},
},
{
argv: []string{"-v", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true},
},
{
argv: []string{"--verbose", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true},
},
} {
args, err := getArguments(c.argv)

5
file_checker.go

@ -4,6 +4,7 @@ import (
"bytes"
"io/ioutil"
"net/url"
"regexp"
"strings"
"sync"
"time"
@ -17,8 +18,8 @@ type fileChecker struct {
semaphore semaphore
}
func newFileChecker(timeout time.Duration, d string, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, d, s), s}
func newFileChecker(timeout time.Duration, d string, x *regexp.Regexp, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, d, x, s), s}
}
func (c fileChecker) Check(f string) ([]urlResult, error) {

8
file_checker_test.go

@ -10,7 +10,7 @@ import (
)
func TestFileCheckerCheck(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(1024))
c := newFileChecker(0, "", nil, newSemaphore(1024))
for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} {
rs, err := c.Check(f)
@ -48,7 +48,7 @@ func TestFileCheckerCheck(t *testing.T) {
}
func TestFileCheckerCheckMany(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(maxOpenFiles))
c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
for _, fs := range [][]string{
{"README.md"},
@ -77,7 +77,7 @@ func TestFileCheckerCheckMany(t *testing.T) {
}
func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(maxOpenFiles))
c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
for _, fs := range [][]string{
{"test/absolute_path.md"},
@ -107,7 +107,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
}
func TestFileCheckerExtractURLs(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(42))
c := newFileChecker(0, "", nil, newSemaphore(42))
for _, x := range []struct {
html string

2
main.go

@ -30,7 +30,7 @@ func main() {
}()
rc := make(chan fileResult, maxOpenFiles)
c := newFileChecker(args.timeout, args.documentRoot, newSemaphore(args.concurrency))
c := newFileChecker(args.timeout, args.documentRoot, args.exclude, newSemaphore(args.concurrency))
go c.CheckMany(m.Filenames(), rc)

11
url_checker.go

@ -5,6 +5,7 @@ import (
"net/url"
"os"
"path"
"regexp"
"sync"
"time"
@ -14,20 +15,24 @@ import (
type urlChecker struct {
timeout time.Duration
documentRoot string
exclude *regexp.Regexp
semaphore semaphore
}
func newURLChecker(t time.Duration, d string, s semaphore) urlChecker {
return urlChecker{t, d, s}
func newURLChecker(t time.Duration, d string, x *regexp.Regexp, s semaphore) urlChecker {
return urlChecker{t, d, x, s}
}
func (c urlChecker) Check(u string, f string) error {
u, local, err := c.resolveURL(u, f)
if err != nil {
return err
}
if c.exclude != nil && c.exclude.MatchString(u) {
return nil
}
if local {
_, err := os.Stat(u)
return err

25
url_checker_test.go

@ -1,6 +1,7 @@
package main
import (
"regexp"
"testing"
"time"
@ -8,7 +9,7 @@ import (
)
func TestURLCheckerCheck(t *testing.T) {
c := newURLChecker(0, "", newSemaphore(1024))
c := newURLChecker(0, "", nil, newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md"))
@ -19,8 +20,20 @@ func TestURLCheckerCheck(t *testing.T) {
}
}
func TestURLCheckerCheckWithExclude(t *testing.T) {
c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), newSemaphore(1024))
for _, u := range []string{"http://localhost:1", "http://localhost:3", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md"))
}
for _, u := range []string{"http://localhost:2", "READYOU.md"} {
assert.NotEqual(t, nil, c.Check(u, "README.md"))
}
}
func TestURLCheckerCheckWithTimeout(t *testing.T) {
c := newURLChecker(30*time.Second, "", newSemaphore(1024))
c := newURLChecker(30*time.Second, "", nil, newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md"))
@ -32,7 +45,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) {
}
func TestURLCheckerCheckMany(t *testing.T) {
c := newURLChecker(0, "", newSemaphore(1024))
c := newURLChecker(0, "", nil, newSemaphore(1024))
for _, us := range [][]string{{}, {"https://google.com", "README.md"}} {
rc := make(chan urlResult, 1024)
@ -45,7 +58,7 @@ func TestURLCheckerCheckMany(t *testing.T) {
}
}
func TestURLCheckerResolveURL(t *testing.T) {
f := newURLChecker(0, "", newSemaphore(1024))
f := newURLChecker(0, "", nil, newSemaphore(1024))
for _, c := range []struct {
source, target string
@ -63,7 +76,7 @@ func TestURLCheckerResolveURL(t *testing.T) {
}
func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
f := newURLChecker(0, "", newSemaphore(1024))
f := newURLChecker(0, "", nil, newSemaphore(1024))
u, _, err := f.resolveURL("/foo", "foo.md")
@ -72,7 +85,7 @@ func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
}
func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) {
f := newURLChecker(0, "foo", newSemaphore(1024))
f := newURLChecker(0, "foo", nil, newSemaphore(1024))
for _, c := range []struct {
source, target string

Loading…
Cancel
Save