Browse Source

Merge pull request #8 from dcarley/exclude_argument

Implement --exclude <regex> argument
renovate/configure
Yota Toyama 7 years ago
committed by GitHub
parent
commit
1a8f349f91
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      README.md
  2. 14
      arguments.go
  3. 31
      arguments_test.go
  4. 5
      file_checker.go
  5. 8
      file_checker_test.go
  6. 2
      main.go
  7. 11
      url_checker.go
  8. 25
      url_checker_test.go

3
README.md

@ -23,13 +23,14 @@ go get -u github.com/raviqqe/liche
Link checker for Markdown and HTML Link checker for Markdown and HTML
Usage: Usage:
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>... liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
Options: Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: 512] -c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: 512]
-d, --document-root <directory> Set document root directory for absolute paths. -d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively -r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default. -t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-x, --exclude <regex> Regex of links to exclude from checking.
-v, --verbose Be verbose. -v, --verbose Be verbose.
``` ```

14
arguments.go

@ -2,6 +2,7 @@ package main
import ( import (
"fmt" "fmt"
"regexp"
"strconv" "strconv"
"time" "time"
@ -13,13 +14,14 @@ const defaultConcurrency = maxOpenFiles / 2
const usage = `Link checker for Markdown and HTML const usage = `Link checker for Markdown and HTML
Usage: Usage:
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-v] <filenames>... liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
Options: Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: %v] -c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: %v]
-d, --document-root <directory> Set document root directory for absolute paths. -d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively -r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default. -t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-x, --exclude <regex> Regex of links to exclude from checking.
-v, --verbose Be verbose.` -v, --verbose Be verbose.`
type arguments struct { type arguments struct {
@ -28,6 +30,7 @@ type arguments struct {
concurrency int concurrency int
timeout time.Duration timeout time.Duration
recursive bool recursive bool
exclude *regexp.Regexp
verbose bool verbose bool
} }
@ -58,12 +61,21 @@ func getArguments(argv []string) (arguments, error) {
} }
} }
var exclude *regexp.Regexp
if args["--exclude"] != nil {
exclude, err = regexp.Compile(args["--exclude"].(string))
if err != nil {
return arguments{}, err
}
}
return arguments{ return arguments{
args["<filenames>"].([]string), args["<filenames>"].([]string),
args["--document-root"].(string), args["--document-root"].(string),
int(c), int(c),
time.Duration(t) * time.Second, time.Duration(t) * time.Second,
args["--recursive"].(bool), args["--recursive"].(bool),
exclude,
args["--verbose"].(bool), args["--verbose"].(bool),
}, nil }, nil
} }

31
arguments_test.go

@ -1,6 +1,7 @@
package main package main
import ( import (
"regexp"
"testing" "testing"
"time" "time"
@ -14,47 +15,55 @@ func TestGetArguments(t *testing.T) {
}{ }{
{ {
argv: []string{"file"}, argv: []string{"file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, false}, args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, false},
}, },
{ {
argv: []string{"-c", "42", "file"}, argv: []string{"-c", "42", "file"},
args: arguments{[]string{"file"}, "", 42, 0, false, false}, args: arguments{[]string{"file"}, "", 42, 0, false, nil, false},
}, },
{ {
argv: []string{"--concurrency", "42", "file"}, argv: []string{"--concurrency", "42", "file"},
args: arguments{[]string{"file"}, "", 42, 0, false, false}, args: arguments{[]string{"file"}, "", 42, 0, false, nil, false},
}, },
{ {
argv: []string{"-d", "directory", "file"}, argv: []string{"-d", "directory", "file"},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false}, args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false},
}, },
{ {
argv: []string{"--document-root", "directory", "file"}, argv: []string{"--document-root", "directory", "file"},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, false}, args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, false, nil, false},
}, },
{ {
argv: []string{"-r", "file"}, argv: []string{"-r", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false}, args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false},
}, },
{ {
argv: []string{"--recursive", "file"}, argv: []string{"--recursive", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, false}, args: arguments{[]string{"file"}, "", defaultConcurrency, 0, true, nil, false},
}, },
{ {
argv: []string{"-t", "42", "file"}, argv: []string{"-t", "42", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false}, args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false},
}, },
{ {
argv: []string{"--timeout", "42", "file"}, argv: []string{"--timeout", "42", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, false}, args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, false, nil, false},
},
{
argv: []string{"-x", "^.*$", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false},
},
{
argv: []string{"--exclude", "^.*$", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, regexp.MustCompile(`^.*$`), false},
}, },
{ {
argv: []string{"-v", "file"}, argv: []string{"-v", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true}, args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true},
}, },
{ {
argv: []string{"--verbose", "file"}, argv: []string{"--verbose", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, true}, args: arguments{[]string{"file"}, "", defaultConcurrency, 0, false, nil, true},
}, },
} { } {
args, err := getArguments(c.argv) args, err := getArguments(c.argv)

5
file_checker.go

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"io/ioutil" "io/ioutil"
"net/url" "net/url"
"regexp"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -17,8 +18,8 @@ type fileChecker struct {
semaphore semaphore semaphore semaphore
} }
func newFileChecker(timeout time.Duration, d string, s semaphore) fileChecker { func newFileChecker(timeout time.Duration, d string, x *regexp.Regexp, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, d, s), s} return fileChecker{newURLChecker(timeout, d, x, s), s}
} }
func (c fileChecker) Check(f string) ([]urlResult, error) { func (c fileChecker) Check(f string) ([]urlResult, error) {

8
file_checker_test.go

@ -10,7 +10,7 @@ import (
) )
func TestFileCheckerCheck(t *testing.T) { func TestFileCheckerCheck(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(1024)) c := newFileChecker(0, "", nil, newSemaphore(1024))
for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} { for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} {
rs, err := c.Check(f) rs, err := c.Check(f)
@ -48,7 +48,7 @@ func TestFileCheckerCheck(t *testing.T) {
} }
func TestFileCheckerCheckMany(t *testing.T) { func TestFileCheckerCheckMany(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(maxOpenFiles)) c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
for _, fs := range [][]string{ for _, fs := range [][]string{
{"README.md"}, {"README.md"},
@ -77,7 +77,7 @@ func TestFileCheckerCheckMany(t *testing.T) {
} }
func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(maxOpenFiles)) c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
for _, fs := range [][]string{ for _, fs := range [][]string{
{"test/absolute_path.md"}, {"test/absolute_path.md"},
@ -107,7 +107,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
} }
func TestFileCheckerExtractURLs(t *testing.T) { func TestFileCheckerExtractURLs(t *testing.T) {
c := newFileChecker(0, "", newSemaphore(42)) c := newFileChecker(0, "", nil, newSemaphore(42))
for _, x := range []struct { for _, x := range []struct {
html string html string

2
main.go

@ -30,7 +30,7 @@ func main() {
}() }()
rc := make(chan fileResult, maxOpenFiles) rc := make(chan fileResult, maxOpenFiles)
c := newFileChecker(args.timeout, args.documentRoot, newSemaphore(args.concurrency)) c := newFileChecker(args.timeout, args.documentRoot, args.exclude, newSemaphore(args.concurrency))
go c.CheckMany(m.Filenames(), rc) go c.CheckMany(m.Filenames(), rc)

11
url_checker.go

@ -5,6 +5,7 @@ import (
"net/url" "net/url"
"os" "os"
"path" "path"
"regexp"
"sync" "sync"
"time" "time"
@ -14,20 +15,24 @@ import (
type urlChecker struct { type urlChecker struct {
timeout time.Duration timeout time.Duration
documentRoot string documentRoot string
exclude *regexp.Regexp
semaphore semaphore semaphore semaphore
} }
func newURLChecker(t time.Duration, d string, s semaphore) urlChecker { func newURLChecker(t time.Duration, d string, x *regexp.Regexp, s semaphore) urlChecker {
return urlChecker{t, d, s} return urlChecker{t, d, x, s}
} }
func (c urlChecker) Check(u string, f string) error { func (c urlChecker) Check(u string, f string) error {
u, local, err := c.resolveURL(u, f) u, local, err := c.resolveURL(u, f)
if err != nil { if err != nil {
return err return err
} }
if c.exclude != nil && c.exclude.MatchString(u) {
return nil
}
if local { if local {
_, err := os.Stat(u) _, err := os.Stat(u)
return err return err

25
url_checker_test.go

@ -1,6 +1,7 @@
package main package main
import ( import (
"regexp"
"testing" "testing"
"time" "time"
@ -8,7 +9,7 @@ import (
) )
func TestURLCheckerCheck(t *testing.T) { func TestURLCheckerCheck(t *testing.T) {
c := newURLChecker(0, "", newSemaphore(1024)) c := newURLChecker(0, "", nil, newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} { for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md")) assert.Equal(t, nil, c.Check(u, "README.md"))
@ -19,8 +20,20 @@ func TestURLCheckerCheck(t *testing.T) {
} }
} }
// TestURLCheckerCheckWithExclude exercises urlChecker.Check on a checker built
// with an exclude pattern.
//
// Links that match the pattern (localhost ports 1 and 3) must come back with a
// nil error, as must the README.md link, which the pattern does not match.
// Links that neither match the pattern nor resolve successfully
// (localhost port 2 and READYOU.md) must come back with a non-nil error.
func TestURLCheckerCheckWithExclude(t *testing.T) {
	pattern := regexp.MustCompile(`^http:\/\/localhost:[13]$`)
	checker := newURLChecker(0, "", pattern, newSemaphore(1024))

	// Every link below is checked as if it appeared in this file.
	const source = "README.md"

	passing := []string{"http://localhost:1", "http://localhost:3", "README.md"}
	for i := range passing {
		err := checker.Check(passing[i], source)
		assert.Equal(t, nil, err)
	}

	// Not covered by the exclude pattern, so these are still checked and
	// are expected to fail.
	failing := []string{"http://localhost:2", "READYOU.md"}
	for i := range failing {
		err := checker.Check(failing[i], source)
		assert.NotEqual(t, nil, err)
	}
}
func TestURLCheckerCheckWithTimeout(t *testing.T) { func TestURLCheckerCheckWithTimeout(t *testing.T) {
c := newURLChecker(30*time.Second, "", newSemaphore(1024)) c := newURLChecker(30*time.Second, "", nil, newSemaphore(1024))
for _, u := range []string{"https://google.com", "README.md"} { for _, u := range []string{"https://google.com", "README.md"} {
assert.Equal(t, nil, c.Check(u, "README.md")) assert.Equal(t, nil, c.Check(u, "README.md"))
@ -32,7 +45,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) {
} }
func TestURLCheckerCheckMany(t *testing.T) { func TestURLCheckerCheckMany(t *testing.T) {
c := newURLChecker(0, "", newSemaphore(1024)) c := newURLChecker(0, "", nil, newSemaphore(1024))
for _, us := range [][]string{{}, {"https://google.com", "README.md"}} { for _, us := range [][]string{{}, {"https://google.com", "README.md"}} {
rc := make(chan urlResult, 1024) rc := make(chan urlResult, 1024)
@ -45,7 +58,7 @@ func TestURLCheckerCheckMany(t *testing.T) {
} }
} }
func TestURLCheckerResolveURL(t *testing.T) { func TestURLCheckerResolveURL(t *testing.T) {
f := newURLChecker(0, "", newSemaphore(1024)) f := newURLChecker(0, "", nil, newSemaphore(1024))
for _, c := range []struct { for _, c := range []struct {
source, target string source, target string
@ -63,7 +76,7 @@ func TestURLCheckerResolveURL(t *testing.T) {
} }
func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
f := newURLChecker(0, "", newSemaphore(1024)) f := newURLChecker(0, "", nil, newSemaphore(1024))
u, _, err := f.resolveURL("/foo", "foo.md") u, _, err := f.resolveURL("/foo", "foo.md")
@ -72,7 +85,7 @@ func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) {
} }
func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) { func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) {
f := newURLChecker(0, "foo", newSemaphore(1024)) f := newURLChecker(0, "foo", nil, newSemaphore(1024))
for _, c := range []struct { for _, c := range []struct {
source, target string source, target string

Loading…
Cancel
Save