diff --git a/arguments.go b/arguments.go
index 61d980c..b17182f 100644
--- a/arguments.go
+++ b/arguments.go
@@ -1,35 +1,72 @@
 package main
 
 import (
+	"fmt"
+	"runtime"
 	"strconv"
 	"time"
 
 	"github.com/docopt/docopt-go"
 )
 
+// maxConcurrency is the upper bound applied to defaultConcurrency.
+const maxConcurrency = 256
+
+// defaultConcurrency is the value substituted into the usage text as the
+// default of --concurrency: 8 per CPU, capped at maxConcurrency.
+var defaultConcurrency = func() int {
+	n := 8 * runtime.NumCPU() // 8 is an empirical value.
+
+	if n < maxConcurrency {
+		return n
+	}
+
+	return maxConcurrency
+}()
+
+// usage is the docopt usage text. It is a format string whose single %v verb
+// getArgs fills with defaultConcurrency before parsing.
 const usage = `Link checker for Markdown and HTML
 
 Usage:
-	liche [-t ] [-v] ...
+	liche [-c ] [-t ] [-v] ...
 
 Options:
-	-v, --verbose Be verbose
-	-t, --timeout Set timeout for HTTP requests in seconds [default: 5]`
+	-c, --concurrency Set max number of concurrent HTTP requests [default: %v]
+	-t, --timeout Set timeout for HTTP requests in seconds [default: 5]
+	-v, --verbose Be verbose`
 
+// arguments holds the typed values of the parsed command-line arguments.
 type arguments struct {
-	filenames []string
-	timeout   time.Duration
-	verbose   bool
+	filenames   []string
+	concurrency int
+	timeout     time.Duration
+	verbose     bool
 }
 
+// getArgs parses the command line with docopt and converts the option
+// values to their typed form. It returns an error when docopt reports one,
+// when --concurrency or --timeout is not a number, or when --concurrency is
+// less than 1.
 func getArgs() (arguments, error) {
-	args, err := docopt.Parse(usage, nil, true, "liche", true)
+	args, err := docopt.Parse(fmt.Sprintf(usage, defaultConcurrency), nil, true, "liche", true)
+
+	if err != nil {
+		return arguments{}, err
+	}
+
+	c, err := strconv.ParseInt(args["--concurrency"].(string), 10, 32)
 
 	if err != nil {
 		return arguments{}, err
 	}
 
-	f, err := strconv.ParseFloat(args["--timeout"].(string), 64)
+	// The value sizes the request semaphore, so it must allow at least one request.
+	if c < 1 {
+		return arguments{}, fmt.Errorf("concurrency must be a positive integer, got %d", c)
+	}
+
+	t, err := strconv.ParseFloat(args["--timeout"].(string), 64)
 
 	if err != nil {
 		return arguments{}, err
@@ -37,7 +74,8 @@ func getArgs() (arguments, error) {
 
 	return arguments{
 		args[""].([]string),
-		time.Duration(f) * time.Second,
+		int(c),
+		time.Duration(t * float64(time.Second)), // scale before converting so fractional seconds are kept
 		args["--verbose"].(bool),
 	}, nil
 }
diff --git a/examples/markdown.feature b/examples/markdown.feature
index 9e546be..8543d09 100644
--- a/examples/markdown.feature
+++ b/examples/markdown.feature
@@ -112,3 +112,11 @@ Feature: Markdown
     """
     When I successfully run `liche --timeout 10 foo.md`
     Then the stdout should contain exactly ""
+
+  Scenario: Set concurrency
+    Given a file named "foo.md" with:
+    """
+    [Google](https://google.com)
+    """
+    When I successfully run `liche --concurrency 10 foo.md`
+    Then the stdout should contain exactly ""
diff --git a/file_checker.go b/file_checker.go
index dcba151..a17359c 100644
--- a/file_checker.go
+++ b/file_checker.go
@@ -15,8 +15,9 @@ type fileChecker struct {
 	urlChecker urlChecker
 }
 
-func newFileChecker(timeout time.Duration) fileChecker {
-	return fileChecker{newURLChecker(timeout)}
+// newFileChecker returns a fileChecker wrapping newURLChecker(timeout, s).
+func newFileChecker(timeout time.Duration, s semaphore) fileChecker {
+	return fileChecker{newURLChecker(timeout, s)}
 }
 
 func (c fileChecker) Check(f string) ([]urlResult, error) {
diff --git a/main.go b/main.go
index c1e0ae4..bda8dc2 100644
--- a/main.go
+++ b/main.go
@@ -11,7 +11,8 @@ func main() {
 	}
 
 	rc := make(chan fileResult, len(args.filenames))
-	c := newFileChecker(args.timeout)
+	s := newSemaphore(args.concurrency)
+	c := newFileChecker(args.timeout, s)
 
 	go c.CheckMany(args.filenames, rc)
 
diff --git a/url_checker.go b/url_checker.go
index be880ed..0fabf8c 100644
--- a/url_checker.go
+++ b/url_checker.go
@@ -2,34 +2,24 @@ package main
 
 import (
 	"net/http"
-	"runtime"
 	"sync"
 	"time"
 )
 
-const maxOpenFiles = 512
-
-var sem = make(chan bool, func() int {
-	n := 8 * runtime.NumCPU() // 8 is an empirical value.
-
-	if n < maxOpenFiles {
-		return n
-	}
-
-	return maxOpenFiles
-}())
-
+// urlChecker pairs an HTTP client with the semaphore that Check requests before each GET.
 type urlChecker struct {
-	client http.Client
+	client    http.Client
+	semaphore semaphore
 }
 
-func newURLChecker(timeout time.Duration) urlChecker {
-	return urlChecker{http.Client{Timeout: timeout}}
+// newURLChecker returns a urlChecker holding semaphore s and an HTTP client with timeout t.
+func newURLChecker(t time.Duration, s semaphore) urlChecker {
+	return urlChecker{http.Client{Timeout: t}, s}
 }
 
 func (c urlChecker) Check(s string) (resultErr error) {
-	sem <- true
-	defer func() { <-sem }()
+	c.semaphore.Request()
+	defer c.semaphore.Release()
 
 	res, err := c.client.Get(s)
 