Fast Link Checker for Markdown and HTML in Go
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

105 lines
2.0 KiB

package main
import (
"errors"
"fmt"
"net/http"
"net/url"
"os"
"path"
"path/filepath"
"regexp"
"sync"
"time"
"github.com/valyala/fasthttp"
)
// urlChecker verifies that links found in a document point at something
// that exists: local links are looked up on the filesystem, remote links
// are fetched over HTTP.
//
// NOTE: newURLChecker fills this struct with a positional literal, so the
// field order below must not change without updating it.
type urlChecker struct {
// timeout is passed to fasthttp.GetTimeout for remote links; when it is
// zero, Check calls fasthttp.Get instead.
timeout time.Duration
// documentRoot is the directory that absolute link paths are joined onto.
// An empty value makes absolute paths fail to resolve.
documentRoot string
// excludedPattern, when non-nil, is matched against the resolved link;
// matching links are reported as valid without being checked.
excludedPattern *regexp.Regexp
// semaphore is requested before each remote fetch and released after it.
semaphore semaphore
// localOnly makes Check return errSkipped for non-local links instead of
// fetching them.
localOnly bool
}
// errSkipped is returned by Check for a non-local link when the checker is
// configured with localOnly, i.e. the link was deliberately not fetched.
var errSkipped = errors.New("skipped as instructed")
// newURLChecker builds a urlChecker from its individual settings.
//
//   - t: timeout for remote fetches (zero selects the call without a timeout)
//   - d: document root that absolute link paths are resolved against
//   - r: pattern of links to treat as valid without checking; may be nil
//   - s: semaphore requested around each remote fetch
//   - l: when true, remote links are skipped rather than fetched
func newURLChecker(t time.Duration, d string, r *regexp.Regexp, s semaphore, l bool) urlChecker {
	return urlChecker{
		timeout:         t,
		documentRoot:    d,
		excludedPattern: r,
		semaphore:       s,
		localOnly:       l,
	}
}
// Check reports whether the link u, found in file f, is valid.
//
// The link is first resolved with resolveURL; a resolution error is returned
// as is. A resolved link matching excludedPattern is accepted without any
// further work. Local links are checked with os.Stat and its error is
// returned. Remote links yield errSkipped when localOnly is set; otherwise
// they are fetched with fasthttp while holding the semaphore.
//
// A response status of 400 or above is reported as an error carrying the
// status text and code. Any other fetch error is returned unchanged, except
// for fasthttp's small-buffer error, which is treated as success.
func (c urlChecker) Check(u string, f string) error {
	target, isLocal, err := c.resolveURL(u, f)
	if err != nil {
		return err
	}

	if c.excludedPattern != nil && c.excludedPattern.MatchString(target) {
		return nil
	}

	switch {
	case isLocal:
		_, statErr := os.Stat(target)
		return statErr
	case c.localOnly:
		return errSkipped
	}

	c.semaphore.Request()
	defer c.semaphore.Release()

	var status int
	if c.timeout != 0 {
		status, _, err = fasthttp.GetTimeout(nil, target, c.timeout)
	} else {
		status, _, err = fasthttp.Get(nil, target)
	}

	// The status is inspected before err on purpose: an error status wins
	// over whatever error accompanied the response.
	if status >= http.StatusBadRequest {
		return fmt.Errorf("%s (HTTP error %d)", http.StatusText(status), status)
	}

	// fasthttp complains when the response headers do not fit its buffer;
	// the body is thrown away here, so that complaint is not a failure.
	if _, tooSmall := err.(*fasthttp.ErrSmallBuffer); tooSmall {
		return nil
	}

	return err
}
// CheckMany checks every link in us (all found in file f) concurrently, one
// goroutine per link, and sends one urlResult per link on rc. It blocks until
// every result has been sent and then closes rc, so the caller must either
// drain rc concurrently or give it enough buffer. Results arrive in no
// particular order.
func (c urlChecker) CheckMany(us []string, f string, rc chan<- urlResult) {
	var pending sync.WaitGroup
	pending.Add(len(us))

	for _, link := range us {
		go func(link string) {
			defer pending.Done()
			rc <- urlResult{link, c.Check(link, f)}
		}(link)
	}

	pending.Wait()
	close(rc)
}
// resolveURL classifies the link u, found in file f, and converts it into
// the target that should actually be checked.
//
// It returns (target, local, err):
//   - a link with a scheme other than "file" is returned unchanged with
//     local == false;
//   - a relative path is joined onto the directory of f, local == true;
//   - an absolute path is joined onto c.documentRoot, local == true, or an
//     error is returned if no document root is configured;
//   - a link that url.Parse rejects yields that parse error.
//
// Only the path component of the link is used for local targets, so query
// strings and fragments are dropped.
func (c urlChecker) resolveURL(u string, f string) (string, bool, error) {
	uu, err := url.Parse(u)
	if err != nil {
		return "", false, err
	}

	if uu.Scheme != "" && uu.Scheme != "file" {
		return u, false, nil
	}

	// uu.Path is a slash-separated URL path, so absoluteness is decided with
	// package path. The result, however, is an OS path: build it with
	// filepath so that directories using the platform separator (as returned
	// by filepath.Dir) are split correctly when ".." segments are cleaned.
	local := filepath.FromSlash(uu.Path)

	if !path.IsAbs(uu.Path) {
		return filepath.Join(filepath.Dir(f), local), true, nil
	}

	if c.documentRoot == "" {
		return "", false, fmt.Errorf("document root directory is not specified")
	}

	return filepath.Join(c.documentRoot, local), true, nil
}