diff --git a/examples/html.feature b/examples/html.feature new file mode 100644 index 0000000..ce326e8 --- /dev/null +++ b/examples/html.feature @@ -0,0 +1,24 @@ +Feature: HTML + Scenario: Check an empty HTML file + Given a file named "foo.html" with "" + When I successfully run `liche foo.html` + Then the stdout should contain exactly "" + + Scenario: Check an HTML file + Given a file named "foo.html" with: + """ + + + + My title + + +
+ Google + Yahoo +
+ + + """ + When I successfully run `liche foo.html` + Then the stdout should contain exactly "" diff --git a/file_checker.go b/file_checker.go index a17359c..f1aebaf 100644 --- a/file_checker.go +++ b/file_checker.go @@ -4,6 +4,7 @@ import ( "bytes" "io/ioutil" "net/url" + "strings" "sync" "time" @@ -20,13 +21,7 @@ func newFileChecker(timeout time.Duration, s semaphore) fileChecker { } func (c fileChecker) Check(f string) ([]urlResult, error) { - bs, err := ioutil.ReadFile(f) - - if err != nil { - return nil, err - } - - n, err := html.Parse(bytes.NewReader(blackfriday.Run(bs))) + n, err := parseFile(f) if err != nil { return nil, err @@ -66,6 +61,26 @@ func (c fileChecker) CheckMany(fs []string, rc chan<- fileResult) { close(rc) } +func parseFile(f string) (*html.Node, error) { + bs, err := ioutil.ReadFile(f) + + if err != nil { + return nil, err + } + + if !isHTMLFile(f) { + bs = blackfriday.Run(bs) + } + + n, err := html.Parse(bytes.NewReader(bs)) + + if err != nil { + return nil, err + } + + return n, nil +} + func extractURLs(n *html.Node) []string { us := make(map[string]bool) ns := make([]*html.Node, 0, 1024) @@ -107,3 +122,7 @@ func isURL(s string) bool { u, err := url.Parse(s) return err == nil && (u.Scheme == "http" || u.Scheme == "https") } + +func isHTMLFile(f string) bool { + return strings.HasSuffix(f, ".html") || strings.HasSuffix(f, ".htm") +}