diff --git a/examples/html.feature b/examples/html.feature
new file mode 100644
index 0000000..ce326e8
--- /dev/null
+++ b/examples/html.feature
@@ -0,0 +1,24 @@
+Feature: HTML
+ Scenario: Check an empty HTML file
+ Given a file named "foo.html" with ""
+ When I successfully run `liche foo.html`
+ Then the stdout should contain exactly ""
+
+ Scenario: Check an HTML file
+ Given a file named "foo.html" with:
+ """
+
+
+
+ My title
+
+
+
+
+
+ """
+ When I successfully run `liche foo.html`
+ Then the stdout should contain exactly ""
diff --git a/file_checker.go b/file_checker.go
index a17359c..f1aebaf 100644
--- a/file_checker.go
+++ b/file_checker.go
@@ -4,6 +4,7 @@ import (
"bytes"
"io/ioutil"
"net/url"
+ "strings"
"sync"
"time"
@@ -20,13 +21,7 @@ func newFileChecker(timeout time.Duration, s semaphore) fileChecker {
}
func (c fileChecker) Check(f string) ([]urlResult, error) {
- bs, err := ioutil.ReadFile(f)
-
- if err != nil {
- return nil, err
- }
-
- n, err := html.Parse(bytes.NewReader(blackfriday.Run(bs)))
+ n, err := parseFile(f)
if err != nil {
return nil, err
@@ -66,6 +61,26 @@ func (c fileChecker) CheckMany(fs []string, rc chan<- fileResult) {
close(rc)
}
+// parseFile reads the file named f and parses its contents into an HTML node
+// tree. Files that isHTMLFile reports as HTML are parsed as they are; every
+// other file is first passed through blackfriday.Run and the result is parsed.
+// An error from reading the file or from html.Parse is returned unchanged.
+func parseFile(f string) (*html.Node, error) {
+	raw, err := ioutil.ReadFile(f)
+	if err != nil {
+		return nil, err
+	}
+
+	// HTML input is already markup; anything else goes through blackfriday.
+	markup := raw
+	if !isHTMLFile(f) {
+		markup = blackfriday.Run(raw)
+	}
+
+	root, err := html.Parse(bytes.NewReader(markup))
+	if err != nil {
+		return nil, err
+	}
+
+	return root, nil
+}
+
func extractURLs(n *html.Node) []string {
us := make(map[string]bool)
ns := make([]*html.Node, 0, 1024)
@@ -107,3 +122,7 @@ func isURL(s string) bool {
u, err := url.Parse(s)
return err == nil && (u.Scheme == "http" || u.Scheme == "https")
}
+
+// isHTMLFile reports whether the file name f ends in an HTML extension
+// (".html" or ".htm"). The comparison ignores letter case, so names such as
+// "INDEX.HTML" or "page.Htm" are recognized as HTML too.
+func isHTMLFile(f string) bool {
+	name := strings.ToLower(f)
+	return strings.HasSuffix(name, ".html") || strings.HasSuffix(name, ".htm")
+}