Browse Source

Support HTML files

renovate/configure
Yota Toyama 8 years ago
parent
commit
a4d15256b9
  1. 24
      examples/html.feature
  2. 33
      file_checker.go

24
examples/html.feature

@ -0,0 +1,24 @@
Feature: HTML
Scenario: Check an empty HTML file
Given a file named "foo.html" with ""
When I successfully run `liche foo.html`
Then the stdout should contain exactly ""
Scenario: Check an HTML file
Given a file named "foo.html" with:
"""
<!DOCTYPE html>
<html>
<head>
<title>My title</title>
</head>
<body>
<div>
<a href="https://google.com">Google</a>
<a href="https://yahoo.com">Yahoo</a>
</div>
</body>
</html>
"""
When I successfully run `liche foo.html`
Then the stdout should contain exactly ""

33
file_checker.go

@ -4,6 +4,7 @@ import (
"bytes"
"io/ioutil"
"net/url"
"strings"
"sync"
"time"
@ -20,13 +21,7 @@ func newFileChecker(timeout time.Duration, s semaphore) fileChecker {
}
func (c fileChecker) Check(f string) ([]urlResult, error) {
bs, err := ioutil.ReadFile(f)
if err != nil {
return nil, err
}
n, err := html.Parse(bytes.NewReader(blackfriday.Run(bs)))
n, err := parseFile(f)
if err != nil {
return nil, err
@ -66,6 +61,26 @@ func (c fileChecker) CheckMany(fs []string, rc chan<- fileResult) {
close(rc)
}
// parseFile reads the file named f and parses its contents with html.Parse,
// returning the resulting root node.
//
// When isHTMLFile(f) is false, the raw bytes are first passed through
// blackfriday.Run (presumably rendering Markdown to HTML; confirm against the
// blackfriday docs) and the output of that call is what gets parsed.
//
// Any error from reading the file or from html.Parse is returned unchanged,
// together with a nil node.
func parseFile(f string) (*html.Node, error) {
	raw, readErr := ioutil.ReadFile(f)
	if readErr != nil {
		return nil, readErr
	}

	// HTML files are parsed as they are; everything else is converted first.
	markup := raw
	if !isHTMLFile(f) {
		markup = blackfriday.Run(raw)
	}

	root, parseErr := html.Parse(bytes.NewReader(markup))
	if parseErr != nil {
		return nil, parseErr
	}

	return root, nil
}
func extractURLs(n *html.Node) []string {
us := make(map[string]bool)
ns := make([]*html.Node, 0, 1024)
@ -107,3 +122,7 @@ func isURL(s string) bool {
u, err := url.Parse(s)
return err == nil && (u.Scheme == "http" || u.Scheme == "https")
}
// isHTMLFile reports whether the file name f ends in ".html" or ".htm".
//
// The comparison ignores letter case, so names such as "INDEX.HTML" or
// "page.Htm" are recognized as HTML as well.
func isHTMLFile(f string) bool {
	name := strings.ToLower(f)
	return strings.HasSuffix(name, ".html") || strings.HasSuffix(name, ".htm")
}

Loading…
Cancel
Save