diff --git a/main.go b/main.go
index 38b5266..352db4a 100644
--- a/main.go
+++ b/main.go
@@ -30,6 +30,41 @@ func main() {
 	html.Parse(strings.NewReader(mark.Render(string(bs))))
 }
 
+// extractUrls walks the HTML tree rooted at n and returns the set of href
+// values found on <a> elements, as map keys whose value is true. A nil root
+// yields an empty set.
+func extractUrls(n *html.Node) map[string]bool {
+	urls := make(map[string]bool)
+	if n == nil {
+		return urls
+	}
+
+	// Depth-first traversal driven by an explicit stack of pending nodes.
+	stack := make([]*html.Node, 0, 1024)
+	stack = append(stack, n)
+
+	for len(stack) > 0 {
+		last := len(stack) - 1
+		node := stack[last]
+		stack = stack[:last]
+
+		if node.Type == html.ElementNode && node.Data == "a" {
+			for _, attr := range node.Attr {
+				if attr.Key == "href" {
+					urls[attr.Val] = true
+					break // only the first href of an element is recorded
+				}
+			}
+		}
+
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			stack = append(stack, child)
+		}
+	}
+
+	return urls
+}
+
 func getArgs() map[string]interface{} {
 	usage := `Link checker for Markdown and HTML
 
diff --git a/main_test.go b/main_test.go
new file mode 100644
index 0000000..f0dec47
--- /dev/null
+++ b/main_test.go
@@ -0,0 +1,50 @@
+package main
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"golang.org/x/net/html"
+)
+
+// TestExtractUrls checks that extractUrls counts each distinct href once.
+//
+// NOTE(review): the anchor/div markup in the fixtures below is reconstructed;
+// the exact href values of the original test could not be recovered — confirm.
+func TestExtractUrls(t *testing.T) {
+	for _, c := range []struct {
+		html    string
+		numUrls int
+	}{
+		{`<a href="https://google.com">Google</a>`, 1},
+		{
+			`
+			<div>
+				<a href="https://google.com">Google</a>
+				<a href="https://google.com">Google</a>
+			</div>
+			`,
+			1,
+		},
+		{
+			`
+			<div>
+				<a href="https://google.com">Google</a>
+				<a href="https://yahoo.com">Yahoo!</a>
+			</div>
+			`,
+			2,
+		},
+	} {
+		n, err := html.Parse(strings.NewReader(c.html))
+
+		assert.Equal(t, nil, err)
+		assert.Equal(t, c.numUrls, len(extractUrls(n)))
+	}
+}
+
+// TestExtractUrlsNilNode checks that a nil root yields an empty set, not a panic.
+func TestExtractUrlsNilNode(t *testing.T) {
+	assert.Equal(t, 0, len(extractUrls(nil)))
+}