From 1481c3785637ad284d025bca80b69d243cc639ab Mon Sep 17 00:00:00 2001
From: Yota Toyama
Date: Sat, 18 Nov 2017 01:25:49 +0900
Subject: [PATCH] Check if href is a valid URL

---
 main.go      |  8 +++++++-
 main_test.go | 10 ++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/main.go b/main.go
index dc1baaf..36ee2cb 100644
--- a/main.go
+++ b/main.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"io/ioutil"
 	"net/http"
+	"net/url"
 	"os"
 	"strings"
 
@@ -61,7 +62,7 @@ func extractUrls(n *html.Node) map[string]bool {
 
 	if n.Type == html.ElementNode && n.Data == "a" {
 		for _, a := range n.Attr {
-			if a.Key == "href" {
+			if a.Key == "href" && isUrl(a.Val) {
 				ss[a.Val] = true
 				break
 			}
@@ -76,6 +77,11 @@ func extractUrls(n *html.Node) map[string]bool {
 	return ss
 }
 
+func isUrl(s string) bool {
+	u, err := url.Parse(s)
+	return err == nil && (u.Scheme == "http" || u.Scheme == "https")
+}
+
 func getArgs() map[string]interface{} {
 	usage := `Link checker for Markdown and HTML
 
diff --git a/main_test.go b/main_test.go
index 30ec969..fc934b7 100644
--- a/main_test.go
+++ b/main_test.go
@@ -47,3 +47,13 @@ func TestUrlParse(t *testing.T) {
 	assert.Equal(t, nil, err)
 	assert.Equal(t, "", u.Scheme)
 }
+
+func TestIsUrl(t *testing.T) {
+	for _, s := range []string{"http://google.com", "https://google.com"} {
+		assert.True(t, isUrl(s))
+	}
+
+	for _, s := range []string{"", "file-path"} {
+		assert.False(t, isUrl(s))
+	}
+}