Browse Source

improve tag detection by looking for spaces around the tag

main
Nicolas Massé 5 years ago
parent
commit
fa50832c0a
  1. 4
      migrate.go
  2. 28
      note.go
  3. 34
      note_test.go

4
migrate.go

@ -82,13 +82,13 @@ func MigrateNotes(from string, to string, tagFile string) error {
if tagOption.TargetDirectory != "" && targetDir != "" && targetDir != tagOption.TargetDirectory { if tagOption.TargetDirectory != "" && targetDir != "" && targetDir != tagOption.TargetDirectory {
log.Printf("WARNING: Target directory '%s' for tag '%s' conflict with directives (%s) from another tag. Continuing with existing value.\n", tagOption.TargetDirectory, tagName, targetDir) log.Printf("WARNING: Target directory '%s' for tag '%s' conflict with directives (%s) from another tag. Continuing with existing value.\n", tagOption.TargetDirectory, tagName, targetDir)
} else { } else if targetDir == "" {
targetDir = tagOption.TargetDirectory targetDir = tagOption.TargetDirectory
} }
if tagOption.HandlingStrategy != "" && handlingStrategy != "" && handlingStrategy != tagOption.HandlingStrategy { if tagOption.HandlingStrategy != "" && handlingStrategy != "" && handlingStrategy != tagOption.HandlingStrategy {
log.Printf("WARNING: Handling strategy '%s' for tag '%s' conflict with directives (%s) from another tag. Continuing with existing value.\n", tagOption.HandlingStrategy, tagName, handlingStrategy) log.Printf("WARNING: Handling strategy '%s' for tag '%s' conflict with directives (%s) from another tag. Continuing with existing value.\n", tagOption.HandlingStrategy, tagName, handlingStrategy)
} else { } else if handlingStrategy == "" {
if tagOption.HandlingStrategy == "same-folder" || tagOption.HandlingStrategy == "one-note-per-folder" || tagOption.HandlingStrategy == "" { if tagOption.HandlingStrategy == "same-folder" || tagOption.HandlingStrategy == "one-note-per-folder" || tagOption.HandlingStrategy == "" {
handlingStrategy = tagOption.HandlingStrategy handlingStrategy = tagOption.HandlingStrategy
} else { } else {

28
note.go

@ -15,6 +15,8 @@ import (
"regexp" "regexp"
"sort" "sort"
"strings" "strings"
"unicode"
"unicode/utf8"
) )
// Regular expression to detect Bear tags. // Regular expression to detect Bear tags.
@ -32,9 +34,10 @@ var reFile *regexp.Regexp
var reImage *regexp.Regexp var reImage *regexp.Regexp
func init() { func init() {
// This regex has a catch: it catches a leading and trailing extra character. // This regex has a catch: it matches a leading and trailing extra character.
// This is because Go does not support look-ahead/look-behind markers. // This is because Go does not support look-ahead/look-behind markers.
reTag = regexp.MustCompile(`(^|\s)#([\p{L}][-\p{L}\p{N}/$_§%=+°({[\\@]*)($|\s)`) // So we need to implement look-ahead/look-behind ourselves.
reTag = regexp.MustCompile(`(^|.?)#([\p{L}][-\p{L}\p{N}/$_§%=+°({[\\@]*)(.?|$)`)
// Those two regex are straightforward // Those two regex are straightforward
reFile = regexp.MustCompile(`<a +href=['"]([^'"]+)['"]>([^<]+)</a>`) reFile = regexp.MustCompile(`<a +href=['"]([^'"]+)['"]>([^<]+)</a>`)
@ -47,9 +50,9 @@ type Tag struct {
Name string Name string
// Position of this tag in the Markdown file // Position of this tag in the Markdown file
position []int position []int
// The character before the tag (see Regex description above) // The character before the tag (for look-behind, see Regex description above)
before string before string
// The character after the tag (see Regex description above) // The character after the tag (for look-ahead, see Regex description above)
after string after string
} }
@ -57,13 +60,23 @@ type Tag struct {
// characters) and position in file. // characters) and position in file.
func NewTag(content string, position []int) Tag { func NewTag(content string, position []int) Tag {
var tag Tag var tag Tag
tag.position = position
parts := reTag.FindStringSubmatch(content) parts := reTag.FindStringSubmatch(content)
if len(parts) > 0 { if len(parts) > 0 {
beforeIsEmpty := len(parts[1]) == 0
before, _ := utf8.DecodeRuneInString(parts[1])
beforeIsSpace := unicode.IsSpace(before)
afterIsEmpty := len(parts[3]) == 0
after, _ := utf8.DecodeRuneInString(parts[3])
afterIsSpace := unicode.IsSpace(after)
// A valid tag is surrounded by either a space character or nothing
if (beforeIsEmpty || beforeIsSpace) && (afterIsEmpty || afterIsSpace) {
tag.position = position
tag.before = parts[1] tag.before = parts[1]
tag.Name = parts[2] tag.Name = parts[2]
tag.after = parts[3] tag.after = parts[3]
} }
}
return tag return tag
} }
@ -151,7 +164,10 @@ func LoadNote(content string) *Note {
var note Note var note Note
note.content = content note.content = content
for _, match := range reTag.FindAllStringIndex(content, -1) { for _, match := range reTag.FindAllStringIndex(content, -1) {
note.Tags = append(note.Tags, NewTag(content[match[0]:match[1]], match)) tag := NewTag(content[match[0]:match[1]], match)
if len(tag.Name) > 0 {
note.Tags = append(note.Tags, tag)
}
} }
for _, match := range reFile.FindAllStringIndex(content, -1) { for _, match := range reFile.FindAllStringIndex(content, -1) {
note.Files = append(note.Files, NewFile(content[match[0]:match[1]], match)) note.Files = append(note.Files, NewFile(content[match[0]:match[1]], match))

34
note_test.go

@ -18,6 +18,16 @@ func TestNewTag(t *testing.T) {
assert.Equal(t, " ", tag.String(), "tag content must be empty") assert.Equal(t, " ", tag.String(), "tag content must be empty")
} }
func TestNewTagLookAround(t *testing.T) {
testCases := [][]string{{" #test/123 ", "test/123"}, {"/#trap ", ""}, {" #trap#", ""}, {"#ok", "ok"}}
for _, testCase := range testCases {
tagContent := testCase[0]
expected := testCase[1]
tag := NewTag(tagContent, []int{0, len(tagContent)})
assert.Equal(t, expected, tag.Name, "tag name must be equal")
}
}
func TestNewFile(t *testing.T) { func TestNewFile(t *testing.T) {
fileContent := `<a href='note/my%20file.pdf'>my file.pdf</a>` fileContent := `<a href='note/my%20file.pdf'>my file.pdf</a>`
file := NewFile(fileContent, []int{0, len(fileContent)}) file := NewFile(fileContent, []int{0, len(fileContent)})
@ -61,21 +71,29 @@ And some tags in a list
- #foo/bar@baz - #foo/bar@baz
- #and-a_very%special$one/avec/des/éèà - #and-a_very%special$one/avec/des/éèà
[it's a trap](https://www.perdu.com/#toto) [it's a trap](https://www.perdu.com/#trap)
Another trap: https://www.perdu.com/#trap
another trap: world #1 another trap: world #1
Traps, traps, traps... #trap#trap
#two-tags #one-after-another
#end` #end`
note := LoadNote(md) note := LoadNote(md)
// Tags // Tags
assert.Len(t, note.Tags, 5, "There must be 5 tags") assert.Len(t, note.Tags, 7, "There must be 7 tags")
assert.Equal(t, "tag", note.Tags[0].Name, "first tag must be 'tag'") assert.Equal(t, "tag", note.Tags[0].Name, "first tag must be 'tag'")
assert.Equal(t, "foo", note.Tags[1].Name, "second tag must be 'foo'") assert.Equal(t, "foo", note.Tags[1].Name, "second tag must be 'foo'")
assert.Equal(t, "foo/bar@baz", note.Tags[2].Name, "third tag must be 'foo/bar@baz'") assert.Equal(t, "foo/bar@baz", note.Tags[2].Name, "third tag must be 'foo/bar@baz'")
assert.Equal(t, "and-a_very%special$one/avec/des/éèà", note.Tags[3].Name, "fourth tag must be 'and-a_very%special$one/avec/des/éèà'") assert.Equal(t, "and-a_very%special$one/avec/des/éèà", note.Tags[3].Name, "fourth tag must be 'and-a_very%special$one/avec/des/éèà'")
assert.Equal(t, "end", note.Tags[4].Name, "fifth tag must be 'end'") assert.Equal(t, "two-tags", note.Tags[4].Name, "fifth tag must be 'two-tags'")
assert.Equal(t, "one-after-another", note.Tags[5].Name, "sixth tag must be 'one-after-another'")
assert.Equal(t, "end", note.Tags[6].Name, "seventh tag must be 'end'")
// Files // Files
assert.Len(t, note.Files, 2, "There must be 2 files") assert.Len(t, note.Files, 2, "There must be 2 files")
@ -89,7 +107,7 @@ another trap: world #1
// Alter tags, files and images // Alter tags, files and images
note.Tags[1].Name = "" note.Tags[1].Name = ""
note.Tags[4].Name = "not-really" note.Tags[6].Name = "not-really"
note.Files[0].Location = "note2/my file.pdf" note.Files[0].Location = "note2/my file.pdf"
note.Files[1].Location = "note2/my other file.pdf" note.Files[1].Location = "note2/my other file.pdf"
note.Images[0].Location = "note2/image 2.jpg" note.Images[0].Location = "note2/image 2.jpg"
@ -118,10 +136,16 @@ And some tags in a list
- #foo/bar@baz - #foo/bar@baz
- #and-a_very%special$one/avec/des/éèà - #and-a_very%special$one/avec/des/éèà
[it's a trap](https://www.perdu.com/#toto) [it's a trap](https://www.perdu.com/#trap)
Another trap: https://www.perdu.com/#trap
another trap: world #1 another trap: world #1
Traps, traps, traps... #trap#trap
#two-tags #one-after-another
#not-really` #not-really`
newNote := note.WriteNote() newNote := note.WriteNote()
assert.Equal(t, expectedMd, newNote, "notes must be equal") assert.Equal(t, expectedMd, newNote, "notes must be equal")

Loading…
Cancel
Save