bearnotes/note.go


								// Package bearnotes provides tools to read Markdown files generated

								// by the Bear app. It can also convert those files to a format suitable

								// for Zettlr.

								//

								// It handles notes, embedded images and file attachments.

								//

								// Note: there are some Unicode normalization issues between the filenames

								// in the filesystem and paths in the Markdown file. It is up to the caller

								// to normalize strings when required.

								package bearnotes


								import (

									"fmt"

									"net/url"

									"regexp"

									"sort"

									"strings"

									"unicode"

									"unicode/utf8"

								)


								// reTag detects Bear tags.
								// Examples:
								//  - #foo
								//  - #bar/baz
								//
								// The pattern has a catch: besides the tag name (group 2) it also captures
								// one optional character before the tag (group 1) and one after it
								// (group 3). Go's regexp package does not support look-behind/look-ahead
								// markers, so the code using this pattern has to inspect those two
								// characters itself to decide whether the tag is properly delimited.
								var reTag = regexp.MustCompile(`(^|.?)#([\p{L}][-\p{L}\p{N}/$_§%=+°({[\\@]*)(.?|$)`)

								// reFile detects file attachments.
								// Example: <a href='my%20file.pdf'>my file.pdf</a>
								//
								// Group 1 is the href value, group 2 is the link text.
								var reFile = regexp.MustCompile(`<a +href=['"]([^'"]+)['"]>([^<]+)</a>`)

								// reImage detects embedded images.
								// Example: ![](note/my-image.png)
								//
								// Group 1 is the alternative text, group 2 is the image path. The second
								// alternative of group 2 accepts a path holding one pair of parentheses
								// followed by more characters, e.g. "note/image (1).png".
								var reImage = regexp.MustCompile(`!\[([^\]]*)]\(([^())]+|[^(]+\([^)]+\)[^)]+)\)`)


								// Tag represents a Bear tag (#foo).
								type Tag struct {
									// The name of the tag (without the leading hashtag)
									Name string
									// Position of this tag in the Markdown file, as handed to NewTag.
									// LoadNote passes the start and end offsets of the regex match, which
									// cover the surrounding characters stored below.
									position []int
									// The character captured before the tag (emulates a look-behind, see
									// the description of the tag regex)
									before string
									// The character captured after the tag (emulates a look-ahead, see
									// the description of the tag regex)
									after string
								}


								// NewTag builds a Tag from a piece of text holding the tag together with
								// its leading and trailing characters, and from the position of that text
								// in the file.
								//
								// A tag is only accepted when each side is either empty or a whitespace
								// character. Otherwise, or when the text does not look like a tag at all,
								// the zero Tag (empty Name) is returned.
								func NewTag(content string, position []int) Tag {
									groups := reTag.FindStringSubmatch(content)
									if len(groups) == 0 {
										return Tag{}
									}

									// A side is a valid boundary when nothing was captured there or when
									// the captured character is whitespace.
									isBoundary := func(s string) bool {
										if s == "" {
											return true
										}
										r, _ := utf8.DecodeRuneInString(s)
										return unicode.IsSpace(r)
									}
									if !isBoundary(groups[1]) || !isBoundary(groups[3]) {
										return Tag{}
									}

									return Tag{
										Name:     groups[2],
										position: position,
										before:   groups[1],
										after:    groups[3],
									}
								}


								// String renders the Tag back to text, surrounded by the characters that
								// were captured before and after it. A Tag without a name renders only
								// those surrounding characters, with no hashtag.
								func (t *Tag) String() string {
									if t.Name != "" {
										return fmt.Sprintf("%s#%s%s", t.before, t.Name, t.after)
									}
									return fmt.Sprintf("%s%s", t.before, t.after)
								}


								// File represents a file attachment in a note, written in the source
								// Markdown as an HTML link (<a href='...'>...</a>).
								type File struct {
									Location string // The path to the file attachment, taken from the href by NewFile
									Name     string // The name of the file (the link text)
									position []int  // The position in the Markdown file, as handed to NewFile
								}


								// NewFile creates a File from the Markdown content and position in file.
								//
								// The href of the link is URL-unescaped to obtain the Location. When the
								// href contains a malformed escape sequence (for instance a lone "%"), the
								// raw href is kept as-is instead of being dropped, so that the link target
								// is not lost when the note is written back.
								//
								// If content is not a file attachment, the zero File is returned.
								func NewFile(content string, position []int) File {
									var file File
									parts := reFile.FindStringSubmatch(content)
									if len(parts) == 0 {
										return file
									}

									location, err := url.PathUnescape(parts[1])
									if err != nil {
										// Best effort: an undecodable path is still better than no path.
										location = parts[1]
									}
									file.Location = location
									file.Name = parts[2]
									file.position = position
									return file
								}


								// escapePath URL-encodes a slash-separated path one segment at a time, so
								// that the slashes themselves are left untouched by the encoding.
								func escapePath(path string) string {
									segments := strings.Split(path, "/")
									for idx := range segments {
										segments[idx] = url.PathEscape(segments[idx])
									}
									return strings.Join(segments, "/")
								}


								// String renders the file attachment as a Markdown link suitable for
								// Zettlr: the name is the link text and the URL-escaped location is the
								// link target.
								func (f *File) String() string {
									target := escapePath(f.Location)
									return fmt.Sprintf("[%s](%s)", f.Name, target)
								}


								// Image represents an embedded image in a note, written in Markdown as
								// ![description](location).
								type Image struct {
									Location    string // The path to the embedded image, taken from the link target by NewImage
									Description string // The alternative text for the image
									position    []int  // The position in the Markdown file, as handed to NewImage
								}


								// NewImage creates an Image from the Markdown content and position in file.
								//
								// The image path is URL-unescaped to obtain the Location. When the path
								// contains a malformed escape sequence (for instance a lone "%"), the raw
								// path is kept as-is instead of being dropped, so that the image reference
								// is not lost when the note is written back.
								//
								// If content is not an embedded image, the zero Image is returned.
								func NewImage(content string, position []int) Image {
									var image Image
									parts := reImage.FindStringSubmatch(content)
									if len(parts) == 0 {
										return image
									}

									location, err := url.PathUnescape(parts[2])
									if err != nil {
										// Best effort: an undecodable path is still better than no path.
										location = parts[2]
									}
									image.Location = location
									image.Description = parts[1]
									image.position = position
									return image
								}


								// String renders the image in Markdown syntax suitable for Zettlr: the
								// description is the alternative text and the URL-escaped location is the
								// image target.
								func (img *Image) String() string {
									target := escapePath(img.Location)
									return fmt.Sprintf("![%s](%s)", img.Description, target)
								}


								// Note represents a Bear note with its tags, file attachments and embedded
								// images. LoadNote fills the three lists from the note content, and
								// WriteNote renders their items back into that content.
								type Note struct {
									Tags    []Tag   // All the tags
									Files   []File  // All the file attachments
									Images  []Image // All the embedded images
									content string  // The full note content, as given to LoadNote
								}


								// LoadNote parses a Bear note in Markdown format and returns a Note object
								// holding the tags, file attachments and embedded images found in content,
								// each with the start and end offsets of its match.
								func LoadNote(content string) *Note {
									var note Note
									note.content = content
									for _, match := range reTag.FindAllStringIndex(content, -1) {
										tag := NewTag(content[match[0]:match[1]], match)
										if len(tag.Name) == 0 {
											continue
										}
										// Matches do not overlap, so a candidate that starts right after
										// the character consumed by the previous candidate is captured
										// with an empty "before" (e.g. the second tag of "#a,#b"). NewTag
										// only sees the excerpt and cannot tell, so check the character
										// that really precedes the match in the note.
										if len(tag.before) == 0 && match[0] > 0 {
											previous, _ := utf8.DecodeLastRuneInString(content[:match[0]])
											if !unicode.IsSpace(previous) {
												continue
											}
										}
										note.Tags = append(note.Tags, tag)
									}
									for _, match := range reFile.FindAllStringIndex(content, -1) {
										note.Files = append(note.Files, NewFile(content[match[0]:match[1]], match))
									}
									for _, match := range reImage.FindAllStringIndex(content, -1) {
										note.Images = append(note.Images, NewImage(content[match[0]:match[1]], match))
									}
									return &note
								}


								// updatedItem is used to sort tags, images and files by their order
								// of appearance in the file. WriteNote builds one per item, from the
								// item's String() output and its position.
								type updatedItem struct {
									content  string // tag, file or image content (the rewritten text)
									position []int  // position in file (start and end offsets of the original text)
								}


								// WriteNote converts the note back into a format suitable for Zettlr.
								//
								// Every tag, file attachment and embedded image is rendered with its
								// String method and substituted for the original text at its position;
								// the text in between is copied verbatim from the original content.
								//
								// Items that lie inside a span already replaced by an earlier item (for
								// instance a tag within the alternative text of an image) are skipped:
								// the enclosing item already reproduces that text. Items that carry no
								// position are ignored since they cannot be placed in the text.
								func (note *Note) WriteNote() string {
									// Tags, Images and Files are all stored into a common list
									items := make([]updatedItem, 0, len(note.Tags)+len(note.Files)+len(note.Images))
									add := func(content string, position []int) {
										if len(position) < 2 {
											return
										}
										items = append(items, updatedItem{content, position})
									}
									for i := range note.Tags {
										add(note.Tags[i].String(), note.Tags[i].position)
									}
									for i := range note.Files {
										add(note.Files[i].String(), note.Files[i].position)
									}
									for i := range note.Images {
										add(note.Images[i].String(), note.Images[i].position)
									}

									// And sorted by their order of appearance in the file, i.e. by start
									// offset. The stable sort keeps the output deterministic.
									sort.SliceStable(items, func(i, j int) bool {
										return items[i].position[0] < items[j].position[0]
									})

									// Go through all items and copy the updated version of the item along
									// with the interleaved original excerpts
									var current int
									var newContent strings.Builder
									for _, item := range items {
										start, end := item.position[0], item.position[1]
										if start < current {
											// Overlaps the previously written item: slicing backwards
											// would panic, and the text is already covered.
											continue
										}
										newContent.WriteString(note.content[current:start])
										newContent.WriteString(item.content)
										current = end
									}
									newContent.WriteString(note.content[current:])

									return newContent.String()
								}