A tool to migrate notes from Bear to Zettlr
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

220 lines
6.8 KiB

// Package bearnotes provides tools to read Markdown files generated
// by the Bear app. It can also convert those files to a format suitable
// for Zettlr.
//
// It handles notes, embedded images and file attachments.
//
// Note: there are some Unicode normalization issues between the filenames
// in the filesystem and paths in the Markdown file. It is up to the caller
// to normalize strings when required.
package bearnotes
import (
"fmt"
"net/url"
"regexp"
"sort"
"strings"
"unicode"
"unicode/utf8"
)
// Regular expressions used to locate Bear-specific constructs in a note.
// They are compiled once, when the package is initialized.
var (
	// reTag detects Bear tags.
	// Examples:
	//   - #foo
	//   - #bar/baz
	//
	// This regex has a catch: it also captures one extra character before
	// and one extra character after the tag. Go regular expressions do not
	// support look-ahead/look-behind markers, so the surrounding characters
	// are captured here and validated by hand in NewTag.
	reTag = regexp.MustCompile(`(^|.?)#([\p{L}][-\p{L}\p{N}/$_§%=+°({[\\@]*)(.?|$)`)

	// reFile detects file attachments.
	// Example: <a href='my%20file.pdf'>my file.pdf</a>
	//
	// Group 1 is the (URL-encoded) location, group 2 is the link text.
	reFile = regexp.MustCompile(`<a +href=['"]([^'"]+)['"]>([^<]+)</a>`)

	// reImage detects embedded images.
	// Example: ![](note/my-image.png)
	//
	// Group 1 is the description, group 2 is the (URL-encoded) location.
	// The location stops at the first closing parenthesis, so that text
	// following the image (for instance the final ")" of "(see ![](a.png))")
	// is never swallowed into the location.
	reImage = regexp.MustCompile(`!\[([^\]]*)]\(([^)]+)\)`)
)
// Tag is a Bear tag, such as #foo.
type Tag struct {
	// Name is the tag name, without its leading hashtag.
	Name string

	// position locates this tag in the Markdown file.
	position []int

	// before and after are the characters captured right before and right
	// after the tag. The tag regex captures them because Go regular
	// expressions have no look-ahead/look-behind support.
	before, after string
}
// NewTag builds a Tag from a raw match of the tag regex (the tag itself plus
// the characters captured around it) and the position of that match in the
// file.
//
// When content does not hold a valid tag, the zero Tag is returned; callers
// can detect this through an empty Name.
func NewTag(content string, position []int) Tag {
	groups := reTag.FindStringSubmatch(content)
	if len(groups) == 0 {
		return Tag{}
	}
	lead, name, trail := groups[1], groups[2], groups[3]

	// isBoundary reports whether s is empty or starts with a space character.
	isBoundary := func(s string) bool {
		if len(s) == 0 {
			return true
		}
		first, _ := utf8.DecodeRuneInString(s)
		return unicode.IsSpace(first)
	}

	// A valid tag is surrounded by either a space character or nothing.
	if !isBoundary(lead) || !isBoundary(trail) {
		return Tag{}
	}

	return Tag{
		Name:     name,
		position: position,
		before:   lead,
		after:    trail,
	}
}
// String renders the Tag back to text, including the characters that were
// captured around it. A Tag with an empty Name renders as those surrounding
// characters only, without any hashtag.
func (tag *Tag) String() string {
	hasName := len(tag.Name) != 0
	if hasName {
		return fmt.Sprintf("%s#%s%s", tag.before, tag.Name, tag.after)
	}
	return fmt.Sprintf("%s%s", tag.before, tag.after)
}
// File is a file attachment referenced by a note.
type File struct {
	// Location is the path to the file attachment.
	Location string

	// Name is the name of the file.
	Name string

	// position locates the attachment in the Markdown file.
	position []int
}
// NewFile creates a File from the Markdown content of a file attachment
// (an HTML anchor such as <a href='my%20file.pdf'>my file.pdf</a>) and its
// position in the file.
//
// The location is URL-decoded. When it contains an invalid escape sequence
// it is kept verbatim, so that the link target is not lost.
//
// When content is not a file attachment, the zero File is returned.
func NewFile(content string, position []int) File {
	parts := reFile.FindStringSubmatch(content)
	if len(parts) == 0 {
		return File{}
	}

	location, err := url.PathUnescape(parts[1])
	if err != nil {
		// url.PathUnescape returns an empty string on error: fall back to
		// the raw location rather than silently dropping it.
		location = parts[1]
	}

	return File{
		Location: location,
		Name:     parts[2],
		position: position,
	}
}
// escapePath URL-encodes a path one component at a time, so that the slashes
// separating the components are left untouched.
func escapePath(path string) string {
	segments := strings.Split(path, "/")
	for idx := range segments {
		segments[idx] = url.PathEscape(segments[idx])
	}
	return strings.Join(segments, "/")
}
// String renders the file attachment as a Markdown link suitable for Zettlr:
// the file name is the link text and the URL-encoded location is the target.
func (file *File) String() string {
	target := escapePath(file.Location)
	return fmt.Sprintf("[%s](%s)", file.Name, target)
}
// Image is an image embedded in a note.
type Image struct {
	// Location is the path to the embedded image.
	Location string

	// Description is the alternative text for the image.
	Description string

	// position locates the image in the Markdown file.
	position []int
}
// NewImage creates an Image from the Markdown content of an embedded image
// (such as ![](note/my-image.png)) and its position in the file.
//
// The location is URL-decoded. When it contains an invalid escape sequence
// it is kept verbatim, so that the image target is not lost.
//
// When content is not an embedded image, the zero Image is returned.
func NewImage(content string, position []int) Image {
	parts := reImage.FindStringSubmatch(content)
	if len(parts) == 0 {
		return Image{}
	}

	location, err := url.PathUnescape(parts[2])
	if err != nil {
		// url.PathUnescape returns an empty string on error: fall back to
		// the raw location rather than silently dropping it.
		location = parts[2]
	}

	return Image{
		Location:    location,
		Description: parts[1],
		position:    position,
	}
}
// String renders the image in Markdown syntax suitable for Zettlr: the
// description is the alternative text and the URL-encoded location is the
// target.
func (image *Image) String() string {
	target := escapePath(image.Location)
	return fmt.Sprintf("![%s](%s)", image.Description, target)
}
// Note is a Bear note, together with the tags, file attachments and embedded
// images found in it.
type Note struct {
	// Tags holds all the tags of the note.
	Tags []Tag

	// Files holds all the file attachments of the note.
	Files []File

	// Images holds all the embedded images of the note.
	Images []Image

	// content is the full note content.
	content string
}
// LoadNote parses a Bear note in Markdown format and returns the matching
// Note, with its tags, file attachments and embedded images collected in
// order of appearance.
func LoadNote(content string) *Note {
	note := &Note{content: content}

	// Tags: the regex also yields candidates that are not valid tags; those
	// come back from NewTag with an empty name and are discarded.
	for _, span := range reTag.FindAllStringIndex(content, -1) {
		start, end := span[0], span[1]
		if tag := NewTag(content[start:end], span); len(tag.Name) > 0 {
			note.Tags = append(note.Tags, tag)
		}
	}

	// File attachments.
	for _, span := range reFile.FindAllStringIndex(content, -1) {
		start, end := span[0], span[1]
		file := NewFile(content[start:end], span)
		note.Files = append(note.Files, file)
	}

	// Embedded images.
	for _, span := range reImage.FindAllStringIndex(content, -1) {
		start, end := span[0], span[1]
		image := NewImage(content[start:end], span)
		note.Images = append(note.Images, image)
	}

	return note
}
// updatedItem is the common representation of a tag, file or image once it
// has been converted back to text. It is used to sort those items by their
// order of appearance in the file.
type updatedItem struct {
	// content is the updated text of the tag, file or image.
	content string

	// position is the position of the item in the file.
	position []int
}
// WriteNote converts the note back into a format suitable for Zettlr.
//
// Every tag, file attachment and embedded image is replaced by its updated
// textual form; the text in between is copied verbatim from the original
// note.
//
// Items are expected not to overlap. When two items do overlap (for instance
// a tag located inside an image description), the one that starts first is
// rewritten and the other one is left as it is in the rewritten text.
// Items whose position does not fit in the note content are ignored.
func (note *Note) WriteNote() string {
	// Tags, Images and Files are all stored into a common list.
	items := make([]updatedItem, 0, len(note.Tags)+len(note.Files)+len(note.Images))
	add := func(text string, position []int) {
		// An item without a usable [start, end] position cannot be placed
		// in the file: slicing the content with it would panic.
		if len(position) < 2 || position[0] < 0 ||
			position[1] < position[0] || position[1] > len(note.content) {
			return
		}
		items = append(items, updatedItem{content: text, position: position})
	}
	for i := range note.Tags {
		add(note.Tags[i].String(), note.Tags[i].position)
	}
	for i := range note.Files {
		add(note.Files[i].String(), note.Files[i].position)
	}
	for i := range note.Images {
		add(note.Images[i].String(), note.Images[i].position)
	}

	// Sort the items by their order of appearance in the file, comparing
	// start offsets only. The sort is stable so that the outcome is
	// deterministic when two items start at the same offset.
	sort.SliceStable(items, func(i, j int) bool {
		return items[i].position[0] < items[j].position[0]
	})

	// Go through all items and copy the updated version of the item along
	// with the interleaved original excerpts.
	var current int
	var newContent strings.Builder
	newContent.Grow(len(note.content))
	for _, item := range items {
		start, end := item.position[0], item.position[1]
		if start < current {
			// This item overlaps the previous one, which has already been
			// written: skip it instead of slicing backwards.
			continue
		}
		newContent.WriteString(note.content[current:start])
		newContent.WriteString(item.content)
		current = end
	}
	newContent.WriteString(note.content[current:])

	return newContent.String()
}