From d730b4d43cfaf03ba4ea3437564eb71e7a406f9f Mon Sep 17 00:00:00 2001 From: Noah Petherbridge Date: Sat, 2 Dec 2017 10:47:21 -0800 Subject: [PATCH] Import wiki pages, render Markdown pages w/ Pygments --- README.md | 13 +++++ cmd/rophako-import/main.go | 116 +++++++++++++++++++++++++++++++++++-- core/markdown.go | 103 +++++++++++++++++++++++++++++++- core/pages.go | 22 +++++++ core/responses.go | 13 ++++- root/.errors/500.gohtml | 6 ++ root/.markdown.gohtml | 6 ++ 7 files changed, 271 insertions(+), 8 deletions(-) create mode 100644 root/.errors/500.gohtml create mode 100644 root/.markdown.gohtml diff --git a/README.md b/README.md index a3c6177..2c27d60 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,19 @@ cd ~/go/src/github.com/kirsle/blog make run ``` +## Syntax Highlighting with Pygments + +To enable syntax highlighting within Markdown files (like with GitHub Flavored +Markdown), install [pygments](http://pygments.org) on your system. For example: + +``` +# Fedora/RHEL +sudo dnf install python3-pygments python3-pygments-markdown-lexer + +# Debian +sudo apt install python3-pygments +``` + # License MIT. 
diff --git a/cmd/rophako-import/main.go b/cmd/rophako-import/main.go index 517e050..39e1559 100644 --- a/cmd/rophako-import/main.go +++ b/cmd/rophako-import/main.go @@ -6,8 +6,10 @@ import ( "bufio" "flag" "fmt" + "io/ioutil" "os" "path/filepath" + "regexp" "strconv" "strings" "time" @@ -26,6 +28,9 @@ var ( log *golog.Logger inDB *jsondb.DB outDB *jsondb.DB + + urlSafe = regexp.MustCompile(`[^A-Za-z0-9/]`) + wikiLink = regexp.MustCompile(`\[\[(.+?)\]\]`) ) func init() { @@ -45,15 +50,13 @@ func main() { if inPath == "" || outPath == "" { log.Error("Usage: rophako-import -in /opt/rophako/db -out /path/to/blog/root") os.Exit(1) - } - - if !strings.Contains(outPath, "/.private") { - outPath = strings.TrimSuffix(filepath.Join(outPath, ".private"), "/") - log.Info("Note: rewriting -out to: %s", outPath) + } else if strings.Contains(outPath, "/.private") { + log.Error("Do not provide the /.private suffix to -out, only the parent web root") + os.Exit(1) } inDB = jsondb.New(inPath) - outDB = jsondb.New(outPath) + outDB = jsondb.New(strings.TrimSuffix(filepath.Join(outPath, ".private"), "/")) fmt.Printf( "Importing Rophako DB from: %s\n"+ "Writing output JsonDB to: %s\n"+ @@ -72,6 +75,7 @@ func main() { // Migrate everything over. migrateBlog() migrateComments() + migrateWiki() } func migrateBlog() { @@ -197,6 +201,94 @@ func migrateComments() { } } +func migrateWiki() { + log.Warn("Migrating wiki...") + + threads, err := inDB.List("wiki/pages") + if err != nil { + log.Error("No pages found: %s", err.Error()) + return + } + + // Prepare the output directory. + wikiPath := filepath.Join(outPath, "wiki") + if _, err = os.Stat(wikiPath); os.IsNotExist(err) { + err = os.Mkdir(wikiPath, 0755) + if err != nil { + log.Error("Can't create wiki root %s: %s", wikiPath, err) + return + } + } + + for _, doc := range threads { + parts := strings.Split(doc, "/") + title := parts[len(parts)-1] + + // File name is the title but with hyphens, to match the URL version. 
+ filename := makeURLSafe(title) + + page := &legacyWiki{} + err = inDB.Get(doc, &page) + if err != nil { + log.Error("Error reading wiki DB %s: %s", doc, err) + continue + } + + // Take the highest revision as the current version to use. + var highest *legacyRevision + for _, rev := range page.Revisions { + if highest == nil || rev.Time > highest.Time { + highest = &rev + } + } + if highest == nil { + log.Error("Wiki page %s has no revisions?", doc) + continue + } + + // Insert the title as the first

in the Markdown. + markdown := fmt.Sprintf("# %s\n\n%s", title, highest.Body) + + // Find and replace inter-wiki links with normal URIs. + links := wikiLink.FindAllStringSubmatch(markdown, -1) + for _, match := range links { + href, label := match[1], match[1] + if strings.Contains(label, "|") { + parts := strings.SplitN(label, "|", 2) + label, href = parts[0], parts[1] + } + href = "/wiki/" + makeURLSafe(href) + + markdown = strings.Replace( + markdown, + match[0], + fmt.Sprintf("[%s](%s)", label, href), + 1, + ) + } + + // Write the body into Markdown files. + path := filepath.Join(wikiPath, fmt.Sprintf("%s.md", filename)) + log.Debug("Writing page '%s' to: %s", title, path) + err = ioutil.WriteFile(path, []byte(markdown), 0644) + if err != nil { + log.Error("Error writing: %s", err) + } + } +} + +func makeURLSafe(input string) string { + return strings.Trim( + strings.Replace( + urlSafe.ReplaceAllString(input, "-"), + "--", + "-", + 0, + ), + "-", + ) +} + func commit(document string, v interface{}) { err := outDB.Commit(document, v) if err != nil { @@ -229,3 +321,15 @@ type legacyComment struct { type legacyThread map[string]legacyComment type legacySubscribers map[string]float64 + +type legacyWiki struct { + Revisions []legacyRevision `json:"revisions"` +} + +type legacyRevision struct { + Body string `json:"body"` + Author int `json:"author"` + Time float64 `json:"time"` + Note string `json:"note"` + ID string `json:"id"` +} diff --git a/core/markdown.go b/core/markdown.go index 68ade81..a54a39f 100644 --- a/core/markdown.go +++ b/core/markdown.go @@ -1,10 +1,52 @@ package core import ( + "bytes" + "errors" + "fmt" + "os/exec" + "regexp" + "strings" + "github.com/microcosm-cc/bluemonday" "github.com/shurcooL/github_flavored_markdown" ) +// Regexps for Markdown use cases. +var ( + // Match title from the first `# h1` heading. + reMarkdownTitle = regexp.MustCompile(`(?m:^#([^#\r\n]+)$)`) + + // Match fenced code blocks with languages defined. 
+ reFencedCode = regexp.MustCompile("```" + `([a-z]*)\n([\s\S]*?)\n\s*` + "```") + + // Regexp to match fenced code blocks in rendered Markdown HTML. + // Tweak this if you change Markdown engines later. + reCodeBlock = regexp.MustCompile(`
(.+?)
`) + reDecodeBlock = regexp.MustCompile(`\[?FENCED_CODE_%d_BLOCK?\]`) +) + +// A container for parsed code blocks. +type codeBlock struct { + placeholder int + language string + source string +} + +// TitleFromMarkdown tries to find a title from the source of a Markdown file. +// +// On error, returns "Untitled" along with the error. So if you're lazy and +// want a suitable default, you can safely ignore the error. +func TitleFromMarkdown(body string) (string, error) { + m := reMarkdownTitle.FindStringSubmatch(body) + if len(m) > 0 { + return m[1], nil + } + return "Untitled", errors.New( + "did not find a single h1 (denoted by # prefix) for Markdown title", + ) +} + // RenderMarkdown renders markdown to HTML, safely. It uses blackfriday to // render Markdown to HTML and then Bluemonday to sanitize the resulting HTML. func (b *Blog) RenderMarkdown(input string) string { @@ -22,6 +64,65 @@ func (b *Blog) RenderMarkdown(input string) string { // bluemonday filtering afterward. This is for blog posts and website // Markdown pages, not for user-submitted comments or things. func (b *Blog) RenderTrustedMarkdown(input string) string { - html := github_flavored_markdown.Markdown([]byte(input)) + // Find and hang on to fenced code blocks. + codeBlocks := []codeBlock{} + log.Info("RE: %s", reFencedCode.String()) + matches := reFencedCode.FindAllStringSubmatch(input, -1) + for i, m := range matches { + language, source := m[1], m[2] + if language == "" { + continue + } + codeBlocks = append(codeBlocks, codeBlock{i, language, source}) + + input = strings.Replace(input, m[0], fmt.Sprintf( + "[?FENCED_CODE_%d_BLOCK?]", + i, + ), 1) + } + + // Render the HTML out. + html := string(github_flavored_markdown.Markdown([]byte(input))) + + // Substitute fenced codes back in. 
+ for _, block := range codeBlocks { + highlighted, _ := Pygmentize(block.language, block.source) + html = strings.Replace(html, + fmt.Sprintf("[?FENCED_CODE_%d_BLOCK?]", block.placeholder), + highlighted, + 1, + ) + } + return string(html) } + +// Pygmentize searches for fenced code blocks in rendered Markdown HTML +// and runs Pygments to syntax highlight it. +// +// On error the original given source is returned back. +// +// TODO: this takes ~0.6s per go, we need something faster. +func Pygmentize(language, source string) (string, error) { + bin := "pygmentize" + + if _, err := exec.LookPath(bin); err != nil { + return source, errors.New("pygmentize not installed") + } + + cmd := exec.Command(bin, "-l"+language, "-f"+"html", "-O encoding=utf-8") + cmd.Stdin = strings.NewReader(source) + + var out bytes.Buffer + cmd.Stdout = &out + + var stderr bytes.Buffer + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + log.Error("Error running pygments: %s", stderr.String()) + return source, err + } + + return out.String(), nil +} diff --git a/core/pages.go b/core/pages.go index dc37291..7ce6de0 100644 --- a/core/pages.go +++ b/core/pages.go @@ -2,6 +2,8 @@ package core import ( "errors" + "html/template" + "io/ioutil" "net/http" "os" "path/filepath" @@ -42,6 +44,26 @@ func (b *Blog) PageHandler(w http.ResponseWriter, r *http.Request) { return } + // Is it a Markdown file? + if strings.HasSuffix(filepath.URI, ".md") || strings.HasSuffix(filepath.URI, ".markdown") { + source, err := ioutil.ReadFile(filepath.Absolute) + if err != nil { + b.Error(w, r, "Couldn't read Markdown source!") + return + } + + // Render it to HTML and find out its title. 
+ body := string(source) + html := b.RenderTrustedMarkdown(body) + title, _ := TitleFromMarkdown(body) + + b.RenderTemplate(w, r, ".markdown", NewVars(map[interface{}]interface{}{ + "Title": title, + "HTML": template.HTML(html), + })) + return + } + http.ServeFile(w, r, filepath.Absolute) } diff --git a/core/responses.go b/core/responses.go index 87451fa..11126b3 100644 --- a/core/responses.go +++ b/core/responses.go @@ -60,9 +60,20 @@ func (b *Blog) Forbidden(w http.ResponseWriter, r *http.Request, message ...stri } } +// Error sends an HTTP 500 Internal Server Error response. +func (b *Blog) Error(w http.ResponseWriter, r *http.Request, message ...string) { + w.WriteHeader(http.StatusInternalServerError) + err := b.RenderTemplate(w, r, ".errors/500", &Vars{ + Message: message[0], + }) + if err != nil { + log.Error(err.Error()) + w.Write([]byte("Unrecoverable template error for Error()")) + } +} + // BadRequest sends an HTTP 400 Bad Request. func (b *Blog) BadRequest(w http.ResponseWriter, r *http.Request, message ...string) { - log.Error("HERE 4") w.WriteHeader(http.StatusBadRequest) err := b.RenderTemplate(w, r, ".errors/400", &Vars{ Message: message[0], diff --git a/root/.errors/500.gohtml b/root/.errors/500.gohtml new file mode 100644 index 0000000..2ca412f --- /dev/null +++ b/root/.errors/500.gohtml @@ -0,0 +1,6 @@ +{{ define "title" }}Internal Server Error{{ end }} +{{ define "content" }} +

<h1>500 Internal Server Error</h1>

+ +{{ .Message }} +{{ end }} diff --git a/root/.markdown.gohtml b/root/.markdown.gohtml new file mode 100644 index 0000000..43a7881 --- /dev/null +++ b/root/.markdown.gohtml @@ -0,0 +1,6 @@ +{{ define "title" }}{{ .Data.Title }}{{ end }} +{{ define "content" }} + +{{ .Data.HTML }} + +{{ end }}