Import wiki pages, render Markdown pages w/ Pygments

This commit is contained in:
Noah 2017-12-02 10:47:21 -08:00
parent 10071a8301
commit d730b4d43c
7 changed files with 271 additions and 8 deletions

View File

@ -16,6 +16,19 @@ cd ~/go/src/github.com/kirsle/blog
make run
```
## Syntax Highlighting with Pygments
To enable syntax highlighting of fenced code blocks in Markdown files (as in
GitHub Flavored Markdown), install [Pygments](http://pygments.org) on your
system so that the `pygmentize` command is available on your `PATH`. For example:
```
# Fedora/RHEL
sudo dnf install python3-pygments python3-pygments-markdown-lexer
# Debian
sudo apt install python3-pygments
```
# License
MIT.

View File

@ -6,8 +6,10 @@ import (
"bufio"
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
@ -26,6 +28,9 @@ var (
log *golog.Logger
inDB *jsondb.DB
outDB *jsondb.DB
urlSafe = regexp.MustCompile(`[^A-Za-z0-9/]`)
wikiLink = regexp.MustCompile(`\[\[(.+?)\]\]`)
)
func init() {
@ -45,15 +50,13 @@ func main() {
if inPath == "" || outPath == "" {
log.Error("Usage: rophako-import -in /opt/rophako/db -out /path/to/blog/root")
os.Exit(1)
}
if !strings.Contains(outPath, "/.private") {
outPath = strings.TrimSuffix(filepath.Join(outPath, ".private"), "/")
log.Info("Note: rewriting -out to: %s", outPath)
} else if strings.Contains(outPath, "/.private") {
log.Error("Do not provide the /.private suffix to -out, only the parent web root")
os.Exit(1)
}
inDB = jsondb.New(inPath)
outDB = jsondb.New(outPath)
outDB = jsondb.New(strings.TrimSuffix(filepath.Join(outPath, ".private"), "/"))
fmt.Printf(
"Importing Rophako DB from: %s\n"+
"Writing output JsonDB to: %s\n"+
@ -72,6 +75,7 @@ func main() {
// Migrate everything over.
migrateBlog()
migrateComments()
migrateWiki()
}
func migrateBlog() {
@ -197,6 +201,94 @@ func migrateComments() {
}
}
func migrateWiki() {
log.Warn("Migrating wiki...")
threads, err := inDB.List("wiki/pages")
if err != nil {
log.Error("No pages found: %s", err.Error())
return
}
// Prepare the output directory.
wikiPath := filepath.Join(outPath, "wiki")
if _, err = os.Stat(wikiPath); os.IsNotExist(err) {
err = os.Mkdir(wikiPath, 0755)
if err != nil {
log.Error("Can't create wiki root %s: %s", wikiPath, err)
return
}
}
for _, doc := range threads {
parts := strings.Split(doc, "/")
title := parts[len(parts)-1]
// File name is the title but with hyphens, to match the URL version.
filename := makeURLSafe(title)
page := &legacyWiki{}
err = inDB.Get(doc, &page)
if err != nil {
log.Error("Error reading wiki DB %s: %s", doc, err)
continue
}
// Take the highest revision as the current version to use.
var highest *legacyRevision
for _, rev := range page.Revisions {
if highest == nil || rev.Time > highest.Time {
highest = &rev
}
}
if highest == nil {
log.Error("Wiki page %s has no revisions?", doc)
continue
}
// Insert the title as the first <h1> in the Markdown.
markdown := fmt.Sprintf("# %s\n\n%s", title, highest.Body)
// Find and replace inter-wiki links with normal URIs.
links := wikiLink.FindAllStringSubmatch(markdown, -1)
for _, match := range links {
href, label := match[1], match[1]
if strings.Contains(label, "|") {
parts := strings.SplitN(label, "|", 2)
label, href = parts[0], parts[1]
}
href = "/wiki/" + makeURLSafe(href)
markdown = strings.Replace(
markdown,
match[0],
fmt.Sprintf("[%s](%s)", label, href),
1,
)
}
// Write the body into Markdown files.
path := filepath.Join(wikiPath, fmt.Sprintf("%s.md", filename))
log.Debug("Writing page '%s' to: %s", title, path)
err = ioutil.WriteFile(path, []byte(markdown), 0644)
if err != nil {
log.Error("Error writing: %s", err)
}
}
}
// makeURLSafe converts a page title into the slug used for its file name and
// URL. Every character matched by urlSafe is replaced with a hyphen, runs of
// hyphens are collapsed into a single one, and leading and trailing hyphens
// are removed.
func makeURLSafe(input string) string {
	slug := urlSafe.ReplaceAllString(input, "-")

	// One pass of strings.Replace turns "---" into "--", so repeat until no
	// doubled hyphen is left.
	for strings.Contains(slug, "--") {
		slug = strings.Replace(slug, "--", "-", -1)
	}

	return strings.Trim(slug, "-")
}
func commit(document string, v interface{}) {
err := outDB.Commit(document, v)
if err != nil {
@ -229,3 +321,15 @@ type legacyComment struct {
// legacyThread is a collection of legacyComment values keyed by string.
// NOTE(review): presumably the key is the comment ID — confirm against the
// legacy DB.
type legacyThread map[string]legacyComment
// legacySubscribers maps string keys to float64 values.
// NOTE(review): presumably subscriber address -> subscription timestamp —
// confirm against the legacy DB.
type legacySubscribers map[string]float64
// Document shapes used when decoding legacy wiki pages.
type (
	// legacyWiki is one wiki page: the list of its revisions.
	legacyWiki struct {
		Revisions []legacyRevision `json:"revisions"`
	}

	// legacyRevision is a single saved version of a wiki page.
	legacyRevision struct {
		Body   string  `json:"body"`
		Author int     `json:"author"`
		Time   float64 `json:"time"`
		Note   string  `json:"note"`
		ID     string  `json:"id"`
	}
)

View File

@ -1,10 +1,52 @@
package core
import (
"bytes"
"errors"
"fmt"
"os/exec"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
"github.com/shurcooL/github_flavored_markdown"
)
// Regexps for Markdown use cases.
var (
// Match title from the first `# h1` heading. The capture group is everything
// after the leading `#` up to the end of the line, so it includes the
// whitespace that normally follows the `#`. Lines containing a second `#`
// (such as `## h2`) do not match.
reMarkdownTitle = regexp.MustCompile(`(?m:^#([^#\r\n]+)$)`)
// Match fenced code blocks in Markdown source. Group 1 is the language name
// (lowercase ASCII letters only, and possibly empty); group 2 is the code
// between the fences.
reFencedCode = regexp.MustCompile("```" + `([a-z]*)\n([\s\S]*?)\n\s*` + "```")
// Regexp to match fenced code blocks in rendered Markdown HTML.
// Tweak this if you change Markdown engines later.
reCodeBlock = regexp.MustCompile(`<div class="highlight highlight-(.+?)"><pre>(.+?)</pre></div>`)
// NOTE(review): presumably meant to match the "[?FENCED_CODE_<n>_BLOCK?]"
// placeholders, but the pattern contains a literal `%d` and its `?` characters
// are unescaped quantifiers. It is not referenced by the code visible here —
// confirm whether it is still needed.
reDecodeBlock = regexp.MustCompile(`\[?FENCED_CODE_%d_BLOCK?\]`)
)
// codeBlock records one fenced code block lifted out of a Markdown document.
type codeBlock struct {
	placeholder      int    // number embedded in the block's placeholder token
	language, source string // language named on the fence, and the code inside it
}
// TitleFromMarkdown tries to find a title from the source of a Markdown file.
//
// The title is the text of the first line matched by reMarkdownTitle (a
// `# h1` heading), with surrounding whitespace removed. Headings that are
// empty after trimming are passed over.
//
// On error, returns "Untitled" along with the error. So if you're lazy and
// want a suitable default, you can safely ignore the error.
func TitleFromMarkdown(body string) (string, error) {
	for _, m := range reMarkdownTitle.FindAllStringSubmatch(body, -1) {
		// The capture includes the space between "#" and the text.
		if title := strings.TrimSpace(m[1]); title != "" {
			return title, nil
		}
	}

	return "Untitled", errors.New(
		"did not find a single h1 (denoted by # prefix) for Markdown title",
	)
}
// RenderMarkdown renders markdown to HTML, safely. It uses blackfriday to
// render Markdown to HTML and then Bluemonday to sanitize the resulting HTML.
func (b *Blog) RenderMarkdown(input string) string {
@ -22,6 +64,65 @@ func (b *Blog) RenderMarkdown(input string) string {
// bluemonday filtering afterward. This is for blog posts and website
// Markdown pages, not for user-submitted comments or things.
//
// Fenced code blocks that name a language are lifted out of the input and
// replaced with placeholder tokens before rendering. Each one is then run
// through Pygmentize and substituted back into the rendered HTML. When
// Pygmentize fails, the block is inserted as HTML-escaped text inside
// <pre><code> instead of raw source.
func (b *Blog) RenderTrustedMarkdown(input string) string {
	// Find and hang on to fenced code blocks.
	var codeBlocks []codeBlock
	matches := reFencedCode.FindAllStringSubmatch(input, -1)
	for i, m := range matches {
		language, source := m[1], m[2]
		if language == "" {
			// Fences without a language are left for the Markdown engine.
			continue
		}

		codeBlocks = append(codeBlocks, codeBlock{i, language, source})
		input = strings.Replace(input, m[0], fmt.Sprintf(
			"[?FENCED_CODE_%d_BLOCK?]",
			i,
		), 1)
	}

	// Render the HTML out.
	rendered := string(github_flavored_markdown.Markdown([]byte(input)))

	// Substitute fenced codes back in.
	for _, block := range codeBlocks {
		highlighted, err := Pygmentize(block.language, block.source)
		if err != nil {
			// Pygmentize hands back the raw source on failure; escape it so
			// that code containing "<" or "&" is shown rather than parsed.
			highlighted = "<pre><code>" + escapeCodeHTML(block.source) + "</code></pre>"
		}

		rendered = strings.Replace(rendered,
			fmt.Sprintf("[?FENCED_CODE_%d_BLOCK?]", block.placeholder),
			highlighted,
			1,
		)
	}

	return rendered
}

// escapeCodeHTML escapes the characters that are significant in HTML text and
// attribute values.
func escapeCodeHTML(source string) string {
	return strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
		`"`, "&#34;",
		"'", "&#39;",
	).Replace(source)
}
// Pygmentize runs source through the `pygmentize` command and returns its
// HTML output.
//
// language is passed to pygmentize as the lexer name (-l), and source is fed
// to it on standard input.
//
// On error the original given source is returned back, unescaped, together
// with the error: "pygmentize not installed" when the command is not found on
// PATH, otherwise the error from running it (its stderr is logged).
//
// TODO: this takes ~0.6s per go, we need something faster.
func Pygmentize(language, source string) (string, error) {
	bin, err := exec.LookPath("pygmentize")
	if err != nil {
		return source, errors.New("pygmentize not installed")
	}

	// Every option and its value is a separate argv entry; no shell is
	// involved, so "-O encoding=utf-8" as one string would reach pygmentize
	// as a single argument with an embedded space.
	cmd := exec.Command(bin, "-l", language, "-f", "html", "-O", "encoding=utf-8")
	cmd.Stdin = strings.NewReader(source)

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Error("Error running pygments: %s", stderr.String())
		return source, err
	}

	return stdout.String(), nil
}

View File

@ -2,6 +2,8 @@ package core
import (
"errors"
"html/template"
"io/ioutil"
"net/http"
"os"
"path/filepath"
@ -42,6 +44,26 @@ func (b *Blog) PageHandler(w http.ResponseWriter, r *http.Request) {
return
}
// Is it a Markdown file?
if strings.HasSuffix(filepath.URI, ".md") || strings.HasSuffix(filepath.URI, ".markdown") {
source, err := ioutil.ReadFile(filepath.Absolute)
if err != nil {
b.Error(w, r, "Couldn't read Markdown source!")
return
}
// Render it to HTML and find out its title.
body := string(source)
html := b.RenderTrustedMarkdown(body)
title, _ := TitleFromMarkdown(body)
b.RenderTemplate(w, r, ".markdown", NewVars(map[interface{}]interface{}{
"Title": title,
"HTML": template.HTML(html),
}))
return
}
http.ServeFile(w, r, filepath.Absolute)
}

View File

@ -60,9 +60,20 @@ func (b *Blog) Forbidden(w http.ResponseWriter, r *http.Request, message ...stri
}
}
// Error sends an HTTP 500 Internal Server Error response.
//
// The first message, if any, is passed to the ".errors/500" template. When no
// message is given a generic one is used, so calling Error without a message
// does not panic. If the template itself fails to render, the error is logged
// and a plain-text fallback is written.
func (b *Blog) Error(w http.ResponseWriter, r *http.Request, message ...string) {
	msg := "Internal Server Error"
	if len(message) > 0 {
		msg = message[0]
	}

	w.WriteHeader(http.StatusInternalServerError)
	err := b.RenderTemplate(w, r, ".errors/500", &Vars{
		Message: msg,
	})
	if err != nil {
		// Pass the error text as an argument, not as the format string, so a
		// "%" in it is not misread as a formatting verb.
		log.Error("%s", err.Error())
		w.Write([]byte("Unrecoverable template error for Error()"))
	}
}
// BadRequest sends an HTTP 400 Bad Request.
func (b *Blog) BadRequest(w http.ResponseWriter, r *http.Request, message ...string) {
log.Error("HERE 4")
w.WriteHeader(http.StatusBadRequest)
err := b.RenderTemplate(w, r, ".errors/400", &Vars{
Message: message[0],

6
root/.errors/500.gohtml Normal file
View File

@ -0,0 +1,6 @@
{{ define "title" }}Internal Server Error{{ end }}
{{ define "content" }}
<h1>500 Internal Server Error</h1>
{{ .Message }}
{{ end }}

6
root/.markdown.gohtml Normal file
View File

@ -0,0 +1,6 @@
{{ define "title" }}{{ .Data.Title }}{{ end }}
{{ define "content" }}
{{ .Data.HTML }}
{{ end }}