150 lines
4.2 KiB
Go
150 lines
4.2 KiB
Go
package core
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/md5"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os/exec"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/microcosm-cc/bluemonday"
|
|
"github.com/shurcooL/github_flavored_markdown"
|
|
)
|
|
|
|
// Regexps for Markdown use cases.
|
|
var (
|
|
// Match title from the first `# h1` heading.
|
|
reMarkdownTitle = regexp.MustCompile(`(?m:^#([^#\r\n]+)$)`)
|
|
|
|
// Match fenced code blocks with languages defined.
|
|
reFencedCode = regexp.MustCompile("```" + `([a-z]*)[\r\n]([\s\S]*?)[\r\n]\s*` + "```")
|
|
|
|
// Regexp to match fenced code blocks in rendered Markdown HTML.
|
|
// Tweak this if you change Markdown engines later.
|
|
reCodeBlock = regexp.MustCompile(`<div class="highlight highlight-(.+?)"><pre>(.+?)</pre></div>`)
|
|
reDecodeBlock = regexp.MustCompile(`\[?FENCED_CODE_%d_BLOCK?\]`)
|
|
)
|
|
|
|
// A container for parsed code blocks.
|
|
type codeBlock struct {
|
|
placeholder int
|
|
language string
|
|
source string
|
|
}
|
|
|
|
// TitleFromMarkdown tries to find a title from the source of a Markdown file.
|
|
//
|
|
// On error, returns "Untitled" along with the error. So if you're lazy and
|
|
// want a suitable default, you can safely ignore the error.
|
|
func TitleFromMarkdown(body string) (string, error) {
|
|
m := reMarkdownTitle.FindStringSubmatch(body)
|
|
if len(m) > 0 {
|
|
return m[1], nil
|
|
}
|
|
return "Untitled", errors.New(
|
|
"did not find a single h1 (denoted by # prefix) for Markdown title",
|
|
)
|
|
}
|
|
|
|
// RenderMarkdown renders markdown to HTML, safely. It uses blackfriday to
|
|
// render Markdown to HTML and then Bluemonday to sanitize the resulting HTML.
|
|
func (b *Blog) RenderMarkdown(input string) string {
|
|
unsafe := []byte(b.RenderTrustedMarkdown(input))
|
|
|
|
// Sanitize HTML, but allow fenced code blocks to not get mangled in user
|
|
// submitted comments.
|
|
p := bluemonday.UGCPolicy()
|
|
p.AllowAttrs("class").Matching(reFencedCodeClass).OnElements("code")
|
|
html := p.SanitizeBytes(unsafe)
|
|
return string(html)
|
|
}
|
|
|
|
// RenderTrustedMarkdown renders markdown to HTML, but without applying
|
|
// bluemonday filtering afterward. This is for blog posts and website
|
|
// Markdown pages, not for user-submitted comments or things.
|
|
func (b *Blog) RenderTrustedMarkdown(input string) string {
|
|
// Find and hang on to fenced code blocks.
|
|
codeBlocks := []codeBlock{}
|
|
matches := reFencedCode.FindAllStringSubmatch(input, -1)
|
|
for i, m := range matches {
|
|
language, source := m[1], m[2]
|
|
if language == "" {
|
|
continue
|
|
}
|
|
codeBlocks = append(codeBlocks, codeBlock{i, language, source})
|
|
|
|
input = strings.Replace(input, m[0], fmt.Sprintf(
|
|
"[?FENCED_CODE_%d_BLOCK?]",
|
|
i,
|
|
), 1)
|
|
}
|
|
|
|
// Render the HTML out.
|
|
html := string(github_flavored_markdown.Markdown([]byte(input)))
|
|
|
|
// Substitute fenced codes back in.
|
|
for _, block := range codeBlocks {
|
|
highlighted, _ := b.Pygmentize(block.language, block.source)
|
|
html = strings.Replace(html,
|
|
fmt.Sprintf("[?FENCED_CODE_%d_BLOCK?]", block.placeholder),
|
|
highlighted,
|
|
1,
|
|
)
|
|
}
|
|
|
|
return string(html)
|
|
}
|
|
|
|
// Pygmentize searches for fenced code blocks in rendered Markdown HTML
|
|
// and runs Pygments to syntax highlight it.
|
|
//
|
|
// On error the original given source is returned back.
|
|
//
|
|
// The rendered result is cached in Redis if available, because the CLI
|
|
// call takes ~0.6s which is slow if you're rendering a lot of code blocks.
|
|
func (b *Blog) Pygmentize(language, source string) (string, error) {
|
|
var result string
|
|
|
|
// Hash the source for the cache key.
|
|
h := md5.New()
|
|
io.WriteString(h, language+source)
|
|
hash := fmt.Sprintf("%x", h.Sum(nil))
|
|
cacheKey := "pygmentize:" + hash
|
|
|
|
// Do we have it cached?
|
|
if cached, err := b.Cache.Get(cacheKey); err == nil && len(cached) > 0 {
|
|
return string(cached), nil
|
|
}
|
|
|
|
// Defer to the `pygmentize` command
|
|
bin := "pygmentize"
|
|
if _, err := exec.LookPath(bin); err != nil {
|
|
return source, errors.New("pygmentize not installed")
|
|
}
|
|
|
|
cmd := exec.Command(bin, "-l"+language, "-f"+"html", "-O encoding=utf-8")
|
|
cmd.Stdin = strings.NewReader(source)
|
|
|
|
var out bytes.Buffer
|
|
cmd.Stdout = &out
|
|
|
|
var stderr bytes.Buffer
|
|
cmd.Stderr = &stderr
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
log.Error("Error running pygments: %s", stderr.String())
|
|
return source, err
|
|
}
|
|
|
|
result = out.String()
|
|
err := b.Cache.Set(cacheKey, []byte(result), 60*60*24) // cool md5's don't change
|
|
if err != nil {
|
|
log.Error("Couldn't cache Pygmentize output: %s", err)
|
|
}
|
|
|
|
return result, nil
|
|
}
|