220 lines
4.6 KiB
Go
220 lines
4.6 KiB
Go
package app
|
|
|
|
import (
|
|
"fmt"
|
|
"image"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"net/url"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// Insights parsed from a web URL.
|
|
type Insights struct {
|
|
Title string
|
|
Icons []Icon
|
|
}
|
|
|
|
// Icon describes a single icon discovered for a web URL.
type Icon struct {
	Size string      // dimensions written as "<width>x<height>", e.g. "16x16"
	URL  string      // address the icon was referenced or found at
	Data image.Image // raw image data, if available
}

// Width returns the icon's width: the integer that precedes the first "x"
// in Size. It returns 0 when Size contains no "x" or when the text before it
// is empty or contains invalid digits.
func (i Icon) Width() int {
	sep := strings.Index(i.Size, "x")
	if sep < 0 {
		return 0
	}

	// The conversion error is deliberately dropped: Atoi already yields 0
	// for empty or non-numeric text, which is the desired "unknown" width.
	width, _ := strconv.Atoi(i.Size[:sep])
	return width
}
|
|
|
|
// Parse a web page and return page insights.
|
|
func Parse(url string) (Insights, error) {
|
|
log.Printf("### Parse HTML on %s for Title and Icon URLs", url)
|
|
var (
|
|
result = Insights{
|
|
Icons: []Icon{},
|
|
}
|
|
inTag = ""
|
|
)
|
|
|
|
resp, err := http.Get(url)
|
|
if err != nil {
|
|
return result, fmt.Errorf("HTTP error: %s", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
z := html.NewTokenizer(resp.Body)
|
|
parsing := true
|
|
for parsing {
|
|
tt := z.Next()
|
|
token := z.Token()
|
|
switch tt {
|
|
case html.ErrorToken:
|
|
fmt.Printf("error: %s\n", z.Err())
|
|
if z.Err() == io.EOF {
|
|
// successful error condition
|
|
parsing = false
|
|
break
|
|
}
|
|
return result, fmt.Errorf("HTML parsing error: %s", z.Err())
|
|
|
|
case html.TextToken:
|
|
if inTag == "title" && result.Title == "" {
|
|
result.Title += token.Data
|
|
}
|
|
|
|
case html.StartTagToken:
|
|
inTag = token.Data
|
|
|
|
// Looking for <link rel="shortcut icon">
|
|
if token.Data == "link" {
|
|
attr := AttrDict(token)
|
|
rel, _ := attr["rel"]
|
|
sizes, _ := attr["sizes"]
|
|
href, _ := attr["href"]
|
|
|
|
// Ensure "//" URIs start with "https://"
|
|
href = AddScheme(href)
|
|
|
|
if rel == "shortcut icon" {
|
|
log.Printf(`Found <link rel="shortcut icon"> URL: %s`, href)
|
|
if sizes == "" {
|
|
sizes = "16x16"
|
|
}
|
|
|
|
// If an ico file, extract the PNGs.
|
|
if filepath.Ext(href) == ".ico" {
|
|
log.Printf("The favicon is a .ico file, extracting PNGs")
|
|
if resp, err := http.Get(href); err == nil {
|
|
if pngs, err := IcoToPNG(resp.Body); err == nil {
|
|
for _, png := range pngs {
|
|
size := png.Bounds().Size()
|
|
result.Icons = append(result.Icons, Icon{
|
|
Size: fmt.Sprintf("%dx%d", size.X, size.Y),
|
|
URL: href,
|
|
Data: png,
|
|
})
|
|
}
|
|
resp.Body.Close()
|
|
continue
|
|
} else {
|
|
log.Printf("Error extracting PNG from %s: %s", href, err)
|
|
}
|
|
resp.Body.Close()
|
|
} else {
|
|
log.Printf("HTTP error downloading %s: %s", href, err)
|
|
}
|
|
}
|
|
|
|
result.Icons = append(result.Icons, Icon{
|
|
Size: sizes,
|
|
URL: href,
|
|
})
|
|
}
|
|
}
|
|
|
|
case html.EndTagToken, html.SelfClosingTagToken:
|
|
inTag = ""
|
|
}
|
|
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// DetectIcons checks well-known icon URLs on a base domain.
|
|
func DetectIcons(weburl string) ([]Icon, error) {
|
|
var result = []Icon{}
|
|
|
|
uri, err := url.Parse(weburl)
|
|
if err != nil {
|
|
return result, err
|
|
}
|
|
|
|
baseURL := uri.Scheme + "://" + uri.Host
|
|
log.Printf("### Auto-detecting Icon URLs from site %s", baseURL)
|
|
|
|
tryURI := []string{
|
|
"/apple-touch-icon.png",
|
|
"/apple-touch-icon-180x180.png",
|
|
"/apple-touch-icon-152x152.png",
|
|
"/apple-touch-icon-144x144.png",
|
|
"/apple-touch-icon-120x120.png",
|
|
"/apple-touch-icon-114x114.png",
|
|
"/apple-touch-icon-76x76.png",
|
|
"/apple-touch-icon-72x72.png",
|
|
"/apple-touch-icon-57x57.png",
|
|
"/apple-touch-icon-60x60.png",
|
|
"/favicon.ico",
|
|
}
|
|
for _, uri := range tryURI {
|
|
resp, err := http.Get(baseURL + uri)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// PNG images?
|
|
if filepath.Ext(uri) == ".png" {
|
|
png, err := ParsePNG(resp.Body)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
size := png.Bounds().Size()
|
|
result = append(result, Icon{
|
|
Size: fmt.Sprintf("%dx%d", size.X, size.Y),
|
|
URL: baseURL + uri,
|
|
})
|
|
} else if filepath.Ext(uri) == ".ico" {
|
|
// Extract the PNG images from the icon.
|
|
pngs, err := IcoToPNG(resp.Body)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
for _, png := range pngs {
|
|
size := png.Bounds().Size()
|
|
result = append(result, Icon{
|
|
Size: fmt.Sprintf("%dx%d", size.X, size.Y),
|
|
URL: baseURL + uri,
|
|
Data: png,
|
|
})
|
|
}
|
|
}
|
|
|
|
log.Printf("Found icon: %s", uri)
|
|
_ = resp
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// AttrDict converts an HTML Token attributes list into a hash map.
|
|
func AttrDict(token html.Token) map[string]string {
|
|
var result = map[string]string{}
|
|
for _, attr := range token.Attr {
|
|
result[attr.Key] = attr.Val
|
|
}
|
|
return result
|
|
}
|
|
|
|
// AddScheme gives a scheme-relative URL (one beginning with "//") an
// explicit "https:" scheme. Any other input is returned untouched.
func AddScheme(uri string) string {
	if !strings.HasPrefix(uri, "//") {
		return uri
	}
	return "https:" + uri
}
|