Add visitor tracking code

This commit is contained in:
Noah 2014-12-02 15:46:13 -08:00
parent a1b2550c0b
commit 67a30e2471
16 changed files with 377 additions and 31 deletions

View File

@ -135,3 +135,4 @@ load_plugin("rophako.modules.photo")
load_plugin("rophako.modules.comment") load_plugin("rophako.modules.comment")
load_plugin("rophako.modules.emoticons") load_plugin("rophako.modules.emoticons")
load_plugin("rophako.modules.contact") load_plugin("rophako.modules.contact")
load_plugin("rophako.modules.tracking")

View File

@ -26,6 +26,7 @@ BLUEPRINT_PATHS = []
import config import config
from rophako import __version__ from rophako import __version__
from rophako.plugin import load_plugin from rophako.plugin import load_plugin
import rophako.model.tracking as Tracking
import rophako.utils import rophako.utils
app.DEBUG = config.DEBUG app.DEBUG = config.DEBUG
@ -78,7 +79,8 @@ def before_request():
"uid": 0, "uid": 0,
"name": "Guest", "name": "Guest",
"role": "user", "role": "user",
} },
"tracking": Tracking.track_visit(request, session),
} }
# Default session vars. # Default session vars.

View File

@ -19,7 +19,7 @@ redis_client = None
cache_lifetime = 60*60 # 1 hour cache_lifetime = 60*60 # 1 hour
def get(document): def get(document, cache=True):
"""Get a specific document from the DB.""" """Get a specific document from the DB."""
logger.debug("JsonDB: GET {}".format(document)) logger.debug("JsonDB: GET {}".format(document))
@ -32,7 +32,7 @@ def get(document):
stat = os.stat(path) stat = os.stat(path)
# Do we have it cached? # Do we have it cached?
data = get_cache(document) data = get_cache(document) if cache else None
if data: if data:
# Check if the cache is fresh. # Check if the cache is fresh.
if stat.st_mtime > get_cache(document+"_mtime"): if stat.st_mtime > get_cache(document+"_mtime"):
@ -45,12 +45,13 @@ def get(document):
data = read_json(path) data = read_json(path)
# Cache and return it. # Cache and return it.
set_cache(document, data, expires=cache_lifetime) if cache:
set_cache(document+"_mtime", stat.st_mtime, expires=cache_lifetime) set_cache(document, data, expires=cache_lifetime)
set_cache(document+"_mtime", stat.st_mtime, expires=cache_lifetime)
return data return data
def commit(document, data): def commit(document, data, cache=True):
"""Insert/update a document in the DB.""" """Insert/update a document in the DB."""
# Need to create the file? # Need to create the file?
@ -69,8 +70,9 @@ def commit(document, data):
os.mkdir(segment, 0o755) os.mkdir(segment, 0o755)
# Update the cached document. # Update the cached document.
set_cache(document, data, expires=cache_lifetime) if cache:
set_cache(document+"_mtime", time.time(), expires=cache_lifetime) set_cache(document, data, expires=cache_lifetime)
set_cache(document+"_mtime", time.time(), expires=cache_lifetime)
# Write the JSON. # Write the JSON.
write_json(path, data) write_json(path, data)
@ -82,6 +84,7 @@ def delete(document):
if os.path.isfile(path): if os.path.isfile(path):
logger.info("Delete DB document: {}".format(path)) logger.info("Delete DB document: {}".format(path))
os.unlink(path) os.unlink(path)
del_cache(document)
def exists(document): def exists(document):

View File

@ -3,7 +3,7 @@
"""Photo album models.""" """Photo album models."""
import os import os
from flask import g, request from flask import g
import time import time
import requests import requests
from PIL import Image from PIL import Image
@ -12,7 +12,7 @@ import random
import config import config
import rophako.jsondb as JsonDB import rophako.jsondb as JsonDB
from rophako.utils import sanitize_name from rophako.utils import sanitize_name, remote_addr
from rophako.log import logger from rophako.log import logger
# Maps the friendly names of photo sizes with their pixel values from config. # Maps the friendly names of photo sizes with their pixel values from config.
@ -523,7 +523,7 @@ def process_photo(form, filename):
} }
index["albums"][album][key] = dict( index["albums"][album][key] = dict(
ip=request.remote_addr, ip=remote_addr(),
author=g.info["session"]["uid"], author=g.info["session"]["uid"],
uploaded=int(time.time()), uploaded=int(time.time()),
caption=form.get("caption", ""), caption=form.get("caption", ""),

175
rophako/model/tracking.py Normal file
View File

@ -0,0 +1,175 @@
# -*- coding: utf-8 -*-
"""Visitor tracking models."""
import time
import requests
import rophako.jsondb as JsonDB
from rophako.utils import remote_addr, pretty_time, server_name
def track_visit(request, session):
    """Update the traffic counters for this request and return the totals.

    A request only increments the counters while the session carries no
    ``tracking`` value yet; once that value is set the stored numbers are
    merely read back.

    :param request: The current request; only ``request.referrer`` is read
        here (the object is also handed on to ``log_referrer``).
    :param session: Mapping-like session; ``session["tracking"]`` is read
        and, for a first visit, written.
    :returns: dict with the keys ``unique_today``, ``unique_total``,
        ``hits_today`` and ``hits_total``.
    """
    # The tracking value is either the validated HTTP referrer or "1".
    cookie = session.get("tracking")
    addr = remote_addr()
    today = pretty_time("%Y-%m-%d", time.time())
    first_visit = not cookie

    # Four documents are maintained: unique/hits, each per-day and total.
    files = {
        "unique/{}".format(today): "unique_today",
        "unique/total": "unique_total",
        "hits/{}".format(today): "hits_today",
        "hits/total": "hits_total",
    }

    values = dict()  # Returnable traffic values
    for doc_name, key in files.items():
        dbfile = "traffic/{}".format(doc_name)
        is_counter = doc_name.startswith("hits")

        # Load the stored document, or start from an empty one.
        if JsonDB.exists(dbfile):
            db = JsonDB.get(dbfile)
        elif is_counter:
            db = dict(hits=0)
        else:
            db = dict()

        if is_counter:
            # Hit documents hold a single running counter.
            if first_visit:
                db["hits"] += 1
                JsonDB.commit(dbfile, db)
            values[key] = db["hits"]
        else:
            # Unique documents map each IP address to the time it was first
            # recorded, so the visitor count is the number of keys.
            if first_visit and addr not in db:
                db[addr] = time.time()
                JsonDB.commit(dbfile, db)
            values[key] = len(db)

    # Validate and log the HTTP referrer; anything unusable becomes "1".
    referrer = None
    if request.referrer:
        referrer = log_referrer(request, request.referrer)
    referrer = referrer or "1"

    # Mark the session as tracked so later requests are not counted again.
    if first_visit:
        session["tracking"] = referrer

    return values
def log_referrer(request, link, timeout=5):
    """Validate an external referrer and append it to the referrer log.

    The referring page is downloaded, and the link is only logged when the
    page body contains this site's host name (as given by ``server_name()``).

    The ``link`` value comes from the visitor's ``Referer`` header, so it is
    untrusted. Every failure mode (bad scheme, unreachable host, slow server)
    results in ``None`` rather than an exception, so the page being served
    to the visitor is never broken by a bad referrer.

    :param request: The current request; ``request.url_root`` is used to
        recognise same-site referrers.
    :param link: The referring URL to check.
    :param timeout: Seconds to wait for the referring page before giving up.
    :returns: ``link`` when it was verified and logged, otherwise ``None``.
    """
    # Ignore if same domain.
    if link.startswith(request.url_root):
        return None

    # Only plain web URLs are worth fetching; anything else (ftp:, file:,
    # javascript:, garbage) can never be a page that links back to us.
    if not link.lower().startswith(("http://", "https://")):
        return None

    # See if the URL really links back to us.
    hostname = server_name()
    try:
        r = requests.get(link, timeout=timeout)
    except requests.RequestException:
        # Unreachable, timed out, malformed URL, too many redirects...
        return None

    if hostname not in r.text:
        return None

    # Log it.
    db = list()
    if JsonDB.exists("traffic/referrers"):
        # Don't cache the result -- the list can get huge!
        db = JsonDB.get("traffic/referrers", cache=False)
    db.append(link)
    JsonDB.commit("traffic/referrers", db, cache=False)
    return link
def get_visitor_details():
    """Collect the per-day traffic history for the frontend.

    Walks every document listed under ``traffic/hits`` (skipping the
    ``total`` document) in sorted order and pairs it with the matching
    ``traffic/unique`` document.

    :returns: dict with the keys

        * ``traffic`` -- list of ``dict(date, hits, unique)``, oldest first
        * ``most_unique`` -- ``[date, count]`` for the day with the most
          unique visitors
        * ``most_hits`` -- ``[date, count]`` for the day with the most hits
        * ``oldest`` -- first date on record, or ``None`` without any data
    """
    traffic = []
    most_unique = ["0000-00-00", 0]
    most_hits = ["0000-00-00", 0]
    oldest = None

    for date in sorted(JsonDB.list_docs("traffic/hits")):
        if date == "total":
            continue

        # Dates are visited in sorted order, so the first one is the oldest.
        if not oldest:
            oldest = date

        # The history pages always read fresh data, bypassing the cache.
        hits_db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
        uniq_db = JsonDB.get("traffic/unique/{}".format(date), cache=False)
        hit_count = hits_db["hits"]
        unique_count = len(uniq_db.keys())

        # Track the record days.
        if hit_count > most_hits[1]:
            most_hits = [date, hit_count]
        if unique_count > most_unique[1]:
            most_unique = [date, unique_count]

        traffic.append(dict(
            date=date,
            hits=hit_count,
            unique=unique_count,
        ))

    return {
        "traffic": traffic,
        "most_unique": most_unique,
        "most_hits": most_hits,
        "oldest": oldest,
    }
def get_referrers(recent=25):
    """Summarise the logged HTTP referrers.

    :param recent: How many of the most recently logged links to include.
        Zero (or a negative number) yields an empty ``recent`` list.
    :returns: dict of the form::

        {
            "referrers": [
                ["http://...", 20],  # sorted by number of hits, descending
            ],
            "recent": [ ...the last `recent` logged links, oldest first... ],
        }
    """
    db = []
    if JsonDB.exists("traffic/referrers"):
        # The log can get large, so it is never kept in the cache.
        db = JsonDB.get("traffic/referrers", cache=False)

    # Count how often each link was logged.
    counts = dict()
    for link in db:
        counts[link] = counts.get(link, 0) + 1

    # Sort them by popularity.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    return dict(
        referrers=[[link, hits] for link, hits in ranked],
        # A plain db[-recent:] would return the *entire* log for recent=0,
        # because -0 == 0; guard against that explicitly.
        recent=db[-recent:] if recent > 0 else [],
    )

View File

@ -3,7 +3,6 @@
"""Endpoints for admin functions.""" """Endpoints for admin functions."""
from flask import g, Blueprint, request, redirect, url_for, session, flash from flask import g, Blueprint, request, redirect, url_for, session, flash
import re
import rophako.model.user as User import rophako.model.user as User
from rophako.modules.account import validate_create_form from rophako.modules.account import validate_create_form

View File

@ -2,10 +2,8 @@
"""Endpoints for the web blog.""" """Endpoints for the web blog."""
from flask import Blueprint, g, request, redirect, url_for, session, flash, make_response from flask import Blueprint, g, request, redirect, url_for, flash, make_response
import re
import datetime import datetime
import calendar
import time import time
from xml.dom.minidom import Document from xml.dom.minidom import Document
@ -13,7 +11,8 @@ import rophako.model.user as User
import rophako.model.blog as Blog import rophako.model.blog as Blog
import rophako.model.comment as Comment import rophako.model.comment as Comment
import rophako.model.emoticons as Emoticons import rophako.model.emoticons as Emoticons
from rophako.utils import template, render_markdown, pretty_time, login_required from rophako.utils import (template, render_markdown, pretty_time,
login_required, remote_addr)
from rophako.plugin import load_plugin from rophako.plugin import load_plugin
from rophako.log import logger from rophako.log import logger
from config import * from config import *
@ -264,7 +263,7 @@ def update():
avatar = g.info["avatar"], avatar = g.info["avatar"],
categories = tags, categories = tags,
privacy = g.info["privacy"], privacy = g.info["privacy"],
ip = request.remote_addr, ip = remote_addr(),
emoticons = g.info["emoticons"], emoticons = g.info["emoticons"],
comments = g.info["comments"], comments = g.info["comments"],
format = g.info["format"], format = g.info["format"],

View File

@ -8,7 +8,8 @@ import time
import rophako.model.user as User import rophako.model.user as User
import rophako.model.comment as Comment import rophako.model.comment as Comment
from rophako.utils import template, pretty_time, login_required, sanitize_name from rophako.utils import (template, pretty_time, login_required, sanitize_name,
remote_addr)
from rophako.plugin import load_plugin from rophako.plugin import load_plugin
from rophako.log import logger from rophako.log import logger
from config import * from config import *
@ -48,7 +49,7 @@ def preview():
Comment.add_comment( Comment.add_comment(
thread=thread, thread=thread,
uid=g.info["session"]["uid"], uid=g.info["session"]["uid"],
ip=request.remote_addr, ip=remote_addr(),
time=int(time.time()), time=int(time.time()),
image=gravatar, image=gravatar,
name=form["name"], name=form["name"],

View File

@ -2,12 +2,9 @@
"""Endpoints for contacting the site owner.""" """Endpoints for contacting the site owner."""
from flask import Blueprint, g, request, redirect, url_for, session, flash from flask import Blueprint, request, redirect, url_for, flash
import re
import time
from rophako.utils import template, send_email from rophako.utils import template, send_email, remote_addr
from rophako.log import logger
from config import * from config import *
mod = Blueprint("contact", __name__, url_prefix="/contact") mod = Blueprint("contact", __name__, url_prefix="/contact")
@ -59,7 +56,7 @@ Subject: {subject}
{message}""".format( {message}""".format(
site_name=SITE_NAME, site_name=SITE_NAME,
ip=request.remote_addr, ip=remote_addr(),
ua=request.user_agent.string, ua=request.user_agent.string,
referer=request.headers.get("Referer", ""), referer=request.headers.get("Referer", ""),
name=name, name=name,

View File

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
"""Endpoints for visitor tracking functions."""
from flask import Blueprint, g
import re
import rophako.model.tracking as Tracking
from rophako.utils import template
mod = Blueprint("tracking", __name__, url_prefix="/tracking")
@mod.route("/")
def index():
    """Render the landing page that links to the tracking reports."""
    page = "tracking/index.html"
    return template(page)
@mod.route("/visitors")
def visitors():
    """Render the visitor history report.

    The data from ``Tracking.get_visitor_details()`` is stored in
    ``g.info["history"]`` before the template is rendered.
    """
    history = Tracking.get_visitor_details()
    g.info["history"] = history
    return template("tracking/visitors.html")
@mod.route("/referrers")
def referrers():
    """Render the HTTP referrer report.

    Loads the referrer summary, drops Google links considered useless and
    converts every remaining URL into HTML that is safe to output verbatim:
    the URL text is escaped and ``<wbr>`` break opportunities are inserted
    so long links wrap. The result is stored in ``g.info["referrers"]``.
    """
    data = Tracking.get_referrers()

    # Clean up useless Google links.
    # NOTE(review): the group in this pattern is optional, so it also matches
    # the "//" after the URL scheme -- in effect every link containing
    # "google" is dropped. Confirm whether that is the intended filter.
    google_noise = re.compile(r'/(?:imgres|url|search|translate\w+)?/')

    data["referrers"] = [
        [_wrap_link(link), hits]
        for link, hits in data["referrers"]
        if not ("google" in link and google_noise.search(link))
    ]

    g.info["referrers"] = data
    return template("tracking/referrers.html")


def _wrap_link(url, width=20):
    """Return `url` HTML-escaped, with a <wbr> after every `width` characters.

    Referrer URLs are supplied by visitors, and the result of this function
    is emitted without further escaping, so each chunk is escaped *before*
    the markup is added. Escaping per chunk (rather than the whole string
    first) ensures a ``<wbr>`` never lands in the middle of an entity.
    """
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    pieces = [url[i:i + width] for i in range(0, len(url), width)]
    return "<wbr>".join(escape(piece, True) for piece in pieces)

View File

@ -0,0 +1,13 @@
{% extends "layout.html" %}
{# Landing page of the tracking blueprint: links to the two report views. #}
{% block title %}Visitor Tracking{% endblock %}
{% block content %}
<h1>Visitor Tracking</h1>
<ul>
<li><a href="{{ url_for('tracking.visitors') }}">Unique Visitors &amp; Visits</a></li>
<li><a href="{{ url_for('tracking.referrers') }}">HTTP Referrers</a></li>
</ul>
{% endblock %}

View File

@ -0,0 +1,44 @@
{% extends "layout.html" %}
{# Referrer report. Expects `referrers` with two keys:
   "referrers" -- list of [link markup, hit count] pairs, most popular first
   "recent"    -- list of the most recently logged referrer URLs #}
{% block title %}Referring URLs{% endblock %}
{% block content %}
<h1>Referring URLs</h1>
This table lists the HTTP referrers to this site, in order of popularity. For
the most recent 25 links, see <a href="#recent">the end of this page</a>.<p>
<div style="height: 450px; overflow: auto">
<table class="table" width="100%">
<thead>
<tr>
<th width="40">Hits</th>
<th>Query</th>
</tr>
</thead>
<tbody>
{% for link in referrers["referrers"] %}
{# Entries may be empty placeholders; skip those. #}
{% if link %}
<tr>
<td align="center" valign="top">
{{ link[1] }}
</td>
<td align="left" valign="top" style="position: relative">
{# link[0] carries <wbr> markup added by the view, hence |safe. The URL
   itself is visitor-supplied, so the view must HTML-escape it before
   adding that markup -- |safe disables escaping here. #}
{{ link[0]|safe }}
</td>
</tr>
{% endif %}
{% endfor %}
</tbody>
</table>
</div>
<h2 id="recent">25 Most Recent Links</h2>
<ol>
{# NOTE(review): these raw URLs rely on template autoescaping -- presumably
   enabled for .html templates; confirm. #}
{% for item in referrers["recent"] %}
<li>{{ item }}</li>
{% endfor %}
</ol>
{% endblock %}

View File

@ -0,0 +1,54 @@
{% extends "layout.html" %}
{# Visitor history report. Expects `history` with the keys "traffic"
   (list of {date, hits, unique}), "most_unique" and "most_hits"
   ([date, count] pairs) and "oldest" (first date on record). #}
{% block title %}Visitor History{% endblock %}

{% block content %}

<h1>Visitor History</h1>

Unique visitors and hit counts have been logged on this site since
{{ history["oldest"] }}.<p>

The most unique visitors on this site in one day has been
{{ history["most_unique"][1] }} on {{ history["most_unique"][0] }}. The most
hits total in one day has been {{ history["most_hits"][1] }} on
{{ history["most_hits"][0] }}.<p>

Here is a full list of hits over time. Percentages are relative to the current
records.<p>

<table class="table" width="100%" border="0" cellspacing="2" cellpadding="2">
    <thead>
        <tr>
            <th width="20">Date</th>
            <th>Graph</th>
            <th width="250">Details</th>
        </tr>
    </thead>
    <tbody>
    {# Newest day first; each day takes two rows (unique visitors, hits). #}
    {% for date in history["traffic"]|reverse %}
        <tr>
            <td align="center" valign="middle" rowspan="2">
                {{ date["date"] }}
            </td>
            <td align="left" valign="middle">
                {# Unique visitors relative to the record day for uniques. #}
                {% set pct = (date["unique"] / history["most_unique"][1]) * 100 %}
                <div class="visitor-graph unique" style="width: {{ pct|int }}%"></div>
            </td>
            <td align="left" valign="middle">
                Unique: {{ date["unique"] }} ({{ pct|int }}%)
            </td>
        </tr>
        <tr>
            <td align="left" valign="middle">
                {# Hits relative to the record day for hits (not uniques). #}
                {% set pct = (date["hits"] / history["most_hits"][1]) * 100 %}
                <div class="visitor-graph hits" style="width: {{ pct|int }}%"></div>
            </td>
            <td align="left" valign="middle">
                Hits: {{ date["hits"] }} ({{ pct|int }}%)
            </td>
        </tr>
    {% endfor %}
    </tbody>
</table>

{% endblock %}

View File

@ -11,6 +11,7 @@ import importlib
import smtplib import smtplib
import markdown import markdown
import json import json
import urlparse
from rophako.log import logger from rophako.log import logger
from config import * from config import *
@ -211,6 +212,20 @@ def include(endpoint, *args, **kwargs):
return html return html
def remote_addr():
    """Return the remote IP address of the current request.

    All callers go through this helper so that the lookup lives in a
    single place.
    """
    # TODO: eventually support configurations with X-Forwarded-For; for now
    # the address reported on the request object is used as-is.
    client_ip = request.remote_addr
    return client_ip
def server_name():
    """Return this server's host name as seen in the current request.

    The value is the network-location part of ``request.url_root``, so it
    includes the port when the URL carries one.
    """
    return urlparse.urlparse(request.url_root).netloc
def pretty_time(time_format, unix): def pretty_time(time_format, unix):
"""Pretty-print a time stamp.""" """Pretty-print a time stamp."""
date = datetime.datetime.fromtimestamp(unix) date = datetime.datetime.fromtimestamp(unix)

Binary file not shown.

After

Width:  |  Height:  |  Size: 306 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 306 B