diff --git a/config-sample.py b/config-sample.py index ae1803c..93b8a25 100644 --- a/config-sample.py +++ b/config-sample.py @@ -134,4 +134,5 @@ load_plugin("rophako.modules.blog") load_plugin("rophako.modules.photo") load_plugin("rophako.modules.comment") load_plugin("rophako.modules.emoticons") -load_plugin("rophako.modules.contact") \ No newline at end of file +load_plugin("rophako.modules.contact") +load_plugin("rophako.modules.tracking") diff --git a/rophako/app.py b/rophako/app.py index f19cdf2..bbd4de3 100644 --- a/rophako/app.py +++ b/rophako/app.py @@ -26,6 +26,7 @@ BLUEPRINT_PATHS = [] import config from rophako import __version__ from rophako.plugin import load_plugin +import rophako.model.tracking as Tracking import rophako.utils app.DEBUG = config.DEBUG @@ -78,7 +79,8 @@ def before_request(): "uid": 0, "name": "Guest", "role": "user", - } + }, + "tracking": Tracking.track_visit(request, session), } # Default session vars. @@ -158,4 +160,4 @@ def not_found(error): @app.errorhandler(403) def forbidden(error): - return render_template('errors/403.html', **g.info), 403 \ No newline at end of file + return render_template('errors/403.html', **g.info), 403 diff --git a/rophako/jsondb.py b/rophako/jsondb.py index 5a5d016..7bcc3d1 100644 --- a/rophako/jsondb.py +++ b/rophako/jsondb.py @@ -19,7 +19,7 @@ redis_client = None cache_lifetime = 60*60 # 1 hour -def get(document): +def get(document, cache=True): """Get a specific document from the DB.""" logger.debug("JsonDB: GET {}".format(document)) @@ -32,7 +32,7 @@ def get(document): stat = os.stat(path) # Do we have it cached? - data = get_cache(document) + data = get_cache(document) if cache else None if data: # Check if the cache is fresh. if stat.st_mtime > get_cache(document+"_mtime"): @@ -45,12 +45,13 @@ def get(document): data = read_json(path) # Cache and return it. 
- set_cache(document, data, expires=cache_lifetime) - set_cache(document+"_mtime", stat.st_mtime, expires=cache_lifetime) + if cache: + set_cache(document, data, expires=cache_lifetime) + set_cache(document+"_mtime", stat.st_mtime, expires=cache_lifetime) return data -def commit(document, data): +def commit(document, data, cache=True): """Insert/update a document in the DB.""" # Need to create the file? @@ -69,8 +70,9 @@ def commit(document, data): os.mkdir(segment, 0o755) # Update the cached document. - set_cache(document, data, expires=cache_lifetime) - set_cache(document+"_mtime", time.time(), expires=cache_lifetime) + if cache: + set_cache(document, data, expires=cache_lifetime) + set_cache(document+"_mtime", time.time(), expires=cache_lifetime) # Write the JSON. write_json(path, data) @@ -82,6 +84,7 @@ def delete(document): if os.path.isfile(path): logger.info("Delete DB document: {}".format(path)) os.unlink(path) + del_cache(document) def exists(document): @@ -216,4 +219,4 @@ def del_cache(key): """Delete a cached item.""" key = config.REDIS_PREFIX + key client = get_redis() - client.delete(key) \ No newline at end of file + client.delete(key) diff --git a/rophako/model/photo.py b/rophako/model/photo.py index 36ad42c..33b53a5 100644 --- a/rophako/model/photo.py +++ b/rophako/model/photo.py @@ -3,7 +3,7 @@ """Photo album models.""" import os -from flask import g, request +from flask import g import time import requests from PIL import Image @@ -12,7 +12,7 @@ import random import config import rophako.jsondb as JsonDB -from rophako.utils import sanitize_name +from rophako.utils import sanitize_name, remote_addr from rophako.log import logger # Maps the friendly names of photo sizes with their pixel values from config. 
@@ -523,7 +523,7 @@ def process_photo(form, filename): } index["albums"][album][key] = dict( - ip=request.remote_addr, + ip=remote_addr(), author=g.info["session"]["uid"], uploaded=int(time.time()), caption=form.get("caption", ""), diff --git a/rophako/model/tracking.py b/rophako/model/tracking.py new file mode 100644 index 0000000..d5233e0 --- /dev/null +++ b/rophako/model/tracking.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- + +"""Visitor tracking models.""" + +import time +import requests + +import rophako.jsondb as JsonDB +from rophako.utils import remote_addr, pretty_time, server_name + +def track_visit(request, session): + """Main logic to track and log visitor details.""" + + # Get their tracking cookie value. The value will either be their HTTP + # referrer (if exists and valid) or else a "1". + cookie = session.get("tracking") + addr = remote_addr() + values = dict() # Returnable traffic values + + # Log hit counts. We need four kinds: + # - Unique today - Unique total + # - Hits today - Hits total + today = pretty_time("%Y-%m-%d", time.time()) + files = { + "unique/{}".format(today) : "unique_today", + "unique/total" : "unique_total", + "hits/{}".format(today) : "hits_today", + "hits/total" : "hits_total", + } + + # Go through the hit count files. Update them only if their tracking + # cookie was not present. + for file, key in files.items(): + dbfile = "traffic/{}".format(file) + if file.startswith("hits"): + # Hit file is just a simple counter. + db = dict(hits=0) + if JsonDB.exists(dbfile): + db = JsonDB.get(dbfile) + + # Update it? + if not cookie: + db["hits"] += 1 + JsonDB.commit(dbfile, db) + + # Store the copy. + values[key] = db["hits"] + else: + # Unique file is a collection of IP addresses. + db = dict() + if JsonDB.exists(dbfile): + db = JsonDB.get(dbfile) + + # Update with their IP? + if not cookie and not addr in db: + db[addr] = time.time() + JsonDB.commit(dbfile, db) + + # Store the copy. 
+ values[key] = len(db.keys()) + + # Log their HTTP referrer. + referrer = "1" + if request.referrer: + # Branch and check this. + referrer = log_referrer(request, request.referrer) + if not referrer: + # Wasn't a valid referrer. + referrer = "1" + + # Set their tracking cookie. + if not cookie: + cookie = referrer + session["tracking"] = cookie + + return values + + +def log_referrer(request, link): + """Double check the referring URL.""" + + # Ignore if same domain. + if link.startswith(request.url_root): + print "Referrer is same host!" + return None + + # See if the URL really links back to us. + hostname = server_name() + r = requests.get(link) + if hostname in r.text: + # Log it. + db = list() + if JsonDB.exists("traffic/referrers"): + # Don't cache the result -- the list can get huge! + db = JsonDB.get("traffic/referrers", cache=False) + db.append(link) + JsonDB.commit("traffic/referrers", db, cache=False) + return link + + return None + + +def get_visitor_details(): + """Retrieve detailed visitor information for the frontend.""" + result = { + "traffic": [], # Historical traffic data + "most_unique": [ "0000-00-00", 0 ], # Day with the most unique + "most_hits": [ "0000-00-00", 0 ], # Day with the most hits + "oldest": None, # Oldest day on record. + } + + # List all the documents. + hits = JsonDB.list_docs("traffic/hits") + for date in sorted(hits): + if date == "total": continue + if not result["oldest"]: + result["oldest"] = date + + # Get the DBs. + hits_db = JsonDB.get("traffic/hits/{}".format(date), cache=False) + uniq_db = JsonDB.get("traffic/unique/{}".format(date), cache=False) + + # Most we've seen? 
+ if hits_db["hits"] > result["most_hits"][1]: + result["most_hits"] = [ date, hits_db["hits"] ] + if len(uniq_db.keys()) > result["most_unique"][1]: + result["most_unique"] = [ date, len(uniq_db.keys()) ] + + result["traffic"].append(dict( + date=date, + hits=hits_db["hits"], + unique=len(uniq_db.keys()), + )) + + return result + + +def get_referrers(recent=25): + """Retrieve the referrer details. Returns results in this format: + + ``` + { + referrers: [ + ["http://...", 20], # Pre-sorted by number of hits + ], + recent: [ recent list ] + } + ``` + """ + db = [] + if JsonDB.exists("traffic/referrers"): + db = JsonDB.get("traffic/referrers", cache=False) + + # Count the links. + unique = dict() + for link in db: + if not link in unique: + unique[link] = 1 + else: + unique[link] += 1 + + # Sort them by popularity. + result = dict( + referrers=[], + recent=[], + ) + + sorted_links = sorted(unique.keys(), key=lambda x: unique[x], reverse=True) + for link in sorted_links: + result["referrers"].append([ link, unique[link] ]) + + recent = 0 - recent + result["recent"] = db[recent:] + + return result diff --git a/rophako/modules/admin/__init__.py b/rophako/modules/admin/__init__.py index 463b08e..d64b54b 100644 --- a/rophako/modules/admin/__init__.py +++ b/rophako/modules/admin/__init__.py @@ -3,7 +3,6 @@ """Endpoints for admin functions.""" from flask import g, Blueprint, request, redirect, url_for, session, flash -import re import rophako.model.user as User from rophako.modules.account import validate_create_form @@ -180,4 +179,4 @@ def unimpersonate(): ) flash("No longer impersonating.") - return redirect(url_for("index")) \ No newline at end of file + return redirect(url_for("index")) diff --git a/rophako/modules/blog/__init__.py b/rophako/modules/blog/__init__.py index 341bd5c..71f19c9 100644 --- a/rophako/modules/blog/__init__.py +++ b/rophako/modules/blog/__init__.py @@ -2,10 +2,8 @@ """Endpoints for the web blog.""" -from flask import Blueprint, g, request, 
redirect, url_for, session, flash, make_response -import re +from flask import Blueprint, g, request, redirect, url_for, flash, make_response import datetime -import calendar import time from xml.dom.minidom import Document @@ -13,7 +11,8 @@ import rophako.model.user as User import rophako.model.blog as Blog import rophako.model.comment as Comment import rophako.model.emoticons as Emoticons -from rophako.utils import template, render_markdown, pretty_time, login_required +from rophako.utils import (template, render_markdown, pretty_time, + login_required, remote_addr) from rophako.plugin import load_plugin from rophako.log import logger from config import * @@ -264,7 +263,7 @@ def update(): avatar = g.info["avatar"], categories = tags, privacy = g.info["privacy"], - ip = request.remote_addr, + ip = remote_addr(), emoticons = g.info["emoticons"], comments = g.info["comments"], format = g.info["format"], diff --git a/rophako/modules/comment/__init__.py b/rophako/modules/comment/__init__.py index 5594ede..1fd9079 100644 --- a/rophako/modules/comment/__init__.py +++ b/rophako/modules/comment/__init__.py @@ -8,7 +8,8 @@ import time import rophako.model.user as User import rophako.model.comment as Comment -from rophako.utils import template, pretty_time, login_required, sanitize_name +from rophako.utils import (template, pretty_time, login_required, sanitize_name, + remote_addr) from rophako.plugin import load_plugin from rophako.log import logger from config import * @@ -48,7 +49,7 @@ def preview(): Comment.add_comment( thread=thread, uid=g.info["session"]["uid"], - ip=request.remote_addr, + ip=remote_addr(), time=int(time.time()), image=gravatar, name=form["name"], @@ -216,4 +217,4 @@ def get_comment_form(form): contact = request.form.get("contact", ""), message = request.form.get("message", ""), subscribe = request.form.get("subscribe", "false"), - ) \ No newline at end of file + ) diff --git a/rophako/modules/contact/__init__.py b/rophako/modules/contact/__init__.py 
index 6b52608..6c37d56 100644 --- a/rophako/modules/contact/__init__.py +++ b/rophako/modules/contact/__init__.py @@ -2,12 +2,9 @@ """Endpoints for contacting the site owner.""" -from flask import Blueprint, g, request, redirect, url_for, session, flash -import re -import time +from flask import Blueprint, request, redirect, url_for, flash -from rophako.utils import template, send_email -from rophako.log import logger +from rophako.utils import template, send_email, remote_addr from config import * mod = Blueprint("contact", __name__, url_prefix="/contact") @@ -59,7 +56,7 @@ Subject: {subject} {message}""".format( site_name=SITE_NAME, - ip=request.remote_addr, + ip=remote_addr(), ua=request.user_agent.string, referer=request.headers.get("Referer", ""), name=name, @@ -70,4 +67,4 @@ Subject: {subject} ) flash("Your message has been delivered.") - return redirect(url_for("index")) \ No newline at end of file + return redirect(url_for("index")) diff --git a/rophako/modules/tracking/__init__.py b/rophako/modules/tracking/__init__.py new file mode 100644 index 0000000..7a040f4 --- /dev/null +++ b/rophako/modules/tracking/__init__.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- + +"""Endpoints for visitor tracking functions.""" + +from flask import Blueprint, g +import re + +import rophako.model.tracking as Tracking +from rophako.utils import template + +mod = Blueprint("tracking", __name__, url_prefix="/tracking") + + +@mod.route("/") +def index(): + return template("tracking/index.html") + + +@mod.route("/visitors") +def visitors(): + g.info["history"] = Tracking.get_visitor_details() + return template("tracking/visitors.html") + + +@mod.route("/referrers") +def referrers(): + g.info["referrers"] = Tracking.get_referrers() + + # Filter some of the links. + for i, link in enumerate(g.info["referrers"]["referrers"]): + # Clean up useless Google links. 
+ if "google" in link[0] and re.search(r'/(?:imgres|url|search|translate\w+)?/', link[0]): + g.info["referrers"]["referrers"][i] = None + + # Make the links word-wrap properly. + filtered = [ + [ re.sub(r'(.{20})', r'\1', x[0]), x[1] ] + for x in g.info["referrers"]["referrers"] + if x is not None + ] + g.info["referrers"]["referrers"] = filtered + + return template("tracking/referrers.html") diff --git a/rophako/modules/tracking/templates/tracking/index.html b/rophako/modules/tracking/templates/tracking/index.html new file mode 100644 index 0000000..64479b1 --- /dev/null +++ b/rophako/modules/tracking/templates/tracking/index.html @@ -0,0 +1,13 @@ +{% extends "layout.html" %} +{% block title %}Visitor Tracking{% endblock %} + +{% block content %} + +

Visitor Tracking

+ + + +{% endblock %} diff --git a/rophako/modules/tracking/templates/tracking/referrers.html b/rophako/modules/tracking/templates/tracking/referrers.html new file mode 100644 index 0000000..62c9fe0 --- /dev/null +++ b/rophako/modules/tracking/templates/tracking/referrers.html @@ -0,0 +1,44 @@ +{% extends "layout.html" %} +{% block title %}Referring URLs{% endblock %} + +{% block content %} + +

Referring URLs

+ +This table lists the HTTP referrers to this site, in order of popularity. For +the most recent 25 links, see the end of this page.

+ +

+ + + + + + + + + {% for link in referrers["referrers"] %} + {% if link %} + + + + + {% endif %} + {% endfor %} + +
HitsQuery
+ {{ link[1] }} + + {{ link[0]|safe }} +
+
+ +

25 Most Recent Links

+ +
    +{% for item in referrers["recent"] %} +
  1. {{ item }}
  2. +{% endfor %} +
+ +{% endblock %} diff --git a/rophako/modules/tracking/templates/tracking/visitors.html b/rophako/modules/tracking/templates/tracking/visitors.html new file mode 100644 index 0000000..85aeea0 --- /dev/null +++ b/rophako/modules/tracking/templates/tracking/visitors.html @@ -0,0 +1,54 @@ +{% extends "layout.html" %} +{% block title %}Visitor History{% endblock %} + +{% block content %} + +

Visitor History

+ +Unique visitors and hit counts have been logged on this site since +{{ history["oldest"] }}.

+ +The most unique visitors on this site in one day has been +{{ history["most_unique"][1] }} on {{ history["most_unique"][0] }}. The most +hits total in one day has been {{ history["most_hits"][1] }} on +{{ history["most_hits"][0] }}.

+ +Here is a full list of hits over time. Percentages are relative to the current +records.

+ + + + + + + + + + + {% for date in history["traffic"]|reverse %} + + + + + + + + + + {% endfor %} + +
DateGraphDetails
+ {{ date["date"] }} + + {% set pct = (date["unique"] / history["most_unique"][1]) * 100 %} +
+
+ Unique: {{ date["unique"] }} ({{ pct|int }}%) +
+ {# Hits bar: scale this day's hits against the record day for hits, not the unique-visitor record. #}{% set pct = (date["hits"] / history["most_hits"][1]) * 100 %} +
+
+ Hits: {{ date["hits"] }} ({{ pct|int }}%) +
+ +{% endblock %} diff --git a/rophako/utils.py b/rophako/utils.py index 9268e34..ab9b6fe 100644 --- a/rophako/utils.py +++ b/rophako/utils.py @@ -11,6 +11,7 @@ import importlib import smtplib import markdown import json +import urlparse from rophako.log import logger from config import * @@ -211,6 +212,20 @@ def include(endpoint, *args, **kwargs): return html +def remote_addr(): + """Retrieve the end user's remote IP address.""" + + # TODO: eventually support configurations with X-Forwarded-For, but for + # now at least we're centralizing this in one spot. + return request.remote_addr + + +def server_name(): + """Get the server's hostname.""" + urlparts = list(urlparse.urlparse(request.url_root)) + return urlparts[1] + + def pretty_time(time_format, unix): """Pretty-print a time stamp.""" date = datetime.datetime.fromtimestamp(unix) diff --git a/rophako/www/static/images/blue-clearlooks.png b/rophako/www/static/images/blue-clearlooks.png new file mode 100644 index 0000000..aa8a37c Binary files /dev/null and b/rophako/www/static/images/blue-clearlooks.png differ diff --git a/rophako/www/static/images/pink-clearlooks.png b/rophako/www/static/images/pink-clearlooks.png new file mode 100644 index 0000000..6bad628 Binary files /dev/null and b/rophako/www/static/images/pink-clearlooks.png differ