2014-12-02 23:46:13 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2015-02-23 21:10:21 +00:00
|
|
|
from __future__ import unicode_literals
|
2014-12-02 23:46:13 +00:00
|
|
|
|
|
|
|
"""Visitor tracking models."""
|
|
|
|
|
|
|
|
import time
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import rophako.jsondb as JsonDB
|
2014-12-05 22:33:42 +00:00
|
|
|
from rophako.utils import (remote_addr, pretty_time, server_name,
|
|
|
|
handle_exception)
|
2014-12-02 23:46:13 +00:00
|
|
|
|
|
|
|
def track_visit(request, session):
    """Main logic to track and log visitor details.

    Bumps the four traffic counters (unique/hits, daily/total) unless the
    visitor already carries a tracking cookie, logs their HTTP referrer if
    it checks out, and finally sets the tracking cookie.

    Args:
        request: the active request object (used for its HTTP referrer).
        session: the visitor's session; the "tracking" key stores their
            cookie value.

    Returns:
        dict: counter values keyed by "unique_today", "unique_total",
        "hits_today" and "hits_total".
    """
    # Get their tracking cookie value. The value will either be their HTTP
    # referrer (if exists and valid) or else a "1".
    cookie = session.get("tracking")
    addr = remote_addr()
    values = dict()  # Returnable traffic values

    # Log hit counts. We need four kinds:
    # - Unique today - Unique total
    # - Hits today - Hits total
    today = pretty_time("%Y-%m-%d", time.time())
    files = {
        "unique/{}".format(today) : "unique_today",
        "unique/total"            : "unique_total",
        "hits/{}".format(today)   : "hits_today",
        "hits/total"              : "hits_total",
    }

    # Go through the hit count files. Update them only if their tracking
    # cookie was not present.
    for file, key in files.items():
        dbfile = "traffic/{}".format(file)
        if file.startswith("hits"):
            # Hit file is just a simple counter.
            db = None
            if JsonDB.exists(dbfile):
                db = JsonDB.get(dbfile)
            if db is None:
                # Missing or unreadable document; start fresh.
                db = dict(hits=0)

            # Update it?
            if not cookie:
                db["hits"] += 1
                JsonDB.commit(dbfile, db)

            # Store the copy.
            values[key] = db["hits"]
        else:
            # Unique file is a collection of IP addresses.
            db = None
            if JsonDB.exists(dbfile):
                db = JsonDB.get(dbfile)
            if db is None:
                db = dict()

            # Update with their IP?
            if not cookie and addr not in db:
                db[addr] = time.time()
                JsonDB.commit(dbfile, db)

            # Store the copy.
            values[key] = len(db)

    # Log their HTTP referrer.
    referrer = "1"
    if request.referrer:
        # Branch and check this.
        referrer = log_referrer(request, request.referrer)
        if not referrer:
            # Wasn't a valid referrer.
            referrer = "1"

    # Set their tracking cookie.
    if not cookie:
        cookie = referrer
        session["tracking"] = cookie

    return values
|
|
|
|
|
|
|
|
|
|
|
|
def log_referrer(request, link):
    """Double check the referring URL.

    Fetches the referring page and verifies it genuinely mentions this site
    before appending it to the referrers log.

    Args:
        request: the active request object (kept for interface stability).
        link (str): the raw HTTP referrer URL.

    Returns:
        str: the link, if it was valid and has been logged.
        None: the referrer was our own domain, failed to fetch, or did not
            mention this site.
    """
    # Our own hostname, used both for the same-domain check and the
    # links-back-to-us check below.
    hostname = server_name()

    # Ignore if same domain.
    if link.startswith("http://{}".format(hostname)) or \
       link.startswith("https://{}".format(hostname)):
        return None

    # See if the URL really links back to us.
    try:
        r = requests.get(link,
            timeout=5,
            verify=False, # Don't do SSL verification
        )

        # Make sure the request didn't just redirect back to our main site
        # (e.g. http://whatever.example.com wildcard may redirect back to
        # http://example.com, and if that's us, don't log that!
        if r.url.startswith("http://{}".format(hostname)) or \
           r.url.startswith("https://{}".format(hostname)):
            return None

        # Look for our hostname in their page.
        if hostname in r.text:
            # Log it.
            db = list()
            if JsonDB.exists("traffic/referrers"):
                # Don't cache the result -- the list can get huge!
                db = JsonDB.get("traffic/referrers", cache=False)
            db.append(link)
            JsonDB.commit("traffic/referrers", db, cache=False)
            return link
    except Exception:
        # Best effort: any fetch/parse failure just means we don't log this
        # referrer. (Narrowed from a bare except so KeyboardInterrupt and
        # SystemExit still propagate.)
        pass

    return None
|
|
|
|
|
|
|
|
|
2014-12-06 22:45:20 +00:00
|
|
|
def rebuild_visitor_stats():
    """Recalculate the total unique/hits based on daily info."""
    unique_ips = {}
    hit_count = 0

    # Fold every daily document into the totals, skipping the running
    # "total" documents themselves.
    for doc in JsonDB.list_docs("traffic/unique"):
        if doc == "total":
            continue
        unique_ips.update(
            JsonDB.get("traffic/unique/{}".format(doc), cache=False)
        )

    for doc in JsonDB.list_docs("traffic/hits"):
        if doc == "total":
            continue
        day = JsonDB.get("traffic/hits/{}".format(doc), cache=False)
        hit_count += day.get("hits", 0)

    # Persist the recalculated totals.
    JsonDB.commit("traffic/unique/total", unique_ips)
    JsonDB.commit("traffic/hits/total", dict(hits=hit_count))
|
|
|
|
|
|
|
|
|
2014-12-02 23:46:13 +00:00
|
|
|
def get_visitor_details():
    """Retrieve detailed visitor information for the frontend.

    Returns:
        dict: with the keys:
            traffic (list): per-day dicts with "date", "hits" and "unique".
            most_unique (list): [date, count] day with the most uniques.
            most_hits (list): [date, count] day with the most hits.
            oldest (str): oldest date on record, or None when no data.
    """
    result = {
        "traffic": [], # Historical traffic data
        "most_unique": [ "0000-00-00", 0 ], # Day with the most unique
        "most_hits": [ "0000-00-00", 0 ], # Day with the most hits
        "oldest": None, # Oldest day on record.
    }

    # List all the documents.
    hits = JsonDB.list_docs("traffic/hits")
    for date in sorted(hits):
        if date == "total": continue
        if not result["oldest"]:
            result["oldest"] = date

        # Get the DBs. Guard against missing/corrupt documents: the unique
        # doc for a day isn't guaranteed to exist just because the hits doc
        # does, and JsonDB.get returns None in that case.
        hits_db = JsonDB.get("traffic/hits/{}".format(date), cache=False) or {}
        uniq_db = JsonDB.get("traffic/unique/{}".format(date), cache=False) or {}
        day_hits = hits_db.get("hits", 0)
        day_unique = len(uniq_db)

        # Most we've seen?
        if day_hits > result["most_hits"][1]:
            result["most_hits"] = [ date, day_hits ]
        if day_unique > result["most_unique"][1]:
            result["most_unique"] = [ date, day_unique ]

        result["traffic"].append(dict(
            date=date,
            hits=day_hits,
            unique=day_unique,
        ))

    return result
|
|
|
|
|
|
|
|
|
|
|
|
def get_referrers(recent=25):
    """Retrieve the referrer details. Returns results in this format:

    ```
    {
        referrers: [
            ["http://...", 20], # Pre-sorted by number of hits
        ],
        recent: [ recent list ]
    }
    ```
    """
    links = []
    if JsonDB.exists("traffic/referrers"):
        links = JsonDB.get("traffic/referrers", cache=False)

    # Tally how many times each link appears in the log.
    counts = dict()
    for url in links:
        counts[url] = counts.get(url, 0) + 1

    result = dict(
        referrers=[],
        recent=[],
    )

    # Pair each link with its count, most popular first.
    by_popularity = sorted(counts, key=counts.get, reverse=True)
    result["referrers"] = [ [url, counts[url]] for url in by_popularity ]

    # The last `recent` entries of the log, newest first.
    result["recent"] = links[0 - recent:]
    result["recent"].reverse()

    return result
|