|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220 |
- # -*- coding: utf-8 -*-
- from __future__ import unicode_literals, absolute_import
-
- """Visitor tracking models."""
-
- import time
- import requests
-
- import rophako.jsondb as JsonDB
- from rophako.utils import (remote_addr, pretty_time, server_name,
- handle_exception)
-
- def track_visit(request, session):
- """Main logic to track and log visitor details."""
-
- # Get their tracking cookie value. The value will either be their HTTP
- # referrer (if exists and valid) or else a "1".
- cookie = session.get("tracking")
- addr = remote_addr()
- values = dict() # Returnable traffic values
-
- # Log hit counts. We need four kinds:
- # - Unique today - Unique total
- # - Hits today - Hits total
- today = pretty_time("%Y-%m-%d", time.time())
- files = {
- "unique/{}".format(today) : "unique_today",
- "unique/total" : "unique_total",
- "hits/{}".format(today) : "hits_today",
- "hits/total" : "hits_total",
- }
-
- # Go through the hit count files. Update them only if their tracking
- # cookie was not present.
- for file, key in files.items():
- dbfile = "traffic/{}".format(file)
- if file.startswith("hits"):
- # Hit file is just a simple counter.
- db = dict(hits=0)
- if JsonDB.exists(dbfile):
- db = JsonDB.get(dbfile)
- if db is None:
- db = dict(hits=0)
-
- # Update it?
- if not cookie:
- db["hits"] += 1
- JsonDB.commit(dbfile, db)
-
- # Store the copy.
- values[key] = db["hits"]
- else:
- # Unique file is a collection of IP addresses.
- db = dict()
- if JsonDB.exists(dbfile):
- db = JsonDB.get(dbfile)
- if db is None:
- db = dict()
-
- # Update with their IP?
- if not cookie and not addr in db:
- db[addr] = time.time()
- JsonDB.commit(dbfile, db)
-
- # Store the copy.
- values[key] = len(db.keys())
-
- # Log their HTTP referrer.
- referrer = "1"
- if request.referrer:
- # Branch and check this.
- referrer = log_referrer(request, request.referrer)
- if not referrer:
- # Wasn't a valid referrer.
- referrer = "1"
-
- # Set their tracking cookie.
- if not cookie:
- cookie = referrer
- session["tracking"] = cookie
-
- return values
-
-
- def log_referrer(request, link):
- """Double check the referring URL."""
-
- # Ignore if same domain.
- hostname = server_name()
- if link.startswith("http://{}".format(hostname)) or \
- link.startswith("https://{}".format(hostname)):
- return None
-
- # See if the URL really links back to us.
- hostname = server_name()
- try:
- r = requests.get(link,
- timeout=5,
- verify=False, # Don't do SSL verification
- )
-
- # Make sure the request didn't just redirect back to our main site
- # (e.g. http://whatever.example.com wildcard may redirect back to
- # http://example.com, and if that's us, don't log that!
- if r.url.startswith("http://{}".format(hostname)) or \
- r.url.startswith("https://{}".format(hostname)):
- return None
-
- # Look for our hostname in their page.
- if hostname in r.text:
- # Log it.
- db = list()
- if JsonDB.exists("traffic/referrers"):
- # Don't cache the result -- the list can get huge!
- db = JsonDB.get("traffic/referrers", cache=False)
- db.append(link)
- JsonDB.commit("traffic/referrers", db, cache=False)
- return link
- except:
- pass
-
- return None
-
-
- def rebuild_visitor_stats():
- """Recalculate the total unique/hits based on daily info."""
- total_unique = {}
- total_hits = 0
-
- # Tally them all up!
- for date in JsonDB.list_docs("traffic/unique"):
- if date == "total":
- continue
- db = JsonDB.get("traffic/unique/{}".format(date), cache=False)
- total_unique.update(db)
- for date in JsonDB.list_docs("traffic/hits"):
- if date == "total":
- continue
- db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
- total_hits += db.get("hits", 0)
-
- # Write the outputs.
- JsonDB.commit("traffic/unique/total", total_unique)
- JsonDB.commit("traffic/hits/total", dict(hits=total_hits))
-
-
- def get_visitor_details():
- """Retrieve detailed visitor information for the frontend."""
- result = {
- "traffic": [], # Historical traffic data
- "most_unique": [ "0000-00-00", 0 ], # Day with the most unique
- "most_hits": [ "0000-00-00", 0 ], # Day with the most hits
- "oldest": None, # Oldest day on record.
- }
-
- # List all the documents.
- hits = JsonDB.list_docs("traffic/hits")
- for date in sorted(hits):
- if date == "total": continue
- if not result["oldest"]:
- result["oldest"] = date
-
- # Get the DBs.
- hits_db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
- uniq_db = JsonDB.get("traffic/unique/{}".format(date), cache=False)
-
- # Most we've seen?
- if hits_db["hits"] > result["most_hits"][1]:
- result["most_hits"] = [ date, hits_db["hits"] ]
- if len(uniq_db.keys()) > result["most_unique"][1]:
- result["most_unique"] = [ date, len(uniq_db.keys()) ]
-
- result["traffic"].append(dict(
- date=date,
- hits=hits_db["hits"],
- unique=len(uniq_db.keys()),
- ))
-
- return result
-
-
- def get_referrers(recent=25):
- """Retrieve the referrer details. Returns results in this format:
-
- ```
- {
- referrers: [
- ["http://...", 20], # Pre-sorted by number of hits
- ],
- recent: [ recent list ]
- }
- ```
- """
- db = []
- if JsonDB.exists("traffic/referrers"):
- db = JsonDB.get("traffic/referrers", cache=False)
-
- # Count the links.
- unique = dict()
- for link in db:
- if not link in unique:
- unique[link] = 1
- else:
- unique[link] += 1
-
- # Sort them by popularity.
- result = dict(
- referrers=[],
- recent=[],
- )
-
- sorted_links = sorted(unique.keys(), key=lambda x: unique[x], reverse=True)
- for link in sorted_links:
- result["referrers"].append([ link, unique[link] ])
-
- recent = 0 - recent
- result["recent"] = db[recent:]
- result["recent"].reverse()
-
- return result
|