A Python content management system designed for kirsle.net featuring a blog, comments and photo albums. https://rophako.kirsle.net/

tracking.py (6.5 KB)

# -*- coding: utf-8 -*-
from __future__ import unicode_literals

"""Visitor tracking models."""

import time
import requests

import rophako.jsondb as JsonDB
from rophako.utils import (remote_addr, pretty_time, server_name,
    handle_exception)


def track_visit(request, session):
    """Main logic to track and log visitor details."""

    # Get their tracking cookie value. The value will either be their HTTP
    # referrer (if exists and valid) or else a "1".
    cookie = session.get("tracking")
    addr = remote_addr()
    values = dict()  # Returnable traffic values

    # Log hit counts. We need four kinds:
    # - Unique today    - Unique total
    # - Hits today      - Hits total
    today = pretty_time("%Y-%m-%d", time.time())
    files = {
        "unique/{}".format(today): "unique_today",
        "unique/total": "unique_total",
        "hits/{}".format(today): "hits_today",
        "hits/total": "hits_total",
    }

    # Go through the hit count files. Update them only if their tracking
    # cookie was not present.
    for file, key in files.items():
        dbfile = "traffic/{}".format(file)
        if file.startswith("hits"):
            # Hit file is just a simple counter.
            db = dict(hits=0)
            if JsonDB.exists(dbfile):
                db = JsonDB.get(dbfile)
                if db is None:
                    db = dict(hits=0)

            # Update it?
            if not cookie:
                db["hits"] += 1
                JsonDB.commit(dbfile, db)

            # Store the copy.
            values[key] = db["hits"]
        else:
            # Unique file is a collection of IP addresses.
            db = dict()
            if JsonDB.exists(dbfile):
                db = JsonDB.get(dbfile)
                if db is None:
                    db = dict()

            # Update with their IP?
            if not cookie and addr not in db:
                db[addr] = time.time()
                JsonDB.commit(dbfile, db)

            # Store the copy.
            values[key] = len(db.keys())

    # Log their HTTP referrer.
    referrer = "1"
    if request.referrer:
        # Branch and check this.
        referrer = log_referrer(request, request.referrer)
        if not referrer:
            # Wasn't a valid referrer.
            referrer = "1"

    # Set their tracking cookie.
    if not cookie:
        cookie = referrer
        session["tracking"] = cookie

    return values


def log_referrer(request, link):
    """Double check the referring URL."""

    # Ignore if same domain.
    hostname = server_name()
    if link.startswith("http://{}".format(hostname)) or \
       link.startswith("https://{}".format(hostname)):
        return None

    # See if the URL really links back to us.
    try:
        r = requests.get(link,
            timeout=5,
            verify=False,  # Don't do SSL verification
        )

        # Make sure the request didn't just redirect back to our main site
        # (e.g. the http://whatever.example.com wildcard may redirect back to
        # http://example.com, and if that's us, don't log that!)
        if r.url.startswith("http://{}".format(hostname)) or \
           r.url.startswith("https://{}".format(hostname)):
            return None

        # Look for our hostname in their page.
        if hostname in r.text:
            # Log it.
            db = list()
            if JsonDB.exists("traffic/referrers"):
                # Don't cache the result -- the list can get huge!
                db = JsonDB.get("traffic/referrers", cache=False)
            db.append(link)
            JsonDB.commit("traffic/referrers", db, cache=False)
            return link
    except Exception:
        pass

    return None


def rebuild_visitor_stats():
    """Recalculate the total unique/hits based on daily info."""
    total_unique = {}
    total_hits = 0

    # Tally them all up!
    for date in JsonDB.list_docs("traffic/unique"):
        if date == "total":
            continue
        db = JsonDB.get("traffic/unique/{}".format(date), cache=False)
        total_unique.update(db)
    for date in JsonDB.list_docs("traffic/hits"):
        if date == "total":
            continue
        db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
        total_hits += db.get("hits", 0)

    # Write the outputs.
    JsonDB.commit("traffic/unique/total", total_unique)
    JsonDB.commit("traffic/hits/total", dict(hits=total_hits))


def get_visitor_details():
    """Retrieve detailed visitor information for the frontend."""
    result = {
        "traffic": [],                     # Historical traffic data
        "most_unique": ["0000-00-00", 0],  # Day with the most unique
        "most_hits": ["0000-00-00", 0],    # Day with the most hits
        "oldest": None,                    # Oldest day on record.
    }

    # List all the documents.
    hits = JsonDB.list_docs("traffic/hits")
    for date in sorted(hits):
        if date == "total": continue
        if not result["oldest"]:
            result["oldest"] = date

        # Get the DBs.
        hits_db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
        uniq_db = JsonDB.get("traffic/unique/{}".format(date), cache=False)

        # Most we've seen?
        if hits_db["hits"] > result["most_hits"][1]:
            result["most_hits"] = [date, hits_db["hits"]]
        if len(uniq_db.keys()) > result["most_unique"][1]:
            result["most_unique"] = [date, len(uniq_db.keys())]

        result["traffic"].append(dict(
            date=date,
            hits=hits_db["hits"],
            unique=len(uniq_db.keys()),
        ))

    return result


def get_referrers(recent=25):
    """Retrieve the referrer details. Returns results in this format:

    ```
    {
        referrers: [
            ["http://...", 20], # Pre-sorted by number of hits
        ],
        recent: [ recent list ]
    }
    ```
    """
    db = []
    if JsonDB.exists("traffic/referrers"):
        db = JsonDB.get("traffic/referrers", cache=False)

    # Count the links.
    unique = dict()
    for link in db:
        if link not in unique:
            unique[link] = 1
        else:
            unique[link] += 1

    # Sort them by popularity.
    result = dict(
        referrers=[],
        recent=[],
    )
    sorted_links = sorted(unique.keys(), key=lambda x: unique[x], reverse=True)
    for link in sorted_links:
        result["referrers"].append([link, unique[link]])

    recent = 0 - recent
    result["recent"] = db[recent:]
    result["recent"].reverse()

    return result
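
For orientation, here is a minimal usage sketch, not part of tracking.py, showing one way these helpers could be wired into a Flask app; the request/session objects the module expects look Flask-shaped. The import path rophako.model.tracking, the routes, and the secret key are assumptions made for illustration, not the project's actual setup.

# Hypothetical glue code, assuming a Flask app and that tracking.py is
# importable as rophako.model.tracking; adjust the import to the real path.
from flask import Flask, jsonify, request, session

from rophako.model.tracking import (track_visit, get_visitor_details,
    get_referrers)

app = Flask(__name__)
app.secret_key = "change-me"  # placeholder; the tracking cookie lives in the session


@app.route("/")
def index():
    # Count the hit and set the visitor's tracking cookie on first contact.
    traffic = track_visit(request, session)
    msg = ("Hits today: {hits_today} ({unique_today} unique); "
           "all time: {hits_total} ({unique_total} unique)")
    return msg.format(**traffic)


@app.route("/admin/traffic")
def traffic_report():
    # Summaries for an admin dashboard: daily history plus referrer counts.
    return jsonify(
        details=get_visitor_details(),
        referrers=get_referrers(recent=10),
    )


if __name__ == "__main__":
    app.run(debug=True)

Either way, the counters end up in flat JsonDB documents under traffic/hits/YYYY-MM-DD, traffic/unique/YYYY-MM-DD, and traffic/referrers, so rebuild_visitor_stats() can be run at any time to regenerate the totals from the daily records.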