A Python content management system designed for kirsle.net featuring a blog, comments and photo albums. https://rophako.kirsle.net/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

220 lines
6.4 KiB

  1. # -*- coding: utf-8 -*-
  2. """Visitor tracking models."""
  3. import time
  4. import requests
  5. import rophako.jsondb as JsonDB
  6. from rophako.utils import (remote_addr, pretty_time, server_name,
  7. handle_exception)
  8. def track_visit(request, session):
  9. """Main logic to track and log visitor details."""
  10. # Get their tracking cookie value. The value will either be their HTTP
  11. # referrer (if exists and valid) or else a "1".
  12. cookie = session.get("tracking")
  13. addr = remote_addr()
  14. values = dict() # Returnable traffic values
  15. # Log hit counts. We need four kinds:
  16. # - Unique today - Unique total
  17. # - Hits today - Hits total
  18. today = pretty_time("%Y-%m-%d", time.time())
  19. files = {
  20. "unique/{}".format(today) : "unique_today",
  21. "unique/total" : "unique_total",
  22. "hits/{}".format(today) : "hits_today",
  23. "hits/total" : "hits_total",
  24. }
  25. # Go through the hit count files. Update them only if their tracking
  26. # cookie was not present.
  27. for file, key in files.items():
  28. dbfile = "traffic/{}".format(file)
  29. if file.startswith("hits"):
  30. # Hit file is just a simple counter.
  31. db = dict(hits=0)
  32. if JsonDB.exists(dbfile):
  33. db = JsonDB.get(dbfile)
  34. if db is None:
  35. db = dict(hits=0)
  36. # Update it?
  37. if not cookie:
  38. db["hits"] += 1
  39. JsonDB.commit(dbfile, db)
  40. # Store the copy.
  41. values[key] = db["hits"]
  42. else:
  43. # Unique file is a collection of IP addresses.
  44. db = dict()
  45. if JsonDB.exists(dbfile):
  46. db = JsonDB.get(dbfile)
  47. if db is None:
  48. db = dict()
  49. # Update with their IP?
  50. if not cookie and not addr in db:
  51. db[addr] = time.time()
  52. JsonDB.commit(dbfile, db)
  53. # Store the copy.
  54. values[key] = len(db.keys())
  55. # Log their HTTP referrer.
  56. referrer = "1"
  57. if request.referrer:
  58. # Branch and check this.
  59. referrer = log_referrer(request, request.referrer)
  60. if not referrer:
  61. # Wasn't a valid referrer.
  62. referrer = "1"
  63. # Set their tracking cookie.
  64. if not cookie:
  65. cookie = referrer
  66. session["tracking"] = cookie
  67. return values
  68. def log_referrer(request, link):
  69. """Double check the referring URL."""
  70. # Ignore if same domain.
  71. hostname = server_name()
  72. if link.startswith("http://{}".format(hostname)) or \
  73. link.startswith("https://{}".format(hostname)):
  74. return None
  75. # See if the URL really links back to us.
  76. hostname = server_name()
  77. try:
  78. r = requests.get(link,
  79. timeout=5,
  80. verify=False, # Don't do SSL verification
  81. )
  82. # Make sure the request didn't just redirect back to our main site
  83. # (e.g. http://whatever.example.com wildcard may redirect back to
  84. # http://example.com, and if that's us, don't log that!
  85. if r.url.startswith("http://{}".format(hostname)) or \
  86. r.url.startswith("https://{}".format(hostname)):
  87. return None
  88. # Look for our hostname in their page.
  89. if hostname in r.text:
  90. # Log it.
  91. db = list()
  92. if JsonDB.exists("traffic/referrers"):
  93. # Don't cache the result -- the list can get huge!
  94. db = JsonDB.get("traffic/referrers", cache=False)
  95. db.append(link)
  96. JsonDB.commit("traffic/referrers", db, cache=False)
  97. return link
  98. except:
  99. pass
  100. return None
  101. def rebuild_visitor_stats():
  102. """Recalculate the total unique/hits based on daily info."""
  103. total_unique = {}
  104. total_hits = 0
  105. # Tally them all up!
  106. for date in JsonDB.list_docs("traffic/unique"):
  107. if date == "total":
  108. continue
  109. db = JsonDB.get("traffic/unique/{}".format(date), cache=False)
  110. total_unique.update(db)
  111. for date in JsonDB.list_docs("traffic/hits"):
  112. if date == "total":
  113. continue
  114. db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
  115. total_hits += db.get("hits", 0)
  116. # Write the outputs.
  117. JsonDB.commit("traffic/unique/total", total_unique)
  118. JsonDB.commit("traffic/hits/total", dict(hits=total_hits))
  119. def get_visitor_details():
  120. """Retrieve detailed visitor information for the frontend."""
  121. result = {
  122. "traffic": [], # Historical traffic data
  123. "most_unique": [ "0000-00-00", 0 ], # Day with the most unique
  124. "most_hits": [ "0000-00-00", 0 ], # Day with the most hits
  125. "oldest": None, # Oldest day on record.
  126. }
  127. # List all the documents.
  128. hits = JsonDB.list_docs("traffic/hits")
  129. for date in sorted(hits):
  130. if date == "total": continue
  131. if not result["oldest"]:
  132. result["oldest"] = date
  133. # Get the DBs.
  134. hits_db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
  135. uniq_db = JsonDB.get("traffic/unique/{}".format(date), cache=False)
  136. # Most we've seen?
  137. if hits_db["hits"] > result["most_hits"][1]:
  138. result["most_hits"] = [ date, hits_db["hits"] ]
  139. if len(uniq_db.keys()) > result["most_unique"][1]:
  140. result["most_unique"] = [ date, len(uniq_db.keys()) ]
  141. result["traffic"].append(dict(
  142. date=date,
  143. hits=hits_db["hits"],
  144. unique=len(uniq_db.keys()),
  145. ))
  146. return result
  147. def get_referrers(recent=25):
  148. """Retrieve the referrer details. Returns results in this format:
  149. ```
  150. {
  151. referrers: [
  152. ["http://...", 20], # Pre-sorted by number of hits
  153. ],
  154. recent: [ recent list ]
  155. }
  156. ```
  157. """
  158. db = []
  159. if JsonDB.exists("traffic/referrers"):
  160. db = JsonDB.get("traffic/referrers", cache=False)
  161. # Count the links.
  162. unique = dict()
  163. for link in db:
  164. if not link in unique:
  165. unique[link] = 1
  166. else:
  167. unique[link] += 1
  168. # Sort them by popularity.
  169. result = dict(
  170. referrers=[],
  171. recent=[],
  172. )
  173. sorted_links = sorted(unique.keys(), key=lambda x: unique[x], reverse=True)
  174. for link in sorted_links:
  175. result["referrers"].append([ link, unique[link] ])
  176. recent = 0 - recent
  177. result["recent"] = db[recent:]
  178. result["recent"].reverse()
  179. return result