A Python content management system designed for kirsle.net featuring a blog, comments and photo albums. https://rophako.kirsle.net/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

tracking.py 5.0 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. # -*- coding: utf-8 -*-
  2. """Visitor tracking models."""
  3. import time
  4. import requests
  5. import rophako.jsondb as JsonDB
  6. from rophako.utils import remote_addr, pretty_time, server_name
  7. def track_visit(request, session):
  8. """Main logic to track and log visitor details."""
  9. # Get their tracking cookie value. The value will either be their HTTP
  10. # referrer (if exists and valid) or else a "1".
  11. cookie = session.get("tracking")
  12. addr = remote_addr()
  13. values = dict() # Returnable traffic values
  14. # Log hit counts. We need four kinds:
  15. # - Unique today - Unique total
  16. # - Hits today - Hits total
  17. today = pretty_time("%Y-%m-%d", time.time())
  18. files = {
  19. "unique/{}".format(today) : "unique_today",
  20. "unique/total" : "unique_total",
  21. "hits/{}".format(today) : "hits_today",
  22. "hits/total" : "hits_total",
  23. }
  24. # Go through the hit count files. Update them only if their tracking
  25. # cookie was not present.
  26. for file, key in files.items():
  27. dbfile = "traffic/{}".format(file)
  28. if file.startswith("hits"):
  29. # Hit file is just a simple counter.
  30. db = dict(hits=0)
  31. if JsonDB.exists(dbfile):
  32. db = JsonDB.get(dbfile)
  33. # Update it?
  34. if not cookie:
  35. db["hits"] += 1
  36. JsonDB.commit(dbfile, db)
  37. # Store the copy.
  38. values[key] = db["hits"]
  39. else:
  40. # Unique file is a collection of IP addresses.
  41. db = dict()
  42. if JsonDB.exists(dbfile):
  43. db = JsonDB.get(dbfile)
  44. # Update with their IP?
  45. if not cookie and not addr in db:
  46. db[addr] = time.time()
  47. JsonDB.commit(dbfile, db)
  48. # Store the copy.
  49. values[key] = len(db.keys())
  50. # Log their HTTP referrer.
  51. referrer = "1"
  52. if request.referrer:
  53. # Branch and check this.
  54. referrer = log_referrer(request, request.referrer)
  55. if not referrer:
  56. # Wasn't a valid referrer.
  57. referrer = "1"
  58. # Set their tracking cookie.
  59. if not cookie:
  60. cookie = referrer
  61. session["tracking"] = cookie
  62. return values
  63. def log_referrer(request, link):
  64. """Double check the referring URL."""
  65. # Ignore if same domain.
  66. hostname = server_name()
  67. if link.startswith("http://{}".format(hostname)) or \
  68. link.startswith("https://{}".format(hostname)):
  69. return None
  70. # See if the URL really links back to us.
  71. hostname = server_name()
  72. r = requests.get(link)
  73. if hostname in r.text:
  74. # Log it.
  75. db = list()
  76. if JsonDB.exists("traffic/referrers"):
  77. # Don't cache the result -- the list can get huge!
  78. db = JsonDB.get("traffic/referrers", cache=False)
  79. db.append(link)
  80. JsonDB.commit("traffic/referrers", db, cache=False)
  81. return link
  82. return None
  83. def get_visitor_details():
  84. """Retrieve detailed visitor information for the frontend."""
  85. result = {
  86. "traffic": [], # Historical traffic data
  87. "most_unique": [ "0000-00-00", 0 ], # Day with the most unique
  88. "most_hits": [ "0000-00-00", 0 ], # Day with the most hits
  89. "oldest": None, # Oldest day on record.
  90. }
  91. # List all the documents.
  92. hits = JsonDB.list_docs("traffic/hits")
  93. for date in sorted(hits):
  94. if date == "total": continue
  95. if not result["oldest"]:
  96. result["oldest"] = date
  97. # Get the DBs.
  98. hits_db = JsonDB.get("traffic/hits/{}".format(date), cache=False)
  99. uniq_db = JsonDB.get("traffic/unique/{}".format(date), cache=False)
  100. # Most we've seen?
  101. if hits_db["hits"] > result["most_hits"][1]:
  102. result["most_hits"] = [ date, hits_db["hits"] ]
  103. if len(uniq_db.keys()) > result["most_unique"][1]:
  104. result["most_unique"] = [ date, len(uniq_db.keys()) ]
  105. result["traffic"].append(dict(
  106. date=date,
  107. hits=hits_db["hits"],
  108. unique=len(uniq_db.keys()),
  109. ))
  110. return result
  111. def get_referrers(recent=25):
  112. """Retrieve the referrer details. Returns results in this format:
  113. ```
  114. {
  115. referrers: [
  116. ["http://...", 20], # Pre-sorted by number of hits
  117. ],
  118. recent: [ recent list ]
  119. }
  120. ```
  121. """
  122. db = []
  123. if JsonDB.exists("traffic/referrers"):
  124. db = JsonDB.get("traffic/referrers", cache=False)
  125. # Count the links.
  126. unique = dict()
  127. for link in db:
  128. if not link in unique:
  129. unique[link] = 1
  130. else:
  131. unique[link] += 1
  132. # Sort them by popularity.
  133. result = dict(
  134. referrers=[],
  135. recent=[],
  136. )
  137. sorted_links = sorted(unique.keys(), key=lambda x: unique[x], reverse=True)
  138. for link in sorted_links:
  139. result["referrers"].append([ link, unique[link] ])
  140. recent = 0 - recent
  141. result["recent"] = db[recent:]
  142. return result