From 0a374e49d0f75559a9739433611b89608ca2a34d Mon Sep 17 00:00:00 2001
From: Sameer Al-Sakran <sameer@expa.com>
Date: Sun, 20 Nov 2016 22:33:36 -0800
Subject: [PATCH] first stab at some anonymous usage tracking

---
 src/metabase/util/stats.clj       | 271 ++++++++++++++++++++++++++++++
 test/metabase/util/stats_test.clj |  58 +++++++
 2 files changed, 329 insertions(+)
 create mode 100644 src/metabase/util/stats.clj
 create mode 100644 test/metabase/util/stats_test.clj

diff --git a/src/metabase/util/stats.clj b/src/metabase/util/stats.clj
new file mode 100644
index 00000000000..1eda6384835
--- /dev/null
+++ b/src/metabase/util/stats.clj
@@ -0,0 +1,271 @@
+(ns metabase.util.stats
+  "Functions which summarize the usage of an instance"
+  (:require [clojure.tools.logging :as log]
+            [clj-http.client :as client]
+            (metabase [config :as config]
+                      [db :as db])
+            [metabase.public-settings :as settings]
+            (metabase.models [field :as field]
+                             [table :as table]
+                             [setting :as setting])           
+            [metabase.util :as u]))
+
+(def ^:private ^:const ^String metabase-usage-url "https://kqatai1z3c.execute-api.us-east-1.amazonaws.com/prod/ServerStatsCollector")
+
+(def ^:private ^Integer anonymous-id
+  "Generate an anonymous id. Don't worry too much about hash collisions or localhost cases, etc.
+   The goal is to be able to get a rough sense for how many different hosts are throwing a specific error/event." 
+  (hash (str (java.net.InetAddress/getLocalHost))))
+
+
+(defn- anon-tracking-enabled? 
+  "To avoid a circular reference"
+  []
+  (require 'metabase.public-settings)
+  (resolve 'metabase.public-settings/anon-tracking-enabled))
+
+
+
+(defn- bin-micro-number 
+  "Return really small bin number. Assumes positive inputs"
+  [x]
+  (cond
+    (= 0 x) "0"
+    (= 1 x) "1"
+    (= 2 x) "2"
+    (> x 2) "3+")
+  )
+
+
+(defn- bin-small-number 
+  "Return small bin number. Assumes positive inputs"
+  [x]
+  (cond
+    (= 0 x) "0"
+    (<= 1 x 5) "1-5"
+    (<= 6 x 10) "6-10"
+    (<= 11 x 25) "11-25"
+    (> x 25) "25+")
+  )
+
+(defn- bin-medium-number 
+  "Return medium bin number. Assumes positive inputs"
+  [x]
+  (cond
+    (= 0 x) "0"
+    (<= 1 x 5) "1-5"
+    (<= 6 x 10) "6-10"
+    (<= 11 x 25) "11-25"  
+    (<= 26 x 50) "26-50"
+    (<= 51 x 100) "51-100"
+    (<= 101 x 250) "101-250"
+    (> x 250) "250+")
+  )
+
+
+(defn- bin-large-number 
+  "Return large bin number. Assumes positive inputs"
+  [x]
+  (cond
+    (= 0 x) "0"
+    (<= 1 x 10) "1-10"
+    (<= 11 x 50) "11-50"
+    (<= 51 x 250) "51-250"  
+    (<= 251 x 1000) "251-1000"
+    (<= 1001 x 10000) "1001-10000"
+    (> x 10000) "10000+")
+  )
+
+
+(defn- get-settings 
+  "Figure out global info aboutt his instance"
+  []
+  {:version               (config/mb-version-info :tag)
+   :running_on            "unknown" ;; HOW DO I GET THIS? beanstalk vs heroku vs mac vs 'unknown'
+   :application_database  (config/config-str :mb-db-type)
+   :check_for_updates     (setting/get :check-for-updates)
+   :site_name             (not= settings/site-name "Metabase")
+   :report_timezone       (setting/get :report-timezone)
+   :friendly_names        true ;; HOW DO I GET THIS?
+   :email_configured      ((resolve 'metabase.email/email-configured?))
+   :slack_configured      ((resolve 'metabase.integrations.slack/slack-configured?))
+   :sso_configured        true ;; HOW DO I GET THIS?
+   :instance_started      (new java.util.Date) ;; HOW DO I GET THIS?
+   :has_sample_data       (db/exists? 'Database, :is_sample true)
+   }
+  )
+
+;; util function
+(def add-summaries 
+  "add up some dictionaries"
+  (partial merge-with +)
+  )
+
+;; User metrics
+(defn user-dims 
+  "characterize a user record"
+  [user]
+  {:total 1
+   :active (if (user :is_active) 1 0) ;; HOW DO I GET THE LIST OF ALL USERS INCLUDING INACTIVES?
+   :admin (if (user :is_superuser) 1 0)
+   :logged-in (if (nil? (user :last_login)) 0 1)
+   :sso (if (nil? (user :google_auth)) 0 1)}
+  )
+
+
+(defn get-user-metrics 
+  "Get metrics based on user records
+  TODO: get activity in terms of created questions, pulses and dashboards"
+  []
+  (let [users (db/select 'User)]   
+    {:users (apply add-summaries (map user-dims users))}))
+
+
+(defn get-group-metrics 
+  "Get metrics based on groups:
+  TODO characterize by # w/ sql access, # of users, no self-serve data access"
+  []
+  (let [groups (db/select 'PermissionsGroup)]
+    {:groups (count groups)}))
+
+;; Artifact Metrics
+(defn question-dims 
+  "characterize a saved question
+  TODO: characterize by whether it has params, # of revisions, created by an admin"
+  [question]
+  (print question)
+    {:total 1
+     :native (if (= (question :iquery_type) "native") 1 0)
+     :gui (if (not= (question :iquery_type) "native") 1 0)}
+  )
+
+(defn get-question-metrics 
+  "Get metrics based on questions
+  TODO characterize by # of labels
+       characterize by # executions and avg latency"
+  []
+  (let [questions (db/select 'Card)]
+    {:questions (count questions)}))
+
+(defn get-dashboard-metrics 
+  "Get metrics based on dashboards
+  TODO characterize by # of cards, # of revisions, and created by an admin"
+  []
+  (let [dashboards (db/select 'Dashboard)]
+    {:dashboards (count dashboards)}))
+
+(defn get-pulse-metrics 
+  "Get metrics based on pulses
+  TODO: characterize by # of cards, non-user account emails, slack vs email, # emails"
+  []
+  (let [pulses (db/select 'Pulse)]
+    {:pulses (count pulses)}))
+
+(defn get-label-metrics 
+  "Get metrics based on labels
+  TODO: characterize by the # of cards each label has + how many cards are unlabeled"
+  []
+  (let [labels (db/select 'CardLabel)]
+    {:labels (count labels)}))
+
+;; Metadata Metrics
+(defn get-database-metrics 
+  "Get metrics based on databases
+  TODO: characterize by # of schemas, tables, fields
+        characterize by in-depth analysis enabled "
+  []
+  (let [databases (db/select 'Database)]
+    {:databases (count databases)}))
+
+(defn get-schema-metrics 
+  "Get metrics based on schemas
+  TODO merge this w/ tables?
+       characterize by # tables"
+  []
+  (let [schemas (db/select 'Table)]
+    {:schemas (count schemas)}))
+
+
+(defn get-table-metrics 
+  "Get metrics based on tables
+  TODO characterize by # fields"
+  []
+  (let [tables (db/select 'Table)]
+    {:tables (count tables)}))
+
+
+(defn get-field-metrics 
+  "Get metrics based on fields"
+  []
+  (let [fields (db/select 'Field)]
+    {:fields (count fields)}))
+
+
+
+(defn get-segment-metrics 
+  "Get metrics based on segments"
+  []
+  (let [segments (db/select 'Segment)]
+    {:segments (count segments)}))
+
+
+(defn get-metric-metrics 
+  "Get metrics based on metrics"
+  []
+  (let [metrics (db/select 'Metric)]
+    {:metrics (count metrics)}))
+
+;; Execution Metrics
+(defn get-execution-metrics 
+  "Get metrics based on executions. 
+  This should be done in a single pass, as there might 
+  be a LOT of query executions in a normal instance
+  TODO: characterize by ad hoc vs cards
+        characterize by latency
+        characterize by error status"
+  []
+  (let [executions (db/select 'QueryExecution)]
+    {:executions (count executions)}))
+
+(defn get-map-metrics 
+  "Get metrics based on custom geojson maps
+  TODO figure out how to get at these"
+  []
+  (let [maps (db/select 'Segment)]
+    {:maps (count maps)}))
+
+
+(defn get-anonymous-usage-stats 
+  "generate a map of the usage stats for this instance"
+  []
+  (when [setting/get :anon-tracking-enabled]
+    ;do stuff
+    (merge (get-settings)
+           {:uuid anonymous-id :timestamp (new java.util.Date)}
+            {:stats {
+              :user (get-user-metrics)
+              :question (get-question-metrics)
+              :dashboard (get-dashboard-metrics)
+              :database (get-database-metrics)
+              :table (get-table-metrics)
+              :field (get-field-metrics)
+              :pulse (get-pulse-metrics)
+              :segment (get-segment-metrics)
+              :metric (get-metric-metrics)
+              :group (get-group-metrics)
+              :label (get-label-metrics)
+              :execution (get-execution-metrics)}})))
+
+(defn- send-stats 
+  "send stats to Metabase tracking server"
+  [stats]
+   (try 
+     (print (client/post metabase-usage-url {:form-params stats :content-type :json :throw-entire-message? true}))
+      (catch Throwable e
+       (log/error "Sending usage stats FAILED: " (.getMessage e)))))
+
+(defn phone-home-stats 
+  "doc-string"
+  []
+  (when (anon-tracking-enabled?)
+      (send-stats (get-anonymous-usage-stats))))
\ No newline at end of file
diff --git a/test/metabase/util/stats_test.clj b/test/metabase/util/stats_test.clj
new file mode 100644
index 00000000000..345efa8600d
--- /dev/null
+++ b/test/metabase/util/stats_test.clj
@@ -0,0 +1,58 @@
+(ns metabase.util.stats-test
+  (:require [expectations :refer :all]
+            [metabase.util.stats :refer :all]
+            [metabase.test.util :as tu]))
+
+(tu/resolve-private-vars metabase.util.stats
+  bin-micro-number bin-small-number bin-medium-number bin-large-number)
+
+
+(expect "0" (bin-micro-number 0))
+(expect "1" (bin-micro-number 1))
+(expect "2" (bin-micro-number 2))
+(expect "3+" (bin-micro-number 3))
+(expect "3+" (bin-micro-number 100))
+
+
+(expect "0" (bin-small-number 0))
+(expect "1-5" (bin-small-number 1))
+(expect "1-5" (bin-small-number 5))
+(expect "6-10" (bin-small-number 6))
+(expect "6-10" (bin-small-number 10))
+(expect "11-25" (bin-small-number 11))
+(expect "11-25" (bin-small-number 25))
+(expect "25+" (bin-small-number 26))
+(expect "25+" (bin-small-number 500))
+
+(expect "0" (bin-medium-number 0))
+(expect "1-5" (bin-medium-number 1))
+(expect "1-5" (bin-medium-number 5))
+(expect "6-10" (bin-medium-number 6))
+(expect "6-10" (bin-medium-number 10))
+(expect "11-25" (bin-medium-number 11))
+(expect "11-25" (bin-medium-number 25))
+(expect "26-50" (bin-medium-number 26))
+(expect "26-50" (bin-medium-number 50))
+(expect "51-100" (bin-medium-number 51))
+(expect "51-100" (bin-medium-number 100))
+(expect "101-250" (bin-medium-number 101))
+(expect "101-250" (bin-medium-number 250))
+(expect "250+" (bin-medium-number 251))
+(expect "250+" (bin-medium-number 5000))
+
+
+(expect "0" (bin-large-number 0))
+(expect "1-10" (bin-large-number 1))
+(expect "1-10" (bin-large-number 10))
+
+(expect "11-50" (bin-large-number 11))
+(expect "11-50" (bin-large-number 50))
+(expect "51-250" (bin-large-number 51))
+(expect "51-250" (bin-large-number 250))
+(expect "251-1000" (bin-large-number 251))
+(expect "251-1000" (bin-large-number 1000))
+(expect "1001-10000" (bin-large-number 1001))
+(expect "1001-10000" (bin-large-number 10000))
+(expect "10000+" (bin-large-number 10001))
+(expect "10000+" (bin-large-number 100000))
+
-- 
GitLab