From 8e3ac99a851df4a76ae159c5181ec1f58ff28c20 Mon Sep 17 00:00:00 2001
From: Cal Herries <39073188+calherries@users.noreply.github.com>
Date: Thu, 18 Apr 2024 12:18:59 +0300
Subject: [PATCH] Exclude sample content and "Metabase Analytics" content from
 usage stats (clone) (#41522)

---
 .../analytics/stats_test.clj                  | 31 +++++++++++++++++++
 .../migrations/001_update_migrations.yaml     | 16 ++++++++++
 resources/sample-content.edn                  |  1 +
 src/metabase/analytics/stats.clj              | 14 ++++++---
 test/metabase/analytics/stats_test.clj        | 23 ++++++++++++++
 5 files changed, 80 insertions(+), 5 deletions(-)
 create mode 100644 enterprise/backend/test/metabase_enterprise/analytics/stats_test.clj

diff --git a/enterprise/backend/test/metabase_enterprise/analytics/stats_test.clj b/enterprise/backend/test/metabase_enterprise/analytics/stats_test.clj
new file mode 100644
index 00000000000..367c1f3ec07
--- /dev/null
+++ b/enterprise/backend/test/metabase_enterprise/analytics/stats_test.clj
@@ -0,0 +1,31 @@
+(ns metabase-enterprise.analytics.stats-test
+  (:require
+   [clojure.test :refer :all]
+   [metabase-enterprise.audit-db :as audit-db]
+   [metabase.analytics.stats :as stats]
+   [metabase.db :as mdb]
+   [metabase.test :as mt]
+   [toucan2.core :as t2]))
+
+(deftest metabase-analytics-metrics-test
+  (testing "Metabase Analytics doesn't contribute to stats"
+    (mt/with-temp-empty-app-db [_conn :h2]
+      (mdb/setup-db! :create-sample-content? false)
+      (is (= ::audit-db/installed (audit-db/ensure-audit-db-installed!)))
+      (testing "sense check: Collection, Dashboard, and Cards exist"
+        (is (true? (t2/exists? :model/Collection)))
+        (is (true? (t2/exists? :model/Dashboard)))
+        (is (true? (t2/exists? :model/Card))))
+      (testing "All metrics should be empty"
+        (is (= {:collections 0, :cards_in_collections 0, :cards_not_in_collections 0, :num_cards_per_collection {}}
+               (#'stats/collection-metrics)))
+        (is (= {:questions {}, :public {}, :embedded {}}
+               (#'stats/question-metrics)))
+        (is (= {:dashboards         0
+                :with_params        0
+                :num_dashs_per_user {}
+                :num_cards_per_dash {}
+                :num_dashs_per_card {}
+                :public             {}
+                :embedded           {}}
+               (#'stats/dashboard-metrics)))))))
diff --git a/resources/migrations/001_update_migrations.yaml b/resources/migrations/001_update_migrations.yaml
index 6305e0de2cb..6925c3e8432 100644
--- a/resources/migrations/001_update_migrations.yaml
+++ b/resources/migrations/001_update_migrations.yaml
@@ -6272,6 +6272,22 @@ databaseChangeLog:
                   constraints:
                     nullable: true
 
+  - changeSet:
+      id: v50.2024-04-09T15:55:19
+      author: calherries
+      comment: Add collection.is_sample column
+      changes:
+        - addColumn:
+            tableName: collection
+            columns:
+              - column:
+                  name: is_sample
+                  type: ${boolean.type}
+                  remarks: "Is the collection part of the sample content?"
+                  constraints:
+                    nullable: false
+                  defaultValue: false
+
   - changeSet:
       id: v50.2024-04-09T15:55:22
       author: calherries
diff --git a/resources/sample-content.edn b/resources/sample-content.edn
index 1026dc95793..5b88a06763c 100644
--- a/resources/sample-content.edn
+++ b/resources/sample-content.edn
@@ -3979,5 +3979,6 @@
    :entity_id "53YGAg4EE6MC76nxx-f5f",
    :location "/",
    :namespace nil,
+   :is_sample true,
    :created_at #t "2024-04-11T12:41:25.429317Z"}),
  :parameter_card ()}
diff --git a/src/metabase/analytics/stats.clj b/src/metabase/analytics/stats.clj
index 447a3087f2e..270c5626ed8 100644
--- a/src/metabase/analytics/stats.clj
+++ b/src/metabase/analytics/stats.clj
@@ -179,7 +179,8 @@
   "Get metrics based on questions
   TODO characterize by # executions and avg latency"
   []
-  (let [cards (t2/select [Card :query_type :public_uuid :enable_embedding :embedding_params :dataset_query])]
+  (let [cards (t2/select [Card :query_type :public_uuid :enable_embedding :embedding_params :dataset_query]
+                         :creator_id [:not= config/internal-mb-user-id])]
     {:questions (merge-count-maps (for [card cards]
                                     (let [native? (= (keyword (:query_type card)) :native)]
                                       {:total       1
@@ -203,8 +204,11 @@
   "Get metrics based on dashboards
   TODO characterize by # of revisions, and created by an admin"
   []
-  (let [dashboards (t2/select [Dashboard :creator_id :public_uuid :parameters :enable_embedding :embedding_params])
-        dashcards  (t2/select [DashboardCard :card_id :dashboard_id])]
+  (let [dashboards (t2/select [Dashboard :creator_id :public_uuid :parameters :enable_embedding :embedding_params] :creator_id [:not= config/internal-mb-user-id])
+        dashcards  (t2/query {:select :dc.*
+                              :from [[(t2/table-name DashboardCard) :dc]]
+                              :join [[(t2/table-name Dashboard) :d] [:= :d.id :dc.dashboard_id]]
+                              :where [:not [:= :d.creator_id config/internal-mb-user-id]]})]
     {:dashboards         (count dashboards)
      :with_params        (count (filter (comp seq :parameters) dashboards))
      :num_dashs_per_user (medium-histogram dashboards :creator_id)
@@ -298,8 +302,8 @@
 (defn- collection-metrics
   "Get metrics on Collection usage."
   []
-  (let [collections (t2/select Collection)
-        cards       (t2/select [Card :collection_id])]
+  (let [collections (t2/select Collection :type [:not= "instance-analytics"] :is_sample false)
+        cards       (t2/select [Card :collection_id] :creator_id [:not= config/internal-mb-user-id])]
     {:collections              (count collections)
      :cards_in_collections     (count (filter :collection_id cards))
      :cards_not_in_collections (count (remove :collection_id cards))
diff --git a/test/metabase/analytics/stats_test.clj b/test/metabase/analytics/stats_test.clj
index f75dcf41af2..7f998ed1bfb 100644
--- a/test/metabase/analytics/stats_test.clj
+++ b/test/metabase/analytics/stats_test.clj
@@ -3,6 +3,7 @@
    [clojure.test :refer :all]
    [java-time.api :as t]
    [metabase.analytics.stats :as stats :refer [anonymous-usage-stats]]
+   [metabase.db :as mdb]
    [metabase.email :as email]
    [metabase.integrations.slack :as slack]
    [metabase.models.card :refer [Card]]
@@ -284,3 +285,25 @@
                                          ["1-5"  [:int {:min 1}]]
                                          ["6-10" [:int {:min 1}]]]]]
                 (#'stats/alert-metrics)))))
+
+(deftest sample-content-metrics-test
+  (testing "Sample content doesn't contribute to stats"
+    (mt/with-temp-empty-app-db [_conn :h2]
+      (mdb/setup-db! :create-sample-content? true)
+      (testing "sense check: Collection, Dashboard, and Cards exist"
+        (is (true? (t2/exists? :model/Collection)))
+        (is (true? (t2/exists? :model/Dashboard)))
+        (is (true? (t2/exists? :model/Card))))
+      (testing "All metrics should be empty"
+        (is (= {:collections 0, :cards_in_collections 0, :cards_not_in_collections 0, :num_cards_per_collection {}}
+               (#'stats/collection-metrics)))
+        (is (= {:questions {}, :public {}, :embedded {}}
+               (#'stats/question-metrics)))
+        (is (= {:dashboards         0
+                :with_params        0
+                :num_dashs_per_user {}
+                :num_cards_per_dash {}
+                :num_dashs_per_card {}
+                :public             {}
+                :embedded           {}}
+               (#'stats/dashboard-metrics)))))))
-- 
GitLab