From b6922de46269e3278b7b39e974dfee4a2ca0d8bb Mon Sep 17 00:00:00 2001 From: Cam Saul <cammsaul@gmail.com> Date: Wed, 26 Jul 2017 18:48:25 -0700 Subject: [PATCH] Backported cleanup from sync refactor :shower: [ci drivers] --- src/metabase/api/database.clj | 3 +- src/metabase/api/field.clj | 2 +- src/metabase/api/table.clj | 51 +++--- src/metabase/db.clj | 4 +- src/metabase/db/metadata_queries.clj | 20 ++- src/metabase/driver.clj | 9 ++ src/metabase/driver/druid/query_processor.clj | 2 +- .../googleanalytics/query_processor.clj | 4 +- src/metabase/driver/mongo/query_processor.clj | 2 +- src/metabase/models/field_values.clj | 88 +++++++---- src/metabase/models/permissions_group.clj | 2 +- src/metabase/sync_database/analyze.clj | 18 +-- src/metabase/task/sync_databases.clj | 11 +- src/metabase/util.clj | 6 - test/metabase/api/table_test.clj | 149 +++++++++--------- test/metabase/sync_database/analyze_test.clj | 21 ++- test/metabase/sync_database_test.clj | 85 +++++----- test/metabase/test/data.clj | 2 +- test/metabase/test/mock/moviedb.clj | 139 +++++++++------- 19 files changed, 340 insertions(+), 278 deletions(-) diff --git a/src/metabase/api/database.clj b/src/metabase/api/database.clj index 2a7cde7ae36..2a7622a2532 100644 --- a/src/metabase/api/database.clj +++ b/src/metabase/api/database.clj @@ -184,8 +184,7 @@ {:where [:and [:= :db_id db-id] [:= :active true] [:like :%lower.name (str (str/lower-case prefix) "%")] - [:or [:= :visibility_type nil] - [:not= :visibility_type "hidden"]]] + [:= :visibility_type nil]] :order-by [[:%lower.name :asc]]})) (defn- autocomplete-fields [db-id prefix] diff --git a/src/metabase/api/field.clj b/src/metabase/api/field.clj index 36e8358c702..751a4b691de 100644 --- a/src/metabase/api/field.clj +++ b/src/metabase/api/field.clj @@ -154,7 +154,7 @@ [_ _] empty-field-values) -(defn validate-human-readable-pairs +(defn- validate-human-readable-pairs "Human readable values are optional, but if present they must be present for each field value. Throws if invalid, returns a boolean indicating whether human readable values were found." diff --git a/src/metabase/api/table.clj b/src/metabase/api/table.clj index ddbb4476bbf..ee12b0cf26f 100644 --- a/src/metabase/api/table.clj +++ b/src/metabase/api/table.clj @@ -45,41 +45,36 @@ (-> (api/read-check Table id) (hydrate :db :pk_field))) -(defn- visible-state? - "only the nil state is considered visible." - [state] - {:pre [(or (nil? state) (table/visibility-types state))]} - (if (nil? state) - :show - :hide)) (api/defendpoint PUT "/:id" "Update `Table` with ID." - [id :as {{:keys [display_name entity_type visibility_type description caveats points_of_interest show_in_getting_started]} :body}] - {display_name (s/maybe su/NonBlankString) - entity_type (s/maybe TableEntityType) - visibility_type (s/maybe TableVisibilityType)} + [id :as {{:keys [display_name entity_type visibility_type description caveats points_of_interest show_in_getting_started], :as body} :body}] + {display_name (s/maybe su/NonBlankString) + entity_type (s/maybe TableEntityType) + visibility_type (s/maybe TableVisibilityType) + description (s/maybe su/NonBlankString) + caveats (s/maybe su/NonBlankString) + points_of_interest (s/maybe su/NonBlankString) + show_in_getting_started (s/maybe s/Bool)} (api/write-check Table id) - (let [original-visibility-type (:visibility_type (Table :id id))] - (api/check-500 (db/update-non-nil-keys! Table id - :display_name display_name - :caveats caveats - :points_of_interest points_of_interest - :show_in_getting_started show_in_getting_started - :entity_type entity_type - :description description)) - (api/check-500 (db/update! Table id, :visibility_type visibility_type)) - (let [updated-table (Table id) - new-visibility (visible-state? (:visibility_type updated-table)) - old-visibility (visible-state? original-visibility-type) - visibility-changed? (and (not= new-visibility - old-visibility) - (= :show new-visibility))] - (when visibility-changed? - (log/debug (u/format-color 'green "Table visibility changed, resyncing %s -> %s : %s") original-visibility-type visibility_type visibility-changed?) + (let [original-visibility-type (db/select-one-field :visibility_type Table :id id)] + ;; always update visibility type; update display_name, show_in_getting_started, entity_type if non-nil; update description and related fields if passed in + (api/check-500 + (db/update! Table id + (assoc (u/select-keys-when body + :non-nil [:display_name :show_in_getting_started :entity_type] + :present [:description :caveats :points_of_interest]) + :visibility_type visibility_type))) + (let [updated-table (Table id) + now-visible? (nil? (:visibility_type updated-table)) ; only Tables with `nil` visibility type are visible + was-visible? (nil? original-visibility-type) + became-visible? (and now-visible? (not was-visible?))] + (when became-visible? + (log/info (u/format-color 'green "Table '%s' is now visible. Resyncing." (:name updated-table))) (sync-database/sync-table! updated-table)) updated-table))) + (defn- format-fields-for-response [resp] (update resp :fields (fn [fields] diff --git a/src/metabase/db.clj b/src/metabase/db.clj index 5a33009f532..464d6b0ff8b 100644 --- a/src/metabase/db.clj +++ b/src/metabase/db.clj @@ -397,8 +397,8 @@ (defn join "Convenience for generating a HoneySQL `JOIN` clause. - (db/select-ids Table - (mdb/join [Table :raw_table_id] [RawTable :id]) + (db/select-ids FieldValues + (mdb/join [FieldValues :field_id] [Field :id]) :active true)" [[source-entity fk] [dest-entity pk]] {:left-join [(db/resolve-model dest-entity) [:= (db/qualify source-entity fk) diff --git a/src/metabase/db/metadata_queries.clj b/src/metabase/db/metadata_queries.clj index d068dc42ffd..516418d4eed 100644 --- a/src/metabase/db/metadata_queries.clj +++ b/src/metabase/db/metadata_queries.clj @@ -1,13 +1,17 @@ (ns metabase.db.metadata-queries "Predefined MBQL queries for getting metadata about an external database." - (:require [metabase + (:require [clojure.tools.logging :as log] + [metabase [query-processor :as qp] [util :as u]] - [metabase.models.table :refer [Table]] + [metabase.models + [field-values :as field-values] + [table :refer [Table]]] [metabase.query-processor.middleware.expand :as ql] [toucan.db :as db])) (defn- qp-query [db-id query] + {:pre [(integer? db-id)]} (-> (qp/process-query {:type :query :database db-id @@ -26,15 +30,19 @@ [table] {:pre [(map? table)] :post [(integer? %)]} - (-> (qp-query (:db_id table) (ql/query (ql/source-table (:id table)) - (ql/aggregation (ql/count)))) - first first long)) + (let [results (qp-query (:db_id table) (ql/query (ql/source-table (:id table)) + (ql/aggregation (ql/count))))] + (try (-> results first first long) + (catch Throwable e + (log/error "Error fetching table row count. Query returned:\n" + (u/pprint-to-str results)) + (throw e))))) (defn field-distinct-values "Return the distinct values of FIELD. This is used to create a `FieldValues` object for `:type/Category` Fields." ([field] - (field-distinct-values field @(resolve 'metabase.sync-database.analyze/low-cardinality-threshold))) + (field-distinct-values field field-values/low-cardinality-threshold)) ([field max-results] {:pre [(integer? max-results)]} (mapv first (field-query field (-> {} diff --git a/src/metabase/driver.clj b/src/metabase/driver.clj index cf1eaf81280..05675a9ebf1 100644 --- a/src/metabase/driver.clj +++ b/src/metabase/driver.clj @@ -372,6 +372,15 @@ (when-let [engine (db-id->engine db-id)] (engine->driver engine))))) +(defn ->driver + "Return an appropraiate driver for ENGINE-OR-DATABASE-OR-DB-ID. + Offered since this is somewhat more flexible in the arguments it accepts." + ;; TODO - we should make `engine->driver` and `database-id->driver` private and just use this for everything + [engine-or-database-or-db-id] + (if (keyword? engine-or-database-or-db-id) + (engine->driver engine-or-database-or-db-id) + (database-id->driver (u/get-id engine-or-database-or-db-id)))) + ;; ## Implementation-Agnostic Driver API diff --git a/src/metabase/driver/druid/query_processor.clj b/src/metabase/driver/druid/query_processor.clj index 17199fe0f85..f72d43e1b3c 100644 --- a/src/metabase/driver/druid/query_processor.clj +++ b/src/metabase/driver/druid/query_processor.clj @@ -107,7 +107,7 @@ (instance? DateTimeField arg))) (defn- expression->field-names [{:keys [args]}] - {:post [(every? u/string-or-keyword? %)]} + {:post [(every? (some-fn keyword? string?) %)]} (flatten (for [arg args :when (or (field? arg) (instance? Expression arg))] diff --git a/src/metabase/driver/googleanalytics/query_processor.clj b/src/metabase/driver/googleanalytics/query_processor.clj index cc0990c380f..71ab7c07497 100644 --- a/src/metabase/driver/googleanalytics/query_processor.clj +++ b/src/metabase/driver/googleanalytics/query_processor.clj @@ -57,7 +57,7 @@ ;;; ### source-table (defn- handle-source-table [{{source-table-name :name} :source-table}] - {:pre [(u/string-or-keyword? source-table-name)]} + {:pre [((some-fn keyword? string?) source-table-name)]} {:ids (str "ga:" source-table-name)}) @@ -265,7 +265,7 @@ (defn- filter-type ^clojure.lang.Keyword [filter-clause] (when (and (sequential? filter-clause) - (u/string-or-keyword? (first filter-clause))) + ((some-fn keyword? string?) (first filter-clause))) (qputil/normalize-token (first filter-clause)))) (defn- compound-filter? [filter-clause] diff --git a/src/metabase/driver/mongo/query_processor.clj b/src/metabase/driver/mongo/query_processor.clj index 3ba4aab839e..7eed0a7e421 100644 --- a/src/metabase/driver/mongo/query_processor.clj +++ b/src/metabase/driver/mongo/query_processor.clj @@ -413,7 +413,7 @@ (form->encoded-fn-name [:___ObjectId \"583327789137b2700a1621fb\"]) -> :ObjectId" [form] (when (vector? form) - (when (u/string-or-keyword? (first form)) + (when ((some-fn keyword? string?) (first form)) (when-let [[_ k] (re-matches #"^___(\w+$)" (name (first form)))] (let [k (keyword k)] (when (contains? fn-name->decoder k) diff --git a/src/metabase/models/field_values.clj b/src/metabase/models/field_values.clj index 955355d3abb..0a4959f4f9c 100644 --- a/src/metabase/models/field_values.clj +++ b/src/metabase/models/field_values.clj @@ -5,6 +5,19 @@ [db :as db] [models :as models]])) +(def ^:const ^Integer low-cardinality-threshold + "Fields with less than this many distinct values should automatically be given a special type of `:type/Category`." + 300) + +(def ^:private ^:const ^Integer entry-max-length + "The maximum character length for a stored `FieldValues` entry." + 100) + +(def ^:const ^Integer total-max-length + "Maximum total length for a `FieldValues` entry (combined length of all values for the field)." + (* low-cardinality-threshold entry-max-length)) + + ;; ## Entity + DB Multimethods (models/defmodel FieldValues :metabase_fieldvalues) @@ -16,13 +29,6 @@ :types (constantly {:human_readable_values :json, :values :json}) :post-select (u/rpartial update :human_readable_values #(or % {}))})) -;; columns: -;; * :id -;; * :field_id -;; * :updated_at WHY! I *DESPISE* THESE USELESS FIELDS -;; * :created_at -;; * :values (JSON-encoded array like ["table" "scalar" "pie"]) -;; * :human_readable_values (JSON-encoded map like {:table "Table" :scalar "Scalar"} ;; ## `FieldValues` Helper Functions @@ -39,26 +45,52 @@ (isa? (keyword special_type) :type/Category) (isa? (keyword special_type) :type/Enum)))) -(defn- create-field-values! - "Create `FieldValues` for a `Field`." - {:arglists '([field] [field human-readable-values])} - [{field-id :id, field-name :name, :as field} & [human-readable-values]] - {:pre [(integer? field-id)]} - (log/debug (format "Creating FieldValues for Field %s..." (or field-name field-id))) ; use field name if available - (db/insert! FieldValues - :field_id field-id - :values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field) - :human_readable_values human-readable-values)) - -(defn update-field-values! - "Update the `FieldValues` for FIELD, creating them if needed" - [{field-id :id, :as field}] - {:pre [(integer? field-id) - (field-should-have-field-values? field)]} - (if-let [field-values (FieldValues :field_id field-id)] - (db/update! FieldValues (u/get-id field-values) - :values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field)) - (create-field-values! field))) + +(defn- values-less-than-total-max-length? + "`true` if the combined length of all the values in DISTINCT-VALUES is below the + threshold for what we'll allow in a FieldValues entry." + [distinct-values] + (let [total-length (reduce + (map (comp count str) + distinct-values))] + (<= total-length total-max-length))) + + +(defn- distinct-values + "Fetch a sequence of distinct values for FIELD that are below the `total-max-length` threshold. + If the values are past the threshold, this returns `nil`." + [field] + (let [values ((resolve 'metabase.db.metadata-queries/field-distinct-values) field)] + (when (values-less-than-total-max-length? values) + values))) + + +(defn create-or-update-field-values! + "Create or update the FieldValues object for FIELD." + [field & [human-readable-values]] + (let [field-values (FieldValues :field_id (u/get-id field)) + values (distinct-values field) + field-name (or (:name field) (:id field))] + (cond + ;; if the FieldValues object already exists then update values in it + (and field-values values) + (do + (log/debug (format "Updating FieldValues for Field %s..." field-name)) + (db/update! FieldValues (u/get-id field-values) + :values values)) + ;; if FieldValues object doesn't exist create one + values + (do + (log/debug (format "Creating FieldValues for Field %s..." field-name)) + (db/insert! FieldValues + :field_id (u/get-id field) + :values values + :human_readable_values human-readable-values)) + ;; otherwise this Field isn't eligible, so delete any FieldValues that might exist + :else + (do + (log/debug (format "Values for field %s exceed the total max length threshold." field-name)) + (db/delete! FieldValues :field_id (u/get-id field)))))) + (defn field-values->pairs "Returns a list of pairs (or single element vectors if there are no @@ -76,7 +108,7 @@ {:pre [(integer? field-id)]} (when (field-should-have-field-values? field) (or (FieldValues :field_id field-id) - (create-field-values! field human-readable-values)))) + (create-or-update-field-values! field human-readable-values)))) (defn save-field-values! "Save the `FieldValues` for FIELD-ID, creating them if needed, otherwise updating them." diff --git a/src/metabase/models/permissions_group.clj b/src/metabase/models/permissions_group.clj index a146b5a9559..34a9f0f635b 100644 --- a/src/metabase/models/permissions_group.clj +++ b/src/metabase/models/permissions_group.clj @@ -41,7 +41,7 @@ (defn exists-with-name? "Does a `PermissionsGroup` with GROUP-NAME exist in the DB? (case-insensitive)" ^Boolean [group-name] - {:pre [(u/string-or-keyword? group-name)]} + {:pre [((some-fn keyword? string?) group-name)]} (db/exists? PermissionsGroup :%lower.name (s/lower-case (name group-name)))) diff --git a/src/metabase/sync_database/analyze.clj b/src/metabase/sync_database/analyze.clj index 8881ba24d65..d6f5345d340 100644 --- a/src/metabase/sync_database/analyze.clj +++ b/src/metabase/sync_database/analyze.clj @@ -20,18 +20,6 @@ "Fields that have at least this percent of values that are valid URLs should be given a special type of `:type/URL`." 0.95) -(def ^:private ^:const ^Integer low-cardinality-threshold - "Fields with less than this many distinct values should automatically be given a special type of `:type/Category`." - 300) - -(def ^:private ^:const ^Integer field-values-entry-max-length - "The maximum character length for a stored `FieldValues` entry." - 100) - -(def ^:private ^:const ^Integer field-values-total-max-length - "Maximum total length for a FieldValues entry (combined length of all values for the field)." - (* low-cardinality-threshold field-values-entry-max-length)) - (def ^:private ^:const ^Integer average-length-no-preview-threshold "Fields whose values' average length is greater than this amount should be marked as `preview_display = false`." 50) @@ -57,17 +45,17 @@ (not (= (:base_type field) :type/*))))) (defn- field-values-below-low-cardinality-threshold? [non-nil-values] - (and (<= (count non-nil-values) low-cardinality-threshold) + (and (<= (count non-nil-values) field-values/low-cardinality-threshold) ;; very simple check to see if total length of field-values exceeds (total values * max per value) (let [total-length (reduce + (map (comp count str) non-nil-values))] - (<= total-length field-values-total-max-length)))) + (<= total-length field-values/total-max-length)))) (defn test:cardinality-and-extract-field-values "Extract field-values for FIELD. If number of values exceeds `low-cardinality-threshold` then we return an empty set of values." [field field-stats] ;; TODO: we need some way of marking a field as not allowing field-values so that we can skip this work if it's not appropriate ;; for example, :type/Category fields with more than MAX values don't need to be rescanned all the time - (let [non-nil-values (filter identity (queries/field-distinct-values field (inc low-cardinality-threshold))) + (let [non-nil-values (filter identity (queries/field-distinct-values field (inc field-values/low-cardinality-threshold))) ;; only return the list if we didn't exceed our MAX values and if the the total character count of our values is reasable (#2332) distinct-values (when (field-values-below-low-cardinality-threshold? non-nil-values) non-nil-values)] diff --git a/src/metabase/task/sync_databases.clj b/src/metabase/task/sync_databases.clj index 9fd4816d2b5..8a776d3aeb1 100644 --- a/src/metabase/task/sync_databases.clj +++ b/src/metabase/task/sync_databases.clj @@ -23,12 +23,11 @@ (doseq [database (db/select Database, :is_sample false)] ; skip Sample Dataset DB (try ;; NOTE: this happens synchronously for now to avoid excessive load if there are lots of databases - (if-not (and (zero? (t/hour (t/now))) - (driver/driver-supports? (driver/engine->driver (:engine database)) :dynamic-schema)) - ;; most of the time we do a quick sync and avoid the lengthy analysis process - (sync-database/sync-database! database :full-sync? false) - ;; at midnight we run the full sync - (sync-database/sync-database! database :full-sync? true)) + ;; most of the time we do a quick sync and avoid the lengthy analysis process + ;; at midnight we run the full sync + (let [full-sync? (not (and (zero? (t/hour (t/now))) + (driver/driver-supports? (driver/engine->driver (:engine database)) :dynamic-schema)))] + (sync-database/sync-database! database :full-sync? full-sync?)) (catch Throwable e (log/error (format "Error syncing database %d: " (:id database)) e))))) diff --git a/src/metabase/util.clj b/src/metabase/util.clj index 36e10258a69..85ee617e1bc 100644 --- a/src/metabase/util.clj +++ b/src/metabase/util.clj @@ -758,12 +758,6 @@ `(do-with-auto-retries ~num-retries (fn [] ~@body))) -(defn string-or-keyword? - "Is X a `String` or a `Keyword`?" - [x] - (or (string? x) - (keyword? x))) - (defn key-by "Convert a sequential COLL to a map of `(f item)` -> `item`. This is similar to `group-by`, but the resultant map's values are single items from COLL rather than sequences of items. diff --git a/test/metabase/api/table_test.clj b/test/metabase/api/table_test.clj index b0971920df5..987eb1fdd4c 100644 --- a/test/metabase/api/table_test.clj +++ b/test/metabase/api/table_test.clj @@ -15,11 +15,12 @@ [permissions-group :as perms-group] [table :refer [Table]]] [metabase.test - [data :as data :refer :all] + [data :as data] [util :as tu :refer [match-$ resolve-private-vars]]] [metabase.test.data [dataset-definitions :as defs] - [users :refer :all]] + [users :refer [user->client]]] + [toucan.hydrate :as hydrate] [toucan.db :as db] [toucan.util.test :as tt])) @@ -31,13 +32,13 @@ ;; authentication test on every single individual endpoint (expect (get middleware/response-unauthentic :body) (http/client :get 401 "table")) -(expect (get middleware/response-unauthentic :body) (http/client :get 401 (format "table/%d" (id :users)))) +(expect (get middleware/response-unauthentic :body) (http/client :get 401 (format "table/%d" (data/id :users)))) ;; Helper Fns (defn- db-details [] - (match-$ (db) + (match-$ (data/db) {:created_at $ :engine "h2" :id $ @@ -60,7 +61,7 @@ :db (db-details) :entity_name nil :active true - :db_id (id) + :db_id (data/id) :segments [] :metrics []}) @@ -79,24 +80,24 @@ ;; ## GET /api/table ;; These should come back in alphabetical order and include relevant metadata (expect - #{{:name (format-name "categories") + #{{:name (data/format-name "categories") :display_name "Categories" :rows 75 - :id (id :categories)} - {:name (format-name "checkins") + :id (data/id :categories)} + {:name (data/format-name "checkins") :display_name "Checkins" :rows 1000 - :id (id :checkins)} - {:name (format-name "users") + :id (data/id :checkins)} + {:name (data/format-name "users") :display_name "Users" :rows 15 - :id (id :users)} - {:name (format-name "venues") + :id (data/id :users)} + {:name (data/format-name "venues") :display_name "Venues" :rows 100 - :id (id :venues)}} + :id (data/id :venues)}} (->> ((user->client :rasta) :get 200 "table") - (filter #(= (:db_id %) (id))) ; prevent stray tables from affecting unit test results + (filter #(= (:db_id %) (data/id))) ; prevent stray tables from affecting unit test results (map #(dissoc % :raw_table_id :db :created_at :updated_at :schema :entity_name :description :entity_type :visibility_type :caveats :points_of_interest :show_in_getting_started :db_id :active)) @@ -106,18 +107,18 @@ ;; ## GET /api/table/:id (expect (merge (dissoc (table-defaults) :segments :field_values :metrics) - (match-$ (Table (id :venues)) + (match-$ (Table (data/id :venues)) {:schema "PUBLIC" :name "VENUES" :display_name "Venues" :rows 100 :updated_at $ :pk_field (pk-field-id $$) - :id (id :venues) - :db_id (id) + :id (data/id :venues) + :db_id (data/id) :raw_table_id $ :created_at $})) - ((user->client :rasta) :get 200 (format "table/%d" (id :venues)))) + ((user->client :rasta) :get 200 (format "table/%d" (data/id :venues)))) ;; GET /api/table/:id should return a 403 for a user that doesn't have read permissions for the table (tt/expect-with-temp [Database [{database-id :id}] @@ -130,12 +131,12 @@ ;; ## GET /api/table/:id/query_metadata (expect (merge (table-defaults) - (match-$ (Table (id :categories)) + (match-$ (Table (data/id :categories)) {:schema "PUBLIC" :name "CATEGORIES" :display_name "Categories" - :fields (let [defaults (assoc field-defaults :table_id (id :categories))] - [(merge defaults (match-$ (Field (id :categories :id)) + :fields (let [defaults (assoc field-defaults :table_id (data/id :categories))] + [(merge defaults (match-$ (Field (data/id :categories :id)) {:special_type "type/PK" :name "ID" :display_name "ID" @@ -149,7 +150,7 @@ :last_analyzed $ :dimensions [] :values []})) - (merge defaults (match-$ (Field (id :categories :name)) + (merge defaults (match-$ (Field (data/id :categories :name)) {:special_type "type/Name" :name "NAME" :display_name "Name" @@ -161,14 +162,14 @@ :fk_target_field_id $ :raw_column_id $ :last_analyzed $ - :values venue-categories + :values data/venue-categories :dimensions []}))]) :rows 75 :updated_at $ - :id (id :categories) + :id (data/id :categories) :raw_table_id $ :created_at $})) - ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (id :categories)))) + ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (data/id :categories)))) (def ^:private user-last-login-date-strs @@ -192,12 +193,12 @@ ;;; Make sure that getting the User table *does* include info about the password field, but not actual values themselves (expect (merge (table-defaults) - (match-$ (Table (id :users)) + (match-$ (Table (data/id :users)) {:schema "PUBLIC" :name "USERS" :display_name "Users" - :fields (let [defaults (assoc field-defaults :table_id (id :users))] - [(merge defaults (match-$ (Field (id :users :id)) + :fields (let [defaults (assoc field-defaults :table_id (data/id :users))] + [(merge defaults (match-$ (Field (data/id :users :id)) {:special_type "type/PK" :name "ID" :display_name "ID" @@ -211,7 +212,7 @@ :last_analyzed $ :dimensions [] :values []})) - (merge defaults (match-$ (Field (id :users :last_login)) + (merge defaults (match-$ (Field (data/id :users :last_login)) {:special_type nil :name "LAST_LOGIN" :display_name "Last Login" @@ -225,7 +226,7 @@ :last_analyzed $ :dimensions [] :values []})) - (merge defaults (match-$ (Field (id :users :name)) + (merge defaults (match-$ (Field (data/id :users :name)) {:special_type "type/Name" :name "NAME" :display_name "Name" @@ -239,7 +240,7 @@ :last_analyzed $ :dimensions [] :values (map vector (sort user-full-names))})) - (merge defaults (match-$ (Field :table_id (id :users), :name "PASSWORD") + (merge defaults (match-$ (Field :table_id (data/id :users), :name "PASSWORD") {:special_type "type/Category" :name "PASSWORD" :display_name "Password" @@ -255,21 +256,21 @@ :values []}))]) :rows 15 :updated_at $ - :id (id :users) + :id (data/id :users) :raw_table_id $ :created_at $})) - ((user->client :rasta) :get 200 (format "table/%d/query_metadata?include_sensitive_fields=true" (id :users)))) + ((user->client :rasta) :get 200 (format "table/%d/query_metadata?include_sensitive_fields=true" (data/id :users)))) ;;; GET api/table/:id/query_metadata ;;; Make sure that getting the User table does *not* include password info (expect (merge (table-defaults) - (match-$ (Table (id :users)) + (match-$ (Table (data/id :users)) {:schema "PUBLIC" :name "USERS" :display_name "Users" - :fields (let [defaults (assoc field-defaults :table_id (id :users))] - [(merge defaults (match-$ (Field (id :users :id)) + :fields (let [defaults (assoc field-defaults :table_id (data/id :users))] + [(merge defaults (match-$ (Field (data/id :users :id)) {:special_type "type/PK" :name "ID" :display_name "ID" @@ -282,7 +283,7 @@ :last_analyzed $ :dimensions [] :values []})) - (merge defaults (match-$ (Field (id :users :last_login)) + (merge defaults (match-$ (Field (data/id :users :last_login)) {:special_type nil :name "LAST_LOGIN" :display_name "Last Login" @@ -295,7 +296,7 @@ :last_analyzed $ :dimensions [] :values []})) - (merge defaults (match-$ (Field (id :users :name)) + (merge defaults (match-$ (Field (data/id :users :name)) {:special_type "type/Name" :name "NAME" :display_name "Name" @@ -324,10 +325,10 @@ ["Szymon Theutrich"]]}))]) :rows 15 :updated_at $ - :id (id :users) + :id (data/id :users) :raw_table_id $ :created_at $})) - ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (id :users)))) + ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (data/id :users)))) ;; Check that FK fields belonging to Tables we don't have permissions for don't come back as hydrated `:target`(#3867) (expect @@ -396,8 +397,8 @@ ;; ## GET /api/table/:id/fks ;; We expect a single FK from CHECKINS.USER_ID -> USERS.ID (expect - (let [checkins-user-field (Field (id :checkins :user_id)) - users-id-field (Field (id :users :id))] + (let [checkins-user-field (Field (data/id :checkins :user_id)) + users-id-field (Field (data/id :users :id))] [{:origin_id (:id checkins-user-field) :destination_id (:id users-id-field) :relationship "Mt1" @@ -417,7 +418,7 @@ :updated_at $ :last_analyzed $ :table (merge (dissoc (table-defaults) :segments :field_values :metrics) - (match-$ (Table (id :checkins)) + (match-$ (Table (data/id :checkins)) {:schema "PUBLIC" :name "CHECKINS" :display_name "Checkins" @@ -442,7 +443,7 @@ :updated_at $ :last_analyzed $ :table (merge (dissoc (table-defaults) :db :segments :field_values :metrics) - (match-$ (Table (id :users)) + (match-$ (Table (data/id :users)) {:schema "PUBLIC" :name "USERS" :display_name "Users" @@ -451,7 +452,7 @@ :id $ :raw_table_id $ :created_at $}))}))}]) - ((user->client :rasta) :get 200 (format "table/%d/fks" (id :users)))) + ((user->client :rasta) :get 200 (format "table/%d/fks" (data/id :users)))) ;; Make sure metadata for 'virtual' tables comes back as expected from GET /api/table/:id/query_metadata (tt/expect-with-temp [Card [card {:name "Go Dubs!" @@ -502,72 +503,72 @@ set to type/Category. This function will change that for category_id, then invoke `F` and roll it back afterwards" [special-type f] - (let [original-special-type (:special_type (Field (id :venues :category_id)))] + (let [original-special-type (:special_type (Field (data/id :venues :category_id)))] (try - (db/update! Field (id :venues :category_id) {:special_type special-type}) + (db/update! Field (data/id :venues :category_id) {:special_type special-type}) (f) (finally - (db/update! Field (id :venues :category_id) {:special_type original-special-type}))))) + (db/update! Field (data/id :venues :category_id) {:special_type original-special-type}))))) ;; ## GET /api/table/:id/query_metadata ;; Ensure internal remapped dimensions and human_readable_values are returned (expect - [{:table_id (id :venues) - :id (id :venues :category_id) + [{:table_id (data/id :venues) + :id (data/id :venues :category_id) :name "CATEGORY_ID" - :values (map-indexed (fn [idx [category]] [idx category]) venue-categories) - :dimensions {:name "Foo", :field_id (id :venues :category_id), :human_readable_field_id nil, :type "internal"}} - {:id (id :venues :price) - :table_id (id :venues) + :values (map-indexed (fn [idx [category]] [idx category]) data/venue-categories) + :dimensions {:name "Foo", :field_id (data/id :venues :category_id), :human_readable_field_id nil, :type "internal"}} + {:id (data/id :venues :price) + :table_id (data/id :venues) :name "PRICE" :values [[1] [2] [3] [4]] :dimensions []}] - (with-data - (create-venue-category-remapping "Foo") + (data/with-data + (data/create-venue-category-remapping "Foo") (category-id-special-type :type/Category (fn [] (narrow-fields ["PRICE" "CATEGORY_ID"] - ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (id :venues)))))))) + ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (data/id :venues)))))))) ;; ## GET /api/table/:id/query_metadata ;; Ensure internal remapped dimensions and human_readable_values are returned when type is enum (expect - [{:table_id (id :venues) - :id (id :venues :category_id) + [{:table_id (data/id :venues) + :id (data/id :venues :category_id) :name "CATEGORY_ID" - :values (map-indexed (fn [idx [category]] [idx category]) venue-categories) - :dimensions {:name "Foo", :field_id (id :venues :category_id), :human_readable_field_id nil, :type "internal"}} - {:id (id :venues :price) - :table_id (id :venues) + :values (map-indexed (fn [idx [category]] [idx category]) data/venue-categories) + :dimensions {:name "Foo", :field_id (data/id :venues :category_id), :human_readable_field_id nil, :type "internal"}} + {:id (data/id :venues :price) + :table_id (data/id :venues) :name "PRICE" :values [[1] [2] [3] [4]] :dimensions []}] - (with-data - (create-venue-category-remapping "Foo") + (data/with-data + (data/create-venue-category-remapping "Foo") (category-id-special-type :type/Enum (fn [] (narrow-fields ["PRICE" "CATEGORY_ID"] - ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (id :venues)))))))) + ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (data/id :venues)))))))) ;; ## GET /api/table/:id/query_metadata ;; Ensure FK remappings are returned (expect - [{:table_id (id :venues) - :id (id :venues :category_id) + [{:table_id (data/id :venues) + :id (data/id :venues :category_id) :name "CATEGORY_ID" :values [] - :dimensions {:name "Foo", :field_id (id :venues :category_id), :human_readable_field_id (id :categories :name), :type "external"}} - {:id (id :venues :price) - :table_id (id :venues) + :dimensions {:name "Foo", :field_id (data/id :venues :category_id), :human_readable_field_id (data/id :categories :name), :type "external"}} + {:id (data/id :venues :price) + :table_id (data/id :venues) :name "PRICE" :values [[1] [2] [3] [4]] :dimensions []}] - (with-data - (create-venue-category-fk-remapping "Foo") + (data/with-data + (data/create-venue-category-fk-remapping "Foo") (category-id-special-type :type/Category (fn [] (narrow-fields ["PRICE" "CATEGORY_ID"] - ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (id :venues)))))))) + ((user->client :rasta) :get 200 (format "table/%d/query_metadata" (data/id :venues)))))))) diff --git a/test/metabase/sync_database/analyze_test.clj b/test/metabase/sync_database/analyze_test.clj index fe57c4d943a..0a10399d1ed 100644 --- a/test/metabase/sync_database/analyze_test.clj +++ b/test/metabase/sync_database/analyze_test.clj @@ -83,11 +83,15 @@ (expect false (values-are-valid-emails? ["true"])) (expect false (values-are-valid-emails? ["false"])) -;; Tests to avoid analyzing hidden tables + +;;; +------------------------------------------------------------------------------------------------------------------------+ +;;; | Tests to avoid analyzing hidden tables | +;;; +------------------------------------------------------------------------------------------------------------------------+ + (defn- unanalyzed-fields-count [table] - (assert (pos? ;; don't let ourselves be fooled if the test passes because the table is - ;; totally broken or has no fields. Make sure we actually test something - (db/count Field :table_id (u/get-id table)))) + ;; don't let ourselves be fooled if the test passes because the table is + ;; totally broken or has no fields. Make sure we actually test something + (assert (pos? (db/count Field :table_id (u/get-id table)))) (db/count Field :last_analyzed nil, :table_id (u/get-id table))) (defn- latest-sync-time [table] @@ -96,13 +100,18 @@ :table_id (u/get-id table) {:order-by [[:last_analyzed :desc]]})) -(defn- set-table-visibility-type! [table visibility-type] +(defn- set-table-visibility-type! + "Change the VISIBILITY-TYPE of TABLE via an API call. + (This is done via the API so we can see which, if any, side effects (e.g. analysis) get triggered.)" + [table visibility-type] ((user->client :crowberto) :put 200 (format "table/%d" (:id table)) {:display_name "hiddentable" :entity_type "person" :visibility_type visibility-type :description "What a nice table!"})) -(defn- api-sync! [table] +(defn- api-sync! + "Trigger a sync of TABLE via the API." + [table] ((user->client :crowberto) :post 200 (format "database/%d/sync" (:db_id table)))) (defn- analyze! [table] diff --git a/test/metabase/sync_database_test.clj b/test/metabase/sync_database_test.clj index 7a8c4628e52..c5f91b43856 100644 --- a/test/metabase/sync_database_test.clj +++ b/test/metabase/sync_database_test.clj @@ -11,7 +11,7 @@ [metabase.models [database :refer [Database]] [field :refer [Field]] - [field-values :refer [FieldValues]] + [field-values :as field-values :refer [FieldValues]] [raw-table :refer [RawTable]] [table :refer [Table]]] metabase.sync-database.analyze @@ -38,32 +38,41 @@ {:name "name" :base-type :type/Text}}}}) + +;; TODO - I'm 90% sure we could just reüse the "MovieDB" instead of having this subset of it used here (defrecord SyncTestDriver [] clojure.lang.Named (getName [_] "SyncTestDriver")) + +(defn- describe-database [& _] + {:tables (set (for [table (vals sync-test-tables)] + (dissoc table :fields)))}) + +(defn- describe-table [_ _ table] + (get sync-test-tables (:name table))) + +(defn- describe-table-fks [_ _ table] + (set (when (= "movie" (:name table)) + #{{:fk-column-name "studio" + :dest-table {:name "studio" + :schema nil} + :dest-column-name "studio"}}))) + (extend SyncTestDriver driver/IDriver (merge driver/IDriverDefaultsMixin - {:analyze-table (constantly nil) - :describe-database (constantly {:tables (set (for [table (vals sync-test-tables)] - (dissoc table :fields)))}) - :describe-table (fn [_ _ table] - (get sync-test-tables (:name table))) - :describe-table-fks (fn [_ _ table] - (if (= "movie" (:name table)) - #{{:fk-column-name "studio" - :dest-table {:name "studio" - :schema nil} - :dest-column-name "studio"}} - #{})) - :features (constantly #{:foreign-keys}) - :details-fields (constantly [])})) + {:analyze-table (constantly nil) + :describe-database describe-database + :describe-table describe-table + :describe-table-fks describe-table-fks + :features (constantly #{:foreign-keys}) + :details-fields (constantly []) + :field-values-lazy-seq (constantly [])})) -(driver/register-driver! :sync-test (SyncTestDriver.)) +(driver/register-driver! :sync-test (SyncTestDriver.)) -(def ^:private venues-table (delay (Table (id :venues)))) (defn- table-details [table] (into {} (-> (dissoc table :db :pk_field :field_values) @@ -251,14 +260,14 @@ (do (db/update! Field (id :venues :id), :special_type nil) (get-special-type)) ;; Calling sync-table! should set the special type again - (do (sync-table! @venues-table) + (do (sync-table! (Table (id :venues))) (get-special-type)) ;; sync-table! should *not* change the special type of fields that are marked with a different type (do (db/update! Field (id :venues :id), :special_type :type/Latitude) (get-special-type)) ;; Make sure that sync-table runs set-table-pks-if-needed! (do (db/update! Field (id :venues :id), :special_type nil) - (sync-table! @venues-table) + (sync-table! (Table (id :venues))) (get-special-type))])) ;; ## FK SYNCING @@ -308,7 +317,7 @@ (do (db/delete! FieldValues :id (get-field-values-id)) (get-field-values)) ;; 3. Now re-sync the table and make sure they're back - (do (sync-table! @venues-table) + (do (sync-table! (Table (id :venues))) (get-field-values))]) ;; Test that syncing will cause FieldValues to be updated @@ -322,11 +331,12 @@ (do (db/update! FieldValues (get-field-values-id), :values [1 2 3]) (get-field-values)) ;; 3. Now re-sync the table and make sure the value is back - (do (sync-table! @venues-table) + (do (sync-table! (Table (id :venues))) (get-field-values))])) -;;; -------------------- Make sure that if a Field's cardinality passes `metabase.sync-database.analyze/low-cardinality-threshold` (currently 300) (#3215) -------------------- +;; Make sure that if a Field's cardinality passes `low-cardinality-threshold` (currently 300) +;; the corresponding FieldValues entry will be deleted (#3215) (defn- insert-range-sql [rang] (str "INSERT INTO blueberries_consumed (num) VALUES " (str/join ", " (for [n rang] @@ -337,20 +347,19 @@ (let [details {:db (str "mem:" (tu/random-name) ";DB_CLOSE_DELAY=10")}] (binding [mdb/*allow-potentailly-unsafe-connections* true] (tt/with-temp Database [db {:engine :h2, :details details}] - (let [driver (driver/engine->driver :h2) - spec (sql/connection-details->spec driver details) - exec! #(doseq [statement %] - (jdbc/execute! spec [statement]))] - ;; create the `blueberries_consumed` table and insert a 100 values - (exec! ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);" - (insert-range-sql (range 100))]) - (sync-database! db, :full-sync? true) - (let [table-id (db/select-one-id Table :db_id (u/get-id db)) - field-id (db/select-one-id Field :table_id table-id)] - ;; field values should exist... - (assert (= (count (db/select-one-field :values FieldValues :field_id field-id)) - 100)) - ;; ok, now insert enough rows to push the field past the `low-cardinality-threshold` and sync again, there should be no more field values - (exec! [(insert-range-sql (range 100 (+ 100 @(resolve 'metabase.sync-database.analyze/low-cardinality-threshold))))]) + (jdbc/with-db-connection [conn (sql/connection-details->spec (driver/engine->driver :h2) details)] + (let [exec! #(doseq [statement %] + (jdbc/execute! conn [statement]))] + ;; create the `blueberries_consumed` table and insert a 100 values + (exec! ["CREATE TABLE blueberries_consumed (num INTEGER NOT NULL);" + (insert-range-sql (range 100))]) (sync-database! db, :full-sync? true) - (db/exists? FieldValues :field_id field-id))))))) + (let [table-id (db/select-one-id Table :db_id (u/get-id db)) + field-id (db/select-one-id Field :table_id table-id)] + ;; field values should exist... + (assert (= (count (db/select-one-field :values FieldValues :field_id field-id)) + 100)) + ;; ok, now insert enough rows to push the field past the `low-cardinality-threshold` and sync again, there should be no more field values + (exec! [(insert-range-sql (range 100 (+ 100 field-values/low-cardinality-threshold)))]) + (sync-database! db) + (db/exists? FieldValues :field_id field-id)))))))) diff --git a/test/metabase/test/data.clj b/test/metabase/test/data.clj index 377fa8bde6c..ed29f95d3f0 100644 --- a/test/metabase/test/data.clj +++ b/test/metabase/test/data.clj @@ -131,7 +131,7 @@ (i/format-name *driver* (name nm))) (defn- get-table-id-or-explode [db-id table-name] - {:pre [(integer? db-id) (u/string-or-keyword? table-name)]} + {:pre [(integer? db-id) ((some-fn keyword? string?) table-name)]} (let [table-name (format-name table-name)] (or (db/select-one-id Table, :db_id db-id, :name table-name) (db/select-one-id Table, :db_id db-id, :name (i/db-qualified-table-name (db/select-one-field :name Database :id db-id) table-name)) diff --git a/test/metabase/test/mock/moviedb.clj b/test/metabase/test/mock/moviedb.clj index cf011e4d95f..2e297afc108 100644 --- a/test/metabase/test/mock/moviedb.clj +++ b/test/metabase/test/mock/moviedb.clj @@ -4,77 +4,96 @@ (def ^:private ^:const moviedb-tables - {"movies" {:name "movies" - :schema nil - :fields #{{:name "id" - :base-type :type/Integer} - {:name "title" - :base-type :type/Text} - {:name "filming" - :base-type :type/Boolean}}} - "actors" {:name "actors" - :schema nil - :fields #{{:name "id" - :base-type :type/Integer} - {:name "name" - :base-type :type/Text}}} - "roles" {:name "roles" - :schema nil - :fields #{{:name "id" - :base-type :type/Integer} - {:name "movie_id" - :base-type :type/Integer} - {:name "actor_id" - :base-type :type/Integer} - {:name "character" - :base-type :type/Text} - {:name "salary" - :base-type :type/Decimal}} - :fks #{{:fk-column-name "movie_id" - :dest-table {:name "movies" - :schema nil} - :dest-column-name "id"} - {:fk-column-name "actor_id" - :dest-table {:name "actors" - :schema nil} - :dest-column-name "id"}}} - "reviews" {:name "reviews" - :schema nil - :fields #{{:name "id" - :base-type :type/Integer} - {:name "movie_id" - :base-type :type/Integer} - {:name "stars" - :base-type :type/Integer}} - :fks #{{:fk-column-name "movie_id" - :dest-table {:name "movies" - :schema nil} - :dest-column-name "id"}}}}) + {"movies" + {:name "movies" + :schema nil + :fields #{{:name "id" + :base-type :type/Integer} + {:name "title" + :base-type :type/Text} + {:name "filming" + :base-type :type/Boolean}}} + + "actors" + {:name "actors" + :schema nil + :fields #{{:name "id" + :base-type :type/Integer} + {:name "name" + :base-type :type/Text}}} + + "roles" + {:name "roles" + :schema nil + :fields #{{:name "id" + :base-type :type/Integer} + {:name "movie_id" + :base-type :type/Integer} + {:name "actor_id" + :base-type :type/Integer} + {:name "character" + :base-type :type/Text} + {:name "salary" + :base-type :type/Decimal}} + :fks #{{:fk-column-name "movie_id" + :dest-table {:name "movies" + :schema nil} + :dest-column-name "id"} + {:fk-column-name "actor_id" + :dest-table {:name "actors" + :schema nil} + :dest-column-name "id"}}} + + "reviews" + {:name "reviews" + :schema nil + :fields #{{:name "id" + :base-type :type/Integer} + {:name "movie_id" + :base-type :type/Integer} + {:name "stars" + :base-type :type/Integer}} + :fks #{{:fk-column-name "movie_id" + :dest-table {:name "movies" + :schema nil} + :dest-column-name "id"}}}}) + (defrecord MovieDbDriver [] clojure.lang.Named (getName [_] "MovieDbDriver")) + +(defn- describe-database [_ {:keys [exclude-tables]}] + (let [tables (for [table (vals moviedb-tables) + :when (not (contains? exclude-tables (:name table)))] + (select-keys table [:schema :name]))] + {:tables (set tables)})) + +(defn- describe-table [_ _ table] + (-> (get moviedb-tables (:name table)) + (dissoc :fks))) + +(defn- describe-table-fks [_ _ table] + (-> (get moviedb-tables (:name table)) + :fks + set)) + +(defn- table-rows-seq [_ _ table] + [{:keypath "movies.filming.description", :value "If the movie is currently being filmed."} + {:keypath "movies.description", :value "A cinematic adventure."}]) + + (extend MovieDbDriver driver/IDriver (merge driver/IDriverDefaultsMixin {:analyze-table (constantly nil) - :describe-database (fn [_ {:keys [exclude-tables]}] - (let [tables (for [table (vals moviedb-tables) - :when (not (contains? exclude-tables (:name table)))] - (select-keys table [:schema :name]))] - {:tables (set tables)})) - :describe-table (fn [_ _ table] - (-> (get moviedb-tables (:name table)) - (dissoc :fks))) - :describe-table-fks (fn [_ _ table] - (-> (get moviedb-tables (:name table)) - :fks - set)) + :describe-database describe-database + :describe-table describe-table + :describe-table-fks describe-table-fks :features (constantly #{:foreign-keys}) :details-fields (constantly []) - :table-rows-seq (constantly [{:keypath "movies.filming.description", :value "If the movie is currently being filmed."} - {:keypath "movies.description", :value "A cinematic adventure."}])})) + :table-rows-seq table-rows-seq})) (driver/register-driver! :moviedb (MovieDbDriver.)) -- GitLab