From f49e32a6d6e79937aa942a9f245a3db027f75ffb Mon Sep 17 00:00:00 2001 From: Ryan Senior <ryan@metabase.com> Date: Wed, 27 Jun 2018 17:31:29 -0500 Subject: [PATCH] Store fingerprints and special types for native cards This commit stores fingerprints and special type information for the results of cards that have native queries, similar to what we store for MBQL queries. --- src/metabase/api/card.clj | 11 +- .../middleware/expand_macros.clj | 1 - .../middleware/results_metadata.clj | 46 +------- src/metabase/query_processor/util.clj | 2 +- .../sync/analyze/classifiers/name.clj | 24 +++- src/metabase/sync/analyze/classify.clj | 2 +- src/metabase/sync/analyze/fingerprint.clj | 10 +- src/metabase/sync/analyze/query_results.clj | 84 +++++++++++++ test/metabase/api/card_test.clj | 18 +-- test/metabase/api/table_test.clj | 74 +++++++++--- .../middleware/results_metadata_test.clj | 56 ++++++--- .../sync/analyze/classifiers/name_test.clj | 4 +- .../sync/analyze/fingerprint_test.clj | 2 +- .../sync/analyze/query_results_test.clj | 111 ++++++++++++++++++ .../sync/sync_metadata/fields_test.clj | 11 +- test/metabase/test/mock/util.clj | 16 +++ test/metabase/test/util.clj | 16 +++ 17 files changed, 376 insertions(+), 112 deletions(-) create mode 100644 src/metabase/sync/analyze/query_results.clj create mode 100644 test/metabase/sync/analyze/query_results_test.clj diff --git a/src/metabase/api/card.clj b/src/metabase/api/card.clj index a4b1456352c..55c24658661 100644 --- a/src/metabase/api/card.clj +++ b/src/metabase/api/card.clj @@ -32,6 +32,7 @@ [metabase.query-processor.middleware [cache :as cache] [results-metadata :as results-metadata]] + [metabase.sync.analyze.query-results :as qr] [metabase.util.schema :as su] [puppetlabs.i18n.core :refer [trs]] [schema.core :as s] @@ -180,7 +181,7 @@ ;; we'll also pass a simple checksum and have the frontend pass it back to us. See the QP `results-metadata` ;; middleware namespace for more details -(s/defn ^:private result-metadata-for-query :- results-metadata/ResultsMetadata +(s/defn ^:private result-metadata-for-query :- qr/ResultsMetadata "Fetch the results metadata for a QUERY by running the query and seeing what the QP gives us in return. This is obviously a bit wasteful so hopefully we can avoid having to do this." [query] @@ -191,12 +192,12 @@ (u/pprint-to-str 'red results)) (get-in results [:data :results_metadata :columns]))))) -(s/defn ^:private result-metadata :- (s/maybe results-metadata/ResultsMetadata) +(s/defn ^:private result-metadata :- (s/maybe qr/ResultsMetadata) "Get the right results metadata for this CARD. We'll check to see whether the METADATA passed in seems valid; otherwise we'll run the query ourselves to get the right values." [query metadata checksum] (let [valid-metadata? (and (results-metadata/valid-checksum? metadata checksum) - (s/validate results-metadata/ResultsMetadata metadata))] + (s/validate qr/ResultsMetadata metadata))] (log/info (str "Card results metadata passed in to API is " (cond valid-metadata? "VALID. Thanks!" @@ -216,7 +217,7 @@ visualization_settings su/Map collection_id (s/maybe su/IntGreaterThanZero) collection_position (s/maybe su/IntGreaterThanZero) - result_metadata (s/maybe results-metadata/ResultsMetadata) + result_metadata (s/maybe qr/ResultsMetadata) metadata_checksum (s/maybe su/NonBlankString)} ;; check that we have permissions to run the query that we're trying to save (api/check-403 (perms/set-has-full-permissions-for-set? @api/*current-user-permissions-set* @@ -395,7 +396,7 @@ embedding_params (s/maybe su/EmbeddingParams) collection_id (s/maybe su/IntGreaterThanZero) collection_position (s/maybe su/IntGreaterThanZero) - result_metadata (s/maybe results-metadata/ResultsMetadata) + result_metadata (s/maybe qr/ResultsMetadata) metadata_checksum (s/maybe su/NonBlankString)} (let [card-before-update (api/write-check Card id)] ;; Do various permissions checks diff --git a/src/metabase/query_processor/middleware/expand_macros.clj b/src/metabase/query_processor/middleware/expand_macros.clj index aa7f74ddc40..d4365fa7d25 100644 --- a/src/metabase/query_processor/middleware/expand_macros.clj +++ b/src/metabase/query_processor/middleware/expand_macros.clj @@ -8,7 +8,6 @@ TODO - this namespace is ancient and written with MBQL '95 in mind, e.g. it is case-sensitive. At some point this ought to be reworked to be case-insensitive and cleaned up." (:require [clojure.tools.logging :as log] - [clojure.walk :as walk] [metabase.models [metric :refer [Metric]] [segment :refer [Segment]]] diff --git a/src/metabase/query_processor/middleware/results_metadata.clj b/src/metabase/query_processor/middleware/results_metadata.clj index 76110b33ed0..68457283c6f 100644 --- a/src/metabase/query_processor/middleware/results_metadata.clj +++ b/src/metabase/query_processor/middleware/results_metadata.clj @@ -5,53 +5,13 @@ (:require [buddy.core.hash :as hash] [cheshire.core :as json] [clojure.tools.logging :as log] - [metabase.models.humanization :as humanization] [metabase.query-processor.interface :as i] - [metabase.sync.interface :as si] + [metabase.sync.analyze.query-results :as qr] [metabase.util :as u] - [metabase.util - [encryption :as encryption] - [schema :as su]] + [metabase.util.encryption :as encryption] [ring.util.codec :as codec] - [schema.core :as s] [toucan.db :as db])) -(def ^:private DateTimeUnitKeywordOrString - "Schema for a valid datetime unit string like \"default\" or \"minute-of-hour\"." - (s/constrained su/KeywordOrString - (fn [unit] - (contains? i/datetime-field-units (keyword unit))) - "Valid field datetime unit keyword or string")) - -(def ResultsMetadata - "Schema for valid values of the `result_metadata` column." - (su/with-api-error-message (s/named [{:name su/NonBlankString - :display_name su/NonBlankString - (s/optional-key :description) (s/maybe su/NonBlankString) - :base_type su/FieldTypeKeywordOrString - (s/optional-key :special_type) (s/maybe su/FieldTypeKeywordOrString) - (s/optional-key :unit) (s/maybe DateTimeUnitKeywordOrString) - (s/optional-key :fingerprint) (s/maybe si/Fingerprint)}] - "Valid array of results column metadata maps") - "value must be an array of valid results column metadata maps.")) - -(s/defn ^:private results->column-metadata :- (s/maybe ResultsMetadata) - "Return the desired storage format for the column metadata coming back from RESULTS, or `nil` if no columns were returned." - [results] - ;; rarely certain queries will return columns with no names, for example `SELECT COUNT(*)` in SQL Server seems to come back with no name - ;; since we can't use those as field literals in subsequent queries just filter them out - (seq (for [col (:cols results) - :when (seq (:name col))] - (merge - ;; if base-type isn't set put a default one in there. Similarly just use humanized value of `:name` for `:display_name` if one isn't set - {:base_type :type/* - :display_name (humanization/name->human-readable-name (name (:name col)))} - (u/select-non-nil-keys col [:name :display_name :description :base_type :special_type :unit :fingerprint]) - ;; since years are actually returned as text they can't be used for breakout purposes so don't advertise them as DateTime columns - (when (= (:unit col) :year) - {:base_type :type/Text - :unit nil}))))) - ;; TODO - is there some way we could avoid doing this every single time a Card is ran? Perhaps by passing the current Card ;; metadata as part of the query context so we can compare for changes (defn- record-metadata! [card-id metadata] @@ -90,7 +50,7 @@ (fn [{{:keys [card-id nested?]} :info, :as query}] (let [results (qp query)] (try - (let [metadata (results->column-metadata results)] + (let [metadata (seq (qr/results->column-metadata results))] ;; At the very least we can skip the Extra DB call to update this Card's metadata results ;; if its DB doesn't support nested queries in the first place (when (i/driver-supports? :nested-queries) diff --git a/src/metabase/query_processor/util.clj b/src/metabase/query_processor/util.clj index fedcc1671da..7ae656f8d86 100644 --- a/src/metabase/query_processor/util.clj +++ b/src/metabase/query_processor/util.clj @@ -144,7 +144,7 @@ ;;; ---------------------------------------- General Tree Manipulation Helpers --------------------------------------- (defn postwalk-pred - "Transform `form` by applying `f` to a node in the tree when that `pred` returns true for that node" + "Transform `form` by applying `f` to each node where `pred` returns true" [pred f form] (walk/postwalk (fn [node] (if (pred node) diff --git a/src/metabase/sync/analyze/classifiers/name.clj b/src/metabase/sync/analyze/classifiers/name.clj index 8d326bc9b3c..3d23a6e13ce 100644 --- a/src/metabase/sync/analyze/classifiers/name.clj +++ b/src/metabase/sync/analyze/classifiers/name.clj @@ -121,17 +121,29 @@ special-type)) pattern+base-types+special-type))) -(s/defn infer-special-type :- (s/maybe i/FieldInstance) +(def ^:private FieldOrColumn + "Schema that allows a `metabase.model.field/Field` or a column from a query resultset" + {:name su/NonBlankString + :base_type s/Keyword + (s/optional-key :special_type) (s/maybe s/Keyword) + s/Any s/Any}) + +(s/defn infer-special-type :- (s/maybe s/Keyword) "Classifer that infers the special type of a FIELD based on its name and base type." - [field :- i/FieldInstance, _ :- (s/maybe i/Fingerprint)] + [field-or-column :- FieldOrColumn] ;; Don't overwrite keys, else we're ok with overwriting as a new more precise type might have ;; been added. - (when (not-any? (partial isa? (:special_type field)) [:type/PK :type/FK]) - (when-let [inferred-special-type (special-type-for-name-and-base-type (:name field) (:base_type field))] + (when (not-any? (partial isa? (:special_type field-or-column)) [:type/PK :type/FK]) + (special-type-for-name-and-base-type (:name field-or-column) (:base_type field-or-column)))) + +(s/defn infer-and-assoc-special-type :- (s/maybe FieldOrColumn) + "Returns `field-or-column` with a computed special type based on the name and base type of the `field-or-column`" + [field-or-column :- FieldOrColumn, _ :- (s/maybe i/Fingerprint)] + (when-let [inferred-special-type (infer-special-type field-or-column)] (log/debug (format "Based on the name of %s, we're giving it a special type of %s." - (sync-util/name-for-logging field) + (sync-util/name-for-logging field-or-column) inferred-special-type)) - (assoc field :special_type inferred-special-type)))) + (assoc field-or-column :special_type inferred-special-type))) (defn- prefix-or-postfix [s] diff --git a/src/metabase/sync/analyze/classify.clj b/src/metabase/sync/analyze/classify.clj index fa323457be2..8438a84a7a3 100644 --- a/src/metabase/sync/analyze/classify.clj +++ b/src/metabase/sync/analyze/classify.clj @@ -69,7 +69,7 @@ "Various classifier functions available. These should all take two args, a `FieldInstance` and a possibly `nil` `Fingerprint`, and return `FieldInstance` with any inferred property changes, or `nil` if none could be inferred. Order is important!" - [name/infer-special-type + [name/infer-and-assoc-special-type category/infer-is-category-or-list no-preview-display/infer-no-preview-display text-fingerprint/infer-special-type]) diff --git a/src/metabase/sync/analyze/fingerprint.clj b/src/metabase/sync/analyze/fingerprint.clj index 72de0c977b0..12f69a6565c 100644 --- a/src/metabase/sync/analyze/fingerprint.clj +++ b/src/metabase/sync/analyze/fingerprint.clj @@ -21,18 +21,22 @@ [schema.core :as s] [toucan.db :as db])) +(def ^:private FieldOrColumn + {:base_type s/Keyword + s/Any s/Any}) + (s/defn ^:private type-specific-fingerprint :- (s/maybe i/TypeSpecificFingerprint) "Return type-specific fingerprint info for FIELD AND. a FieldSample of Values if it has an elligible base type" - [field :- i/FieldInstance, values :- i/FieldSample] + [field :- FieldOrColumn, values :- i/FieldSample] (condp #(isa? %2 %1) (:base_type field) :type/Text {:type/Text (text/text-fingerprint values)} :type/Number {:type/Number (number/number-fingerprint values)} :type/DateTime {:type/DateTime (datetime/datetime-fingerprint values)} nil)) -(s/defn ^:private fingerprint :- i/Fingerprint +(s/defn fingerprint :- i/Fingerprint "Generate a 'fingerprint' from a FieldSample of VALUES." - [field :- i/FieldInstance, values :- i/FieldSample] + [field :- FieldOrColumn, values :- i/FieldSample] (merge (when-let [global-fingerprint (global/global-fingerprint values)] {:global global-fingerprint}) diff --git a/src/metabase/sync/analyze/query_results.clj b/src/metabase/sync/analyze/query_results.clj new file mode 100644 index 00000000000..a36e7cc743e --- /dev/null +++ b/src/metabase/sync/analyze/query_results.clj @@ -0,0 +1,84 @@ +(ns metabase.sync.analyze.query-results + "Analysis similar to what we do as part of the Sync process, but aimed at analyzing and introspecting query + results. The current focus of this namespace is around column metadata from the results of a query. Going forward + this is likely to extend beyond just metadata about columns but also about the query results as a whole and over + time." + (:require [metabase.models.humanization :as humanization] + [metabase.query-processor.interface :as i] + [metabase.sync.analyze.classifiers.name :as classify-name] + [metabase.sync.analyze.fingerprint :as f] + [metabase.sync.interface :as si] + [metabase.util :as u] + [metabase.util.schema :as su] + [schema.core :as s])) + +(def ^:private DateTimeUnitKeywordOrString + "Schema for a valid datetime unit string like \"default\" or \"minute-of-hour\"." + (s/constrained su/KeywordOrString + (fn [unit] + (contains? i/datetime-field-units (keyword unit))) + "Valid field datetime unit keyword or string")) + +(def ^:private ResultColumnMetadata + "Result metadata for a single column" + {:name su/NonBlankString + :display_name su/NonBlankString + (s/optional-key :description) (s/maybe su/NonBlankString) + :base_type su/FieldTypeKeywordOrString + (s/optional-key :special_type) (s/maybe su/FieldTypeKeywordOrString) + (s/optional-key :unit) (s/maybe DateTimeUnitKeywordOrString) + (s/optional-key :fingerprint) (s/maybe si/Fingerprint)}) + +(def ResultsMetadata + "Schema for valid values of the `result_metadata` column." + (su/with-api-error-message (s/named [ResultColumnMetadata] + "Valid array of results column metadata maps") + "value must be an array of valid results column metadata maps.")) + +(s/defn ^:private maybe-infer-special-type :- ResultColumnMetadata + "Infer the special type and add it to the result metadata. If the inferred special type is nil, don't override the + special type with a nil special type" + [result-metadata col] + (update result-metadata :special_type (fn [original-value] + (or (classify-name/infer-special-type col) + original-value)))) + +(s/defn ^:private maybe-compute-fingerprint :- ResultColumnMetadata + "If we already have a fingerprint from our stored metadata, use that. Queries that we don't have fingerprint data + on, such as native queries, we'll need to run the fingerprint code similar to what sync does." + [result-metadata col results-for-column] + (if-let [values (and (not (contains? result-metadata :fingerprint)) + (seq (remove nil? results-for-column)))] + (assoc result-metadata :fingerprint (f/fingerprint col values)) + result-metadata)) + +(s/defn ^:private stored-column-metadata->result-column-metadata :- ResultColumnMetadata + "The metadata in the column of our resultsets come from the metadata we store in the `Field` associated with the + column. It is cheapest and easiest to just use that. This function takes what it can from the column metadata to + populate the ResultColumnMetadata" + [column] + (merge + ;; if base-type isn't set put a default one in there. Similarly just use humanized value of `:name` for `:display_name` if one isn't set + {:base_type :type/* + :display_name (humanization/name->human-readable-name (name (:name column)))} + (u/select-non-nil-keys column [:name :display_name :description :base_type :special_type :unit :fingerprint]) + ;; since years are actually returned as text they can't be used for breakout purposes so don't advertise them as DateTime columns + (when (= (:unit column) :year) + {:base_type :type/Text + :unit nil}))) + +(s/defn results->column-metadata :- ResultsMetadata + "Return the desired storage format for the column metadata coming back from RESULTS, or `nil` if no columns were returned." + [results] + ;; rarely certain queries will return columns with no names, for example `SELECT COUNT(*)` in SQL Server seems to come back with no name + ;; since we can't use those as field literals in subsequent queries just filter them out + (for [[index col] (map-indexed vector (:cols results)) + :when (seq (:name col)) + ;; TODO: This is really inefficient. This will make one pass over the data for each column returned, which + ;; could be bad. The fingerprinting code expects to have a list of values to fingerprint, rather than more of + ;; an accumulator style that will take one value at a time as we make a single pass over the data. Once we can + ;; get the fingerprint code changed to support this, we should apply that change here + :let [results-for-column (map #(nth % index) (:rows results))]] + (-> (stored-column-metadata->result-column-metadata col) + (maybe-infer-special-type col) + (maybe-compute-fingerprint col results-for-column)))) diff --git a/test/metabase/api/card_test.clj b/test/metabase/api/card_test.clj index 3c8d18d77dd..51d864714e3 100644 --- a/test/metabase/api/card_test.clj +++ b/test/metabase/api/card_test.clj @@ -320,11 +320,13 @@ [{:base_type "type/Integer" :display_name "count" :name "count" - :special_type "type/Number"}] - (let [metadata [{:base_type :type/Integer - :display_name "Count Chocula" - :name "count_chocula" - :special_type :type/Number}] + :special_type "type/Quantity" + :fingerprint {:global {:distinct-count 1}, + :type {:type/Number {:min 100, :max 100, :avg 100.0}}}}] + (let [metadata [{:base_type :type/Integer + :display_name "Count Chocula" + :name "count_chocula" + :special_type :type/Quantity}] card-name (tu/random-name)] (tt/with-temp Collection [collection] (perms/grant-collection-readwrite-permissions! (perms-group/all-users) collection) @@ -521,11 +523,13 @@ [{:base_type "type/Integer" :display_name "count" :name "count" - :special_type "type/Number"}] + :special_type "type/Quantity" + :fingerprint {:global {:distinct-count 1}, + :type {:type/Number {:min 100, :max 100, :avg 100.0}}}}] (let [metadata [{:base_type :type/Integer :display_name "Count Chocula" :name "count_chocula" - :special_type :type/Number}]] + :special_type :type/Quantity}]] (tt/with-temp Card [card] (with-cards-in-writeable-collection card ;; update the Card's query diff --git a/test/metabase/api/table_test.clj b/test/metabase/api/table_test.clj index 573b4bad625..75bf74efde5 100644 --- a/test/metabase/api/table_test.clj +++ b/test/metabase/api/table_test.clj @@ -18,6 +18,7 @@ [permissions :as perms] [permissions-group :as perms-group] [table :as table :refer [Table]]] + [metabase.query-processor.util :as qputil] [metabase.test [data :as data] [util :as tu :refer [match-$]]] @@ -25,6 +26,7 @@ [dataset-definitions :as defs] [datasets :as datasets] [users :refer [user->client]]] + [metabase.test.mock.util :as mutil] [metabase.timeseries-query-processor-test.util :as tqpt] [toucan [db :as db] @@ -437,6 +439,25 @@ :fields_hash $}))))}]) ((user->client :rasta) :get 200 (format "table/%d/fks" (data/id :users)))) +(defn- with-field-literal-id [{field-name :name, base-type :base_type :as field}] + (assoc field :id ["field-literal" field-name base-type])) + +(defn- default-card-field-for-venues [table-id] + {:table_id table-id + :special_type nil + :default_dimension_option nil + :dimension_options []}) + +(defn- with-numeric-dimension-options [field] + (assoc field + :default_dimension_option (var-get #'table-api/numeric-default-index) + :dimension_options (var-get #'table-api/numeric-dimension-indexes))) + +(defn- with-coordinate-dimension-options [field] + (assoc field + :default_dimension_option (var-get #'table-api/coordinate-default-index) + :dimension_options (var-get #'table-api/coordinate-dimension-indexes))) + ;; Make sure metadata for 'virtual' tables comes back as expected from GET /api/table/:id/query_metadata (tt/expect-with-temp [Card [card {:name "Go Dubs!" :database_id (data/id) @@ -450,23 +471,36 @@ :id card-virtual-table-id :description nil :dimension_options (default-dimension-options) - :fields (for [[field-name display-name base-type] [["NAME" "Name" "type/Text"] - ["ID" "ID" "type/Integer"] - ["PRICE" "Price" "type/Integer"] - ["LATITUDE" "Latitude" "type/Float"]]] - {:name field-name - :display_name display-name - :base_type base-type - :table_id card-virtual-table-id - :id ["field-literal" field-name base-type] - :special_type nil - :default_dimension_option nil - :dimension_options []})}) + :fields (map (comp #(merge (default-card-field-for-venues card-virtual-table-id) %) + with-field-literal-id) + [{:name "NAME" + :display_name "Name" + :base_type "type/Text" + :special_type "type/Name" + :fingerprint (:name mutil/venue-fingerprints)} + (with-numeric-dimension-options + {:name "ID" + :display_name "ID" + :base_type "type/Integer" + :special_type nil + :fingerprint (:id mutil/venue-fingerprints)}) + (with-numeric-dimension-options + {:name "PRICE" + :display_name "Price" + :base_type "type/Integer" + :special_type nil + :fingerprint (:price mutil/venue-fingerprints)}) + (with-coordinate-dimension-options + {:name "LATITUDE" + :display_name "Latitude" + :base_type "type/Float" + :special_type "type/Latitude" + :fingerprint (:latitude mutil/venue-fingerprints)})])}) (do ;; run the Card which will populate its result_metadata column ((user->client :crowberto) :post 200 (format "card/%d/query" (u/get-id card))) ;; Now fetch the metadata for this "table" - ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card))))) + (tu/round-all-decimals 2 ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card)))))) ;; Test date dimensions being included with a nested query (tt/expect-with-temp [Card [card {:name "Users" @@ -486,9 +520,12 @@ :base_type "type/Text" :table_id card-virtual-table-id :id ["field-literal" "NAME" "type/Text"] - :special_type nil + :special_type "type/Name" :default_dimension_option nil - :dimension_options []} + :dimension_options [] + :fingerprint {:global {:distinct-count 15}, + :type {:type/Text {:percent-json 0.0, :percent-url 0.0, + :percent-email 0.0, :average-length 13.27}}}} {:name "LAST_LOGIN" :display_name "Last Login" :base_type "type/DateTime" @@ -496,12 +533,15 @@ :id ["field-literal" "LAST_LOGIN" "type/DateTime"] :special_type nil :default_dimension_option (var-get #'table-api/date-default-index) - :dimension_options (var-get #'table-api/datetime-dimension-indexes)}]}) + :dimension_options (var-get #'table-api/datetime-dimension-indexes) + :fingerprint {:global {:distinct-count 15}, + :type {:type/DateTime {:earliest "2014-01-01T08:30:00.000Z", + :latest "2014-12-05T15:15:00.000Z"}}}}]}) (do ;; run the Card which will populate its result_metadata column ((user->client :crowberto) :post 200 (format "card/%d/query" (u/get-id card))) ;; Now fetch the metadata for this "table" - ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card))))) + (tu/round-all-decimals 2 ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card)))))) ;; make sure GET /api/table/:id/fks just returns nothing for 'virtual' tables diff --git a/test/metabase/query_processor/middleware/results_metadata_test.clj b/test/metabase/query_processor/middleware/results_metadata_test.clj index e681b7e3186..cb97580d3b5 100644 --- a/test/metabase/query_processor/middleware/results_metadata_test.clj +++ b/test/metabase/query_processor/middleware/results_metadata_test.clj @@ -10,8 +10,11 @@ [permissions :as perms] [permissions-group :as group]] [metabase.query-processor.middleware.results-metadata :as results-metadata] - [metabase.test.data :as data] + [metabase.test + [data :as data] + [util :as tu]] [metabase.test.data.users :as users] + [metabase.test.mock.util :as mutil] [toucan.db :as db] [toucan.util.test :as tt])) @@ -23,19 +26,33 @@ (defn- card-metadata [card] (db/select-one-field :result_metadata Card :id (u/get-id card))) +(defn- round-all-decimals' + "Defaults `tu/round-all-decimals` to 2 digits" + [data] + (tu/round-all-decimals 2 data)) + +(def ^:private default-card-results + [{:name "ID", :display_name "ID", :base_type "type/Integer", + :special_type "type/PK", :fingerprint (:id mutil/venue-fingerprints)} + {:name "NAME", :display_name "Name", :base_type "type/Text", + :special_type "type/Name", :fingerprint (:name mutil/venue-fingerprints)} + {:name "PRICE", :display_name "Price", :base_type "type/Integer", + :special_type nil, :fingerprint (:price mutil/venue-fingerprints)} + {:name "CATEGORY_ID", :display_name "Category ID", :base_type "type/Integer", + :special_type nil, :fingerprint (:category_id mutil/venue-fingerprints)} + {:name "LATITUDE", :display_name "Latitude", :base_type "type/Float", + :special_type "type/Latitude", :fingerprint (:latitude mutil/venue-fingerprints)} + {:name "LONGITUDE", :display_name "Longitude", :base_type "type/Float" + :special_type "type/Longitude", :fingerprint (:longitude mutil/venue-fingerprints)}]) + ;; test that Card result metadata is saved after running a Card (expect - [{:name "ID", :display_name "ID", :base_type "type/Integer"} - {:name "NAME", :display_name "Name", :base_type "type/Text"} - {:name "PRICE", :display_name "Price", :base_type "type/Integer"} - {:name "CATEGORY_ID", :display_name "Category ID", :base_type "type/Integer"} - {:name "LATITUDE", :display_name "Latitude", :base_type "type/Float"} - {:name "LONGITUDE", :display_name "Longitude", :base_type "type/Float"}] + default-card-results (tt/with-temp Card [card] (qp/process-query (assoc (native-query "SELECT ID, NAME, PRICE, CATEGORY_ID, LATITUDE, LONGITUDE FROM VENUES") :info {:card-id (u/get-id card) :query-hash (byte-array 0)})) - (card-metadata card))) + (round-all-decimals' (card-metadata card)))) ;; check that using a Card as your source doesn't overwrite the results metadata... (expect @@ -73,17 +90,17 @@ ;; make sure that queries come back with metadata (expect {:checksum java.lang.String - :columns [{:base_type :type/Integer, :display_name "ID", :name "ID"} - {:base_type :type/Text, :display_name "Name", :name "NAME"} - {:base_type :type/Integer, :display_name "Price", :name "PRICE"} - {:base_type :type/Integer, :display_name "Category ID", :name "CATEGORY_ID"} - {:base_type :type/Float, :display_name "Latitude", :name "LATITUDE"} - {:base_type :type/Float, :display_name "Longitude", :name "LONGITUDE"}]} + :columns (map (fn [col] + (-> col + (update :special_type keyword) + (update :base_type keyword))) + default-card-results)} (-> (qp/process-query {:database (data/id) :type :native :native {:query "SELECT ID, NAME, PRICE, CATEGORY_ID, LATITUDE, LONGITUDE FROM VENUES"}}) (get-in [:data :results_metadata]) - (update :checksum class))) + (update :checksum class) + round-all-decimals')) ;; make sure that a Card where a DateTime column is broken out by year advertises that column as Text, since you can't ;; do datetime breakouts on years @@ -92,12 +109,15 @@ :display_name "Date" :name "DATE" :unit nil + :special_type nil :fingerprint {:global {:distinct-count 618}, :type {:type/DateTime {:earliest "2013-01-03T00:00:00.000Z" - :latest "2015-12-29T00:00:00.000Z"}}}} + :latest "2015-12-29T00:00:00.000Z"}}}} {:base_type "type/Integer" :display_name "count" :name "count" - :special_type "type/Number"}] + :special_type "type/Quantity" + :fingerprint {:global {:distinct-count 3}, + :type {:type/Number {:min 235, :max 498, :avg 333.33}}}}] (tt/with-temp Card [card] (qp/process-query {:database (data/id) :type :query @@ -106,4 +126,4 @@ :breakout [[:datetime-field [:field-id (data/id :checkins :date)] :year]]} :info {:card-id (u/get-id card) :query-hash (byte-array 0)}}) - (card-metadata card))) + (round-all-decimals' (card-metadata card)))) diff --git a/test/metabase/sync/analyze/classifiers/name_test.clj b/test/metabase/sync/analyze/classifiers/name_test.clj index 6b244c08753..8741638cda4 100644 --- a/test/metabase/sync/analyze/classifiers/name_test.clj +++ b/test/metabase/sync/analyze/classifiers/name_test.clj @@ -37,7 +37,7 @@ :special_type :type/FK :name "City" :base_type :type/Text}]] - (-> field-id Field (infer-special-type nil) :special_type))) + (-> field-id Field (infer-and-assoc-special-type nil) :special_type))) ;; ... but overwrite other types to alow evolution of our type system (expect @@ -47,4 +47,4 @@ :special_type :type/Category :name "City" :base_type :type/Text}]] - (-> field-id Field (infer-special-type nil) :special_type))) + (-> field-id Field (infer-and-assoc-special-type nil) :special_type))) diff --git a/test/metabase/sync/analyze/fingerprint_test.clj b/test/metabase/sync/analyze/fingerprint_test.clj index c5e8bedf880..4c01bd60949 100644 --- a/test/metabase/sync/analyze/fingerprint_test.clj +++ b/test/metabase/sync/analyze/fingerprint_test.clj @@ -135,7 +135,7 @@ ;; Make sure that the above functions are used correctly to determine which Fields get (re-)fingerprinted (defn- field-was-fingerprinted? {:style/indent 0} [fingerprint-versions field-properties] (let [fingerprinted? (atom false) - fake-field (field/map->FieldInstance {:name "Fake Field"})] + fake-field (field/map->FieldInstance {:name "Fake Field", :base_type :type/Number})] (with-redefs [i/fingerprint-version->types-that-should-be-re-fingerprinted fingerprint-versions sample/sample-fields (constantly [[fake-field [1 2 3 4 5]]]) fingerprint/save-fingerprint! (fn [& _] (reset! fingerprinted? true))] diff --git a/test/metabase/sync/analyze/query_results_test.clj b/test/metabase/sync/analyze/query_results_test.clj new file mode 100644 index 00000000000..281a5c8357e --- /dev/null +++ b/test/metabase/sync/analyze/query_results_test.clj @@ -0,0 +1,111 @@ +(ns metabase.sync.analyze.query-results-test + (:require [clojure.string :as str] + [expectations :refer :all] + [metabase + [query-processor :as qp] + [util :as u]] + [metabase.models + [card :refer [Card]] + [database :as database]] + [metabase.sync.analyze + [fingerprint :as fprint] + [query-results :as qr :refer :all]] + [metabase.sync.analyze.classifiers.name :as classify-name] + [metabase.test + [data :as data] + [util :as tu]] + [metabase.test.mock.util :as mutil] + [toucan.util.test :as tt])) + +(defn- column->name-keyword [field-or-column-metadata] + (-> field-or-column-metadata + :name + str/lower-case + keyword)) + +(defn- name->fingerprints [field-or-metadata] + (zipmap (map column->name-keyword field-or-metadata) + (map :fingerprint field-or-metadata))) + +(defn- name->special-type [field-or-metadata] + (zipmap (map column->name-keyword field-or-metadata) + (map :special_type field-or-metadata))) + +(defn- query->result-metadata + [query-map] + (->> query-map + qp/process-query + :data + results->column-metadata + (tu/round-all-decimals 2))) + +(defn- query-for-card [card] + {:database database/virtual-id + :type :query + :query {:source-table (str "card__" (u/get-id card))}}) + +(def ^:private venue-name->special-types + {:id :type/PK, + :name :type/Name, + :price :type/Category, + :category_id :type/FK, + :latitude :type/Latitude, + :longitude :type/Longitude}) + +;; Getting the result metadata for a card backed by an MBQL query should use the fingerprints from the related fields +(expect + mutil/venue-fingerprints + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :query + :query {:source-table (data/id :venues)}}}] + (tu/throw-if-called fprint/fingerprint + (name->fingerprints + (query->result-metadata (query-for-card card)))))) + +;; Getting the result metadata for a card backed by an MBQL query should just use the special types from the related fields +(expect + venue-name->special-types + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :query + :query {:source-table (data/id :venues)}}}] + (name->special-type (query->result-metadata (query-for-card card))))) + +;; Native queries don't know what the associated Fields are for the results, we need to compute the fingerprints, but +;; they should sill be the same +(expect + mutil/venue-fingerprints + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :native + :native {:query "select * from venues"}}}] + (name->fingerprints + (query->result-metadata (query-for-card card))))) + +;; Similarly, check that we computed the correct special types. Note that we don't know that the category_id is an FK +;; as it's just an integer flowing through, similarly Price isn't found to be a category as we're inferring by name +;; only +(expect + (assoc venue-name->special-types :category_id nil, :price nil) + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :native + :native {:query "select * from venues"}}}] + (name->special-type + (query->result-metadata (query-for-card card))))) + +;; Limiting to just 1 column on an MBQL query should still get the result metadata from the Field +(expect + (select-keys mutil/venue-fingerprints [:longitude]) + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :query + :query {:source-table (data/id :venues)}}}] + (tu/throw-if-called fprint/fingerprint + (name->fingerprints + (query->result-metadata (assoc-in (query-for-card card) [:query :fields] (data/id :venues :longitude))))))) + +;; Similar query as above, just native so that we need to calculate the fingerprint +(expect + (select-keys mutil/venue-fingerprints [:longitude]) + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :native + :native {:query "select longitude from venues"}}}] + (name->fingerprints + (query->result-metadata (query-for-card card))))) diff --git a/test/metabase/sync/sync_metadata/fields_test.clj b/test/metabase/sync/sync_metadata/fields_test.clj index b029c4c51cb..4f9e3cb54a6 100644 --- a/test/metabase/sync/sync_metadata/fields_test.clj +++ b/test/metabase/sync/sync_metadata/fields_test.clj @@ -12,7 +12,9 @@ [field :refer [Field]] [table :refer [Table]]] [metabase.sync.util-test :as sut] - [metabase.test.data :as data] + [metabase.test + [data :as data] + [util :as tu]] [metabase.test.data.one-off-dbs :as one-off-dbs] [toucan [db :as db] @@ -181,11 +183,6 @@ (doseq [statement statements] (jdbc/execute! one-off-dbs/*conn* [statement]))) -(defmacro ^:private throw-if-called {:style/indent 1} [fn-var & body] - `(with-redefs [~fn-var (fn [& args#] - (throw (RuntimeException. "Should not be called!")))] - ~@body)) - ;; Validate the changing of a column's type triggers a hash miss and sync (expect [ ;; Original column type @@ -212,7 +209,7 @@ {new-db-type :database_type} (get-field)] ;; Syncing again with no change should not call sync-field-instances! or update the hash - (throw-if-called metabase.sync.sync-metadata.fields/sync-field-instances! + (tu/throw-if-called metabase.sync.sync-metadata.fields/sync-field-instances! (sync/sync-database! (data/db)) [old-db-type new-db-type diff --git a/test/metabase/test/mock/util.clj b/test/metabase/test/mock/util.clj index a3a5b77455e..8683f261574 100644 --- a/test/metabase/test/mock/util.clj +++ b/test/metabase/test/mock/util.clj @@ -43,6 +43,22 @@ :schedule_day nil :enabled true}) +(def venue-fingerprints + "Fingerprints for the full venues table" + {:name {:global {:distinct-count 100}, + :type {:type/Text {:percent-json 0.0, :percent-url 0.0, + :percent-email 0.0, :average-length 15.63}}} + :id {:global {:distinct-count 100}, + :type {:type/Number {:min 1, :max 100, :avg 50.5}}} + :price {:global {:distinct-count 4}, + :type {:type/Number {:min 1, :max 4, :avg 2.03}}} + :latitude {:global {:distinct-count 94}, + :type {:type/Number {:min 10.06, :max 40.78, :avg 35.51}}} + :category_id {:global {:distinct-count 28}, + :type {:type/Number {:min 2, :max 74, :avg 29.98}}} + :longitude {:global {:distinct-count 84}, + :type {:type/Number {:min -165.37, :max -73.95, :avg -116.0}}}}) + ;; This is just a fake implementation that just swoops in and returns somewhat-correct looking results for different ;; queries we know will get ran as part of sync (defn- is-table-row-count-query? [expanded-query] diff --git a/test/metabase/test/util.clj b/test/metabase/test/util.clj index a45f36610b4..006e5cf54a7 100644 --- a/test/metabase/test/util.clj +++ b/test/metabase/test/util.clj @@ -29,6 +29,7 @@ [setting :as setting] [table :refer [Table]] [user :refer [User]]] + [metabase.query-processor.util :as qputil] [metabase.query-processor.middleware.expand :as ql] [metabase.test.data :as data] [metabase.test.data @@ -379,6 +380,13 @@ [:cols])] (update-in query-results maybe-data-cols #(map round-fingerprint %)))) +(defn round-all-decimals + "Uses `walk/postwalk` to crawl `data`, looking for any double values, will round any it finds" + [decimal-place data] + (qputil/postwalk-pred double? + #(u/round-to-decimals decimal-place %) + data)) + ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | SCHEDULER | @@ -558,3 +566,11 @@ ;; This releases the fake query function so it finishes (deliver pause-query true) true)]))) + +(defmacro throw-if-called + "Redefines `fn-var` with a function that throws an exception if it's called" + {:style/indent 1} + [fn-var & body] + `(with-redefs [~fn-var (fn [& args#] + (throw (RuntimeException. "Should not be called!")))] + ~@body)) -- GitLab