diff --git a/src/metabase/api/card.clj b/src/metabase/api/card.clj index a4b1456352c611b7dbdca9d2663b922b8634d8fc..55c2465866110a34063e0dc1111f7bcf093e1146 100644 --- a/src/metabase/api/card.clj +++ b/src/metabase/api/card.clj @@ -32,6 +32,7 @@ [metabase.query-processor.middleware [cache :as cache] [results-metadata :as results-metadata]] + [metabase.sync.analyze.query-results :as qr] [metabase.util.schema :as su] [puppetlabs.i18n.core :refer [trs]] [schema.core :as s] @@ -180,7 +181,7 @@ ;; we'll also pass a simple checksum and have the frontend pass it back to us. See the QP `results-metadata` ;; middleware namespace for more details -(s/defn ^:private result-metadata-for-query :- results-metadata/ResultsMetadata +(s/defn ^:private result-metadata-for-query :- qr/ResultsMetadata "Fetch the results metadata for a QUERY by running the query and seeing what the QP gives us in return. This is obviously a bit wasteful so hopefully we can avoid having to do this." [query] @@ -191,12 +192,12 @@ (u/pprint-to-str 'red results)) (get-in results [:data :results_metadata :columns]))))) -(s/defn ^:private result-metadata :- (s/maybe results-metadata/ResultsMetadata) +(s/defn ^:private result-metadata :- (s/maybe qr/ResultsMetadata) "Get the right results metadata for this CARD. We'll check to see whether the METADATA passed in seems valid; otherwise we'll run the query ourselves to get the right values." [query metadata checksum] (let [valid-metadata? (and (results-metadata/valid-checksum? metadata checksum) - (s/validate results-metadata/ResultsMetadata metadata))] + (s/validate qr/ResultsMetadata metadata))] (log/info (str "Card results metadata passed in to API is " (cond valid-metadata? "VALID. Thanks!" @@ -216,7 +217,7 @@ visualization_settings su/Map collection_id (s/maybe su/IntGreaterThanZero) collection_position (s/maybe su/IntGreaterThanZero) - result_metadata (s/maybe results-metadata/ResultsMetadata) + result_metadata (s/maybe qr/ResultsMetadata) metadata_checksum (s/maybe su/NonBlankString)} ;; check that we have permissions to run the query that we're trying to save (api/check-403 (perms/set-has-full-permissions-for-set? @api/*current-user-permissions-set* @@ -395,7 +396,7 @@ embedding_params (s/maybe su/EmbeddingParams) collection_id (s/maybe su/IntGreaterThanZero) collection_position (s/maybe su/IntGreaterThanZero) - result_metadata (s/maybe results-metadata/ResultsMetadata) + result_metadata (s/maybe qr/ResultsMetadata) metadata_checksum (s/maybe su/NonBlankString)} (let [card-before-update (api/write-check Card id)] ;; Do various permissions checks diff --git a/src/metabase/query_processor/middleware/expand_macros.clj b/src/metabase/query_processor/middleware/expand_macros.clj index aa7f74ddc40ae06f9ea3da1177368d33c3b0ba19..d4365fa7d252632e1a8feb8c2315d73de698e5a6 100644 --- a/src/metabase/query_processor/middleware/expand_macros.clj +++ b/src/metabase/query_processor/middleware/expand_macros.clj @@ -8,7 +8,6 @@ TODO - this namespace is ancient and written with MBQL '95 in mind, e.g. it is case-sensitive. At some point this ought to be reworked to be case-insensitive and cleaned up." (:require [clojure.tools.logging :as log] - [clojure.walk :as walk] [metabase.models [metric :refer [Metric]] [segment :refer [Segment]]] diff --git a/src/metabase/query_processor/middleware/results_metadata.clj b/src/metabase/query_processor/middleware/results_metadata.clj index 76110b33ed0f40ab94d7794cb72f176131bada3b..68457283c6f9563cc3784ed8318e31fe83c4e670 100644 --- a/src/metabase/query_processor/middleware/results_metadata.clj +++ b/src/metabase/query_processor/middleware/results_metadata.clj @@ -5,53 +5,13 @@ (:require [buddy.core.hash :as hash] [cheshire.core :as json] [clojure.tools.logging :as log] - [metabase.models.humanization :as humanization] [metabase.query-processor.interface :as i] - [metabase.sync.interface :as si] + [metabase.sync.analyze.query-results :as qr] [metabase.util :as u] - [metabase.util - [encryption :as encryption] - [schema :as su]] + [metabase.util.encryption :as encryption] [ring.util.codec :as codec] - [schema.core :as s] [toucan.db :as db])) -(def ^:private DateTimeUnitKeywordOrString - "Schema for a valid datetime unit string like \"default\" or \"minute-of-hour\"." - (s/constrained su/KeywordOrString - (fn [unit] - (contains? i/datetime-field-units (keyword unit))) - "Valid field datetime unit keyword or string")) - -(def ResultsMetadata - "Schema for valid values of the `result_metadata` column." - (su/with-api-error-message (s/named [{:name su/NonBlankString - :display_name su/NonBlankString - (s/optional-key :description) (s/maybe su/NonBlankString) - :base_type su/FieldTypeKeywordOrString - (s/optional-key :special_type) (s/maybe su/FieldTypeKeywordOrString) - (s/optional-key :unit) (s/maybe DateTimeUnitKeywordOrString) - (s/optional-key :fingerprint) (s/maybe si/Fingerprint)}] - "Valid array of results column metadata maps") - "value must be an array of valid results column metadata maps.")) - -(s/defn ^:private results->column-metadata :- (s/maybe ResultsMetadata) - "Return the desired storage format for the column metadata coming back from RESULTS, or `nil` if no columns were returned." - [results] - ;; rarely certain queries will return columns with no names, for example `SELECT COUNT(*)` in SQL Server seems to come back with no name - ;; since we can't use those as field literals in subsequent queries just filter them out - (seq (for [col (:cols results) - :when (seq (:name col))] - (merge - ;; if base-type isn't set put a default one in there. Similarly just use humanized value of `:name` for `:display_name` if one isn't set - {:base_type :type/* - :display_name (humanization/name->human-readable-name (name (:name col)))} - (u/select-non-nil-keys col [:name :display_name :description :base_type :special_type :unit :fingerprint]) - ;; since years are actually returned as text they can't be used for breakout purposes so don't advertise them as DateTime columns - (when (= (:unit col) :year) - {:base_type :type/Text - :unit nil}))))) - ;; TODO - is there some way we could avoid doing this every single time a Card is ran? Perhaps by passing the current Card ;; metadata as part of the query context so we can compare for changes (defn- record-metadata! [card-id metadata] @@ -90,7 +50,7 @@ (fn [{{:keys [card-id nested?]} :info, :as query}] (let [results (qp query)] (try - (let [metadata (results->column-metadata results)] + (let [metadata (seq (qr/results->column-metadata results))] ;; At the very least we can skip the Extra DB call to update this Card's metadata results ;; if its DB doesn't support nested queries in the first place (when (i/driver-supports? :nested-queries) diff --git a/src/metabase/query_processor/util.clj b/src/metabase/query_processor/util.clj index fedcc1671da0f148f10a0cab651226fbccd14e0d..7ae656f8d8611bd846c6b5dce4736e4c1fef849c 100644 --- a/src/metabase/query_processor/util.clj +++ b/src/metabase/query_processor/util.clj @@ -144,7 +144,7 @@ ;;; ---------------------------------------- General Tree Manipulation Helpers --------------------------------------- (defn postwalk-pred - "Transform `form` by applying `f` to a node in the tree when that `pred` returns true for that node" + "Transform `form` by applying `f` to each node where `pred` returns true" [pred f form] (walk/postwalk (fn [node] (if (pred node) diff --git a/src/metabase/sync/analyze/classifiers/name.clj b/src/metabase/sync/analyze/classifiers/name.clj index 8d326bc9b3c5fdc9e75019da2f4b5341a6b42112..3d23a6e13ce61be37c92d0dddd2313e4aa1c776a 100644 --- a/src/metabase/sync/analyze/classifiers/name.clj +++ b/src/metabase/sync/analyze/classifiers/name.clj @@ -121,17 +121,29 @@ special-type)) pattern+base-types+special-type))) -(s/defn infer-special-type :- (s/maybe i/FieldInstance) +(def ^:private FieldOrColumn + "Schema that allows a `metabase.model.field/Field` or a column from a query resultset" + {:name su/NonBlankString + :base_type s/Keyword + (s/optional-key :special_type) (s/maybe s/Keyword) + s/Any s/Any}) + +(s/defn infer-special-type :- (s/maybe s/Keyword) "Classifer that infers the special type of a FIELD based on its name and base type." - [field :- i/FieldInstance, _ :- (s/maybe i/Fingerprint)] + [field-or-column :- FieldOrColumn] ;; Don't overwrite keys, else we're ok with overwriting as a new more precise type might have ;; been added. - (when (not-any? (partial isa? (:special_type field)) [:type/PK :type/FK]) - (when-let [inferred-special-type (special-type-for-name-and-base-type (:name field) (:base_type field))] + (when (not-any? (partial isa? (:special_type field-or-column)) [:type/PK :type/FK]) + (special-type-for-name-and-base-type (:name field-or-column) (:base_type field-or-column)))) + +(s/defn infer-and-assoc-special-type :- (s/maybe FieldOrColumn) + "Returns `field-or-column` with a computed special type based on the name and base type of the `field-or-column`" + [field-or-column :- FieldOrColumn, _ :- (s/maybe i/Fingerprint)] + (when-let [inferred-special-type (infer-special-type field-or-column)] (log/debug (format "Based on the name of %s, we're giving it a special type of %s." - (sync-util/name-for-logging field) + (sync-util/name-for-logging field-or-column) inferred-special-type)) - (assoc field :special_type inferred-special-type)))) + (assoc field-or-column :special_type inferred-special-type))) (defn- prefix-or-postfix [s] diff --git a/src/metabase/sync/analyze/classify.clj b/src/metabase/sync/analyze/classify.clj index fa323457be2b1bbe89b1f58d5bb0aa77ce527ff5..8438a84a7a34d2e5760acc53d22e2dbd24e9e8c3 100644 --- a/src/metabase/sync/analyze/classify.clj +++ b/src/metabase/sync/analyze/classify.clj @@ -69,7 +69,7 @@ "Various classifier functions available. These should all take two args, a `FieldInstance` and a possibly `nil` `Fingerprint`, and return `FieldInstance` with any inferred property changes, or `nil` if none could be inferred. Order is important!" - [name/infer-special-type + [name/infer-and-assoc-special-type category/infer-is-category-or-list no-preview-display/infer-no-preview-display text-fingerprint/infer-special-type]) diff --git a/src/metabase/sync/analyze/fingerprint.clj b/src/metabase/sync/analyze/fingerprint.clj index 72de0c977b040b0bcf8d2c0f876624c211fd49d7..12f69a6565c921339fa0b8f1ba502da0cbe69c6a 100644 --- a/src/metabase/sync/analyze/fingerprint.clj +++ b/src/metabase/sync/analyze/fingerprint.clj @@ -21,18 +21,22 @@ [schema.core :as s] [toucan.db :as db])) +(def ^:private FieldOrColumn + {:base_type s/Keyword + s/Any s/Any}) + (s/defn ^:private type-specific-fingerprint :- (s/maybe i/TypeSpecificFingerprint) "Return type-specific fingerprint info for FIELD AND. a FieldSample of Values if it has an elligible base type" - [field :- i/FieldInstance, values :- i/FieldSample] + [field :- FieldOrColumn, values :- i/FieldSample] (condp #(isa? %2 %1) (:base_type field) :type/Text {:type/Text (text/text-fingerprint values)} :type/Number {:type/Number (number/number-fingerprint values)} :type/DateTime {:type/DateTime (datetime/datetime-fingerprint values)} nil)) -(s/defn ^:private fingerprint :- i/Fingerprint +(s/defn fingerprint :- i/Fingerprint "Generate a 'fingerprint' from a FieldSample of VALUES." - [field :- i/FieldInstance, values :- i/FieldSample] + [field :- FieldOrColumn, values :- i/FieldSample] (merge (when-let [global-fingerprint (global/global-fingerprint values)] {:global global-fingerprint}) diff --git a/src/metabase/sync/analyze/query_results.clj b/src/metabase/sync/analyze/query_results.clj new file mode 100644 index 0000000000000000000000000000000000000000..a36e7cc743e6ae5aa47db53c534e2891fee215d7 --- /dev/null +++ b/src/metabase/sync/analyze/query_results.clj @@ -0,0 +1,84 @@ +(ns metabase.sync.analyze.query-results + "Analysis similar to what we do as part of the Sync process, but aimed at analyzing and introspecting query + results. The current focus of this namespace is around column metadata from the results of a query. Going forward + this is likely to extend beyond just metadata about columns but also about the query results as a whole and over + time." + (:require [metabase.models.humanization :as humanization] + [metabase.query-processor.interface :as i] + [metabase.sync.analyze.classifiers.name :as classify-name] + [metabase.sync.analyze.fingerprint :as f] + [metabase.sync.interface :as si] + [metabase.util :as u] + [metabase.util.schema :as su] + [schema.core :as s])) + +(def ^:private DateTimeUnitKeywordOrString + "Schema for a valid datetime unit string like \"default\" or \"minute-of-hour\"." + (s/constrained su/KeywordOrString + (fn [unit] + (contains? i/datetime-field-units (keyword unit))) + "Valid field datetime unit keyword or string")) + +(def ^:private ResultColumnMetadata + "Result metadata for a single column" + {:name su/NonBlankString + :display_name su/NonBlankString + (s/optional-key :description) (s/maybe su/NonBlankString) + :base_type su/FieldTypeKeywordOrString + (s/optional-key :special_type) (s/maybe su/FieldTypeKeywordOrString) + (s/optional-key :unit) (s/maybe DateTimeUnitKeywordOrString) + (s/optional-key :fingerprint) (s/maybe si/Fingerprint)}) + +(def ResultsMetadata + "Schema for valid values of the `result_metadata` column." + (su/with-api-error-message (s/named [ResultColumnMetadata] + "Valid array of results column metadata maps") + "value must be an array of valid results column metadata maps.")) + +(s/defn ^:private maybe-infer-special-type :- ResultColumnMetadata + "Infer the special type and add it to the result metadata. If the inferred special type is nil, don't override the + special type with a nil special type" + [result-metadata col] + (update result-metadata :special_type (fn [original-value] + (or (classify-name/infer-special-type col) + original-value)))) + +(s/defn ^:private maybe-compute-fingerprint :- ResultColumnMetadata + "If we already have a fingerprint from our stored metadata, use that. Queries that we don't have fingerprint data + on, such as native queries, we'll need to run the fingerprint code similar to what sync does." + [result-metadata col results-for-column] + (if-let [values (and (not (contains? result-metadata :fingerprint)) + (seq (remove nil? results-for-column)))] + (assoc result-metadata :fingerprint (f/fingerprint col values)) + result-metadata)) + +(s/defn ^:private stored-column-metadata->result-column-metadata :- ResultColumnMetadata + "The metadata in the column of our resultsets come from the metadata we store in the `Field` associated with the + column. It is cheapest and easiest to just use that. This function takes what it can from the column metadata to + populate the ResultColumnMetadata" + [column] + (merge + ;; if base-type isn't set put a default one in there. Similarly just use humanized value of `:name` for `:display_name` if one isn't set + {:base_type :type/* + :display_name (humanization/name->human-readable-name (name (:name column)))} + (u/select-non-nil-keys column [:name :display_name :description :base_type :special_type :unit :fingerprint]) + ;; since years are actually returned as text they can't be used for breakout purposes so don't advertise them as DateTime columns + (when (= (:unit column) :year) + {:base_type :type/Text + :unit nil}))) + +(s/defn results->column-metadata :- ResultsMetadata + "Return the desired storage format for the column metadata coming back from RESULTS, or `nil` if no columns were returned." + [results] + ;; rarely certain queries will return columns with no names, for example `SELECT COUNT(*)` in SQL Server seems to come back with no name + ;; since we can't use those as field literals in subsequent queries just filter them out + (for [[index col] (map-indexed vector (:cols results)) + :when (seq (:name col)) + ;; TODO: This is really inefficient. This will make one pass over the data for each column returned, which + ;; could be bad. The fingerprinting code expects to have a list of values to fingerprint, rather than more of + ;; an accumulator style that will take one value at a time as we make a single pass over the data. Once we can + ;; get the fingerprint code changed to support this, we should apply that change here + :let [results-for-column (map #(nth % index) (:rows results))]] + (-> (stored-column-metadata->result-column-metadata col) + (maybe-infer-special-type col) + (maybe-compute-fingerprint col results-for-column)))) diff --git a/test/metabase/api/card_test.clj b/test/metabase/api/card_test.clj index 3c8d18d77ddfc8cfdbecf6ab41be8dbb3aa2af63..51d864714e3d2b89959ac0930346f61497224669 100644 --- a/test/metabase/api/card_test.clj +++ b/test/metabase/api/card_test.clj @@ -320,11 +320,13 @@ [{:base_type "type/Integer" :display_name "count" :name "count" - :special_type "type/Number"}] - (let [metadata [{:base_type :type/Integer - :display_name "Count Chocula" - :name "count_chocula" - :special_type :type/Number}] + :special_type "type/Quantity" + :fingerprint {:global {:distinct-count 1}, + :type {:type/Number {:min 100, :max 100, :avg 100.0}}}}] + (let [metadata [{:base_type :type/Integer + :display_name "Count Chocula" + :name "count_chocula" + :special_type :type/Quantity}] card-name (tu/random-name)] (tt/with-temp Collection [collection] (perms/grant-collection-readwrite-permissions! (perms-group/all-users) collection) @@ -521,11 +523,13 @@ [{:base_type "type/Integer" :display_name "count" :name "count" - :special_type "type/Number"}] + :special_type "type/Quantity" + :fingerprint {:global {:distinct-count 1}, + :type {:type/Number {:min 100, :max 100, :avg 100.0}}}}] (let [metadata [{:base_type :type/Integer :display_name "Count Chocula" :name "count_chocula" - :special_type :type/Number}]] + :special_type :type/Quantity}]] (tt/with-temp Card [card] (with-cards-in-writeable-collection card ;; update the Card's query diff --git a/test/metabase/api/table_test.clj b/test/metabase/api/table_test.clj index 573b4bad625aa929e9bd3401fae35c71fc4fb5fe..75bf74efde56e6aa11eb1be9846bcd85fbad6db6 100644 --- a/test/metabase/api/table_test.clj +++ b/test/metabase/api/table_test.clj @@ -18,6 +18,7 @@ [permissions :as perms] [permissions-group :as perms-group] [table :as table :refer [Table]]] + [metabase.query-processor.util :as qputil] [metabase.test [data :as data] [util :as tu :refer [match-$]]] @@ -25,6 +26,7 @@ [dataset-definitions :as defs] [datasets :as datasets] [users :refer [user->client]]] + [metabase.test.mock.util :as mutil] [metabase.timeseries-query-processor-test.util :as tqpt] [toucan [db :as db] @@ -437,6 +439,25 @@ :fields_hash $}))))}]) ((user->client :rasta) :get 200 (format "table/%d/fks" (data/id :users)))) +(defn- with-field-literal-id [{field-name :name, base-type :base_type :as field}] + (assoc field :id ["field-literal" field-name base-type])) + +(defn- default-card-field-for-venues [table-id] + {:table_id table-id + :special_type nil + :default_dimension_option nil + :dimension_options []}) + +(defn- with-numeric-dimension-options [field] + (assoc field + :default_dimension_option (var-get #'table-api/numeric-default-index) + :dimension_options (var-get #'table-api/numeric-dimension-indexes))) + +(defn- with-coordinate-dimension-options [field] + (assoc field + :default_dimension_option (var-get #'table-api/coordinate-default-index) + :dimension_options (var-get #'table-api/coordinate-dimension-indexes))) + ;; Make sure metadata for 'virtual' tables comes back as expected from GET /api/table/:id/query_metadata (tt/expect-with-temp [Card [card {:name "Go Dubs!" :database_id (data/id) @@ -450,23 +471,36 @@ :id card-virtual-table-id :description nil :dimension_options (default-dimension-options) - :fields (for [[field-name display-name base-type] [["NAME" "Name" "type/Text"] - ["ID" "ID" "type/Integer"] - ["PRICE" "Price" "type/Integer"] - ["LATITUDE" "Latitude" "type/Float"]]] - {:name field-name - :display_name display-name - :base_type base-type - :table_id card-virtual-table-id - :id ["field-literal" field-name base-type] - :special_type nil - :default_dimension_option nil - :dimension_options []})}) + :fields (map (comp #(merge (default-card-field-for-venues card-virtual-table-id) %) + with-field-literal-id) + [{:name "NAME" + :display_name "Name" + :base_type "type/Text" + :special_type "type/Name" + :fingerprint (:name mutil/venue-fingerprints)} + (with-numeric-dimension-options + {:name "ID" + :display_name "ID" + :base_type "type/Integer" + :special_type nil + :fingerprint (:id mutil/venue-fingerprints)}) + (with-numeric-dimension-options + {:name "PRICE" + :display_name "Price" + :base_type "type/Integer" + :special_type nil + :fingerprint (:price mutil/venue-fingerprints)}) + (with-coordinate-dimension-options + {:name "LATITUDE" + :display_name "Latitude" + :base_type "type/Float" + :special_type "type/Latitude" + :fingerprint (:latitude mutil/venue-fingerprints)})])}) (do ;; run the Card which will populate its result_metadata column ((user->client :crowberto) :post 200 (format "card/%d/query" (u/get-id card))) ;; Now fetch the metadata for this "table" - ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card))))) + (tu/round-all-decimals 2 ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card)))))) ;; Test date dimensions being included with a nested query (tt/expect-with-temp [Card [card {:name "Users" @@ -486,9 +520,12 @@ :base_type "type/Text" :table_id card-virtual-table-id :id ["field-literal" "NAME" "type/Text"] - :special_type nil + :special_type "type/Name" :default_dimension_option nil - :dimension_options []} + :dimension_options [] + :fingerprint {:global {:distinct-count 15}, + :type {:type/Text {:percent-json 0.0, :percent-url 0.0, + :percent-email 0.0, :average-length 13.27}}}} {:name "LAST_LOGIN" :display_name "Last Login" :base_type "type/DateTime" @@ -496,12 +533,15 @@ :id ["field-literal" "LAST_LOGIN" "type/DateTime"] :special_type nil :default_dimension_option (var-get #'table-api/date-default-index) - :dimension_options (var-get #'table-api/datetime-dimension-indexes)}]}) + :dimension_options (var-get #'table-api/datetime-dimension-indexes) + :fingerprint {:global {:distinct-count 15}, + :type {:type/DateTime {:earliest "2014-01-01T08:30:00.000Z", + :latest "2014-12-05T15:15:00.000Z"}}}}]}) (do ;; run the Card which will populate its result_metadata column ((user->client :crowberto) :post 200 (format "card/%d/query" (u/get-id card))) ;; Now fetch the metadata for this "table" - ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card))))) + (tu/round-all-decimals 2 ((user->client :crowberto) :get 200 (format "table/card__%d/query_metadata" (u/get-id card)))))) ;; make sure GET /api/table/:id/fks just returns nothing for 'virtual' tables diff --git a/test/metabase/query_processor/middleware/results_metadata_test.clj b/test/metabase/query_processor/middleware/results_metadata_test.clj index e681b7e31866bf3b2480945e0737ef5b4bea3dc3..cb97580d3b5a4b6dd0abc88c493c7578337dfe88 100644 --- a/test/metabase/query_processor/middleware/results_metadata_test.clj +++ b/test/metabase/query_processor/middleware/results_metadata_test.clj @@ -10,8 +10,11 @@ [permissions :as perms] [permissions-group :as group]] [metabase.query-processor.middleware.results-metadata :as results-metadata] - [metabase.test.data :as data] + [metabase.test + [data :as data] + [util :as tu]] [metabase.test.data.users :as users] + [metabase.test.mock.util :as mutil] [toucan.db :as db] [toucan.util.test :as tt])) @@ -23,19 +26,33 @@ (defn- card-metadata [card] (db/select-one-field :result_metadata Card :id (u/get-id card))) +(defn- round-all-decimals' + "Defaults `tu/round-all-decimals` to 2 digits" + [data] + (tu/round-all-decimals 2 data)) + +(def ^:private default-card-results + [{:name "ID", :display_name "ID", :base_type "type/Integer", + :special_type "type/PK", :fingerprint (:id mutil/venue-fingerprints)} + {:name "NAME", :display_name "Name", :base_type "type/Text", + :special_type "type/Name", :fingerprint (:name mutil/venue-fingerprints)} + {:name "PRICE", :display_name "Price", :base_type "type/Integer", + :special_type nil, :fingerprint (:price mutil/venue-fingerprints)} + {:name "CATEGORY_ID", :display_name "Category ID", :base_type "type/Integer", + :special_type nil, :fingerprint (:category_id mutil/venue-fingerprints)} + {:name "LATITUDE", :display_name "Latitude", :base_type "type/Float", + :special_type "type/Latitude", :fingerprint (:latitude mutil/venue-fingerprints)} + {:name "LONGITUDE", :display_name "Longitude", :base_type "type/Float" + :special_type "type/Longitude", :fingerprint (:longitude mutil/venue-fingerprints)}]) + ;; test that Card result metadata is saved after running a Card (expect - [{:name "ID", :display_name "ID", :base_type "type/Integer"} - {:name "NAME", :display_name "Name", :base_type "type/Text"} - {:name "PRICE", :display_name "Price", :base_type "type/Integer"} - {:name "CATEGORY_ID", :display_name "Category ID", :base_type "type/Integer"} - {:name "LATITUDE", :display_name "Latitude", :base_type "type/Float"} - {:name "LONGITUDE", :display_name "Longitude", :base_type "type/Float"}] + default-card-results (tt/with-temp Card [card] (qp/process-query (assoc (native-query "SELECT ID, NAME, PRICE, CATEGORY_ID, LATITUDE, LONGITUDE FROM VENUES") :info {:card-id (u/get-id card) :query-hash (byte-array 0)})) - (card-metadata card))) + (round-all-decimals' (card-metadata card)))) ;; check that using a Card as your source doesn't overwrite the results metadata... (expect @@ -73,17 +90,17 @@ ;; make sure that queries come back with metadata (expect {:checksum java.lang.String - :columns [{:base_type :type/Integer, :display_name "ID", :name "ID"} - {:base_type :type/Text, :display_name "Name", :name "NAME"} - {:base_type :type/Integer, :display_name "Price", :name "PRICE"} - {:base_type :type/Integer, :display_name "Category ID", :name "CATEGORY_ID"} - {:base_type :type/Float, :display_name "Latitude", :name "LATITUDE"} - {:base_type :type/Float, :display_name "Longitude", :name "LONGITUDE"}]} + :columns (map (fn [col] + (-> col + (update :special_type keyword) + (update :base_type keyword))) + default-card-results)} (-> (qp/process-query {:database (data/id) :type :native :native {:query "SELECT ID, NAME, PRICE, CATEGORY_ID, LATITUDE, LONGITUDE FROM VENUES"}}) (get-in [:data :results_metadata]) - (update :checksum class))) + (update :checksum class) + round-all-decimals')) ;; make sure that a Card where a DateTime column is broken out by year advertises that column as Text, since you can't ;; do datetime breakouts on years @@ -92,12 +109,15 @@ :display_name "Date" :name "DATE" :unit nil + :special_type nil :fingerprint {:global {:distinct-count 618}, :type {:type/DateTime {:earliest "2013-01-03T00:00:00.000Z" - :latest "2015-12-29T00:00:00.000Z"}}}} + :latest "2015-12-29T00:00:00.000Z"}}}} {:base_type "type/Integer" :display_name "count" :name "count" - :special_type "type/Number"}] + :special_type "type/Quantity" + :fingerprint {:global {:distinct-count 3}, + :type {:type/Number {:min 235, :max 498, :avg 333.33}}}}] (tt/with-temp Card [card] (qp/process-query {:database (data/id) :type :query @@ -106,4 +126,4 @@ :breakout [[:datetime-field [:field-id (data/id :checkins :date)] :year]]} :info {:card-id (u/get-id card) :query-hash (byte-array 0)}}) - (card-metadata card))) + (round-all-decimals' (card-metadata card)))) diff --git a/test/metabase/sync/analyze/classifiers/name_test.clj b/test/metabase/sync/analyze/classifiers/name_test.clj index 6b244c08753d9502cafc681324449bfdf51801ce..8741638cda4d1386c388c91b29dd422bec582952 100644 --- a/test/metabase/sync/analyze/classifiers/name_test.clj +++ b/test/metabase/sync/analyze/classifiers/name_test.clj @@ -37,7 +37,7 @@ :special_type :type/FK :name "City" :base_type :type/Text}]] - (-> field-id Field (infer-special-type nil) :special_type))) + (-> field-id Field (infer-and-assoc-special-type nil) :special_type))) ;; ... but overwrite other types to alow evolution of our type system (expect @@ -47,4 +47,4 @@ :special_type :type/Category :name "City" :base_type :type/Text}]] - (-> field-id Field (infer-special-type nil) :special_type))) + (-> field-id Field (infer-and-assoc-special-type nil) :special_type))) diff --git a/test/metabase/sync/analyze/fingerprint_test.clj b/test/metabase/sync/analyze/fingerprint_test.clj index c5e8bedf8800538ce7e0888ea9c4f50eb995f536..4c01bd60949825ab4b04692eb7bde5185170f4f9 100644 --- a/test/metabase/sync/analyze/fingerprint_test.clj +++ b/test/metabase/sync/analyze/fingerprint_test.clj @@ -135,7 +135,7 @@ ;; Make sure that the above functions are used correctly to determine which Fields get (re-)fingerprinted (defn- field-was-fingerprinted? {:style/indent 0} [fingerprint-versions field-properties] (let [fingerprinted? (atom false) - fake-field (field/map->FieldInstance {:name "Fake Field"})] + fake-field (field/map->FieldInstance {:name "Fake Field", :base_type :type/Number})] (with-redefs [i/fingerprint-version->types-that-should-be-re-fingerprinted fingerprint-versions sample/sample-fields (constantly [[fake-field [1 2 3 4 5]]]) fingerprint/save-fingerprint! (fn [& _] (reset! fingerprinted? true))] diff --git a/test/metabase/sync/analyze/query_results_test.clj b/test/metabase/sync/analyze/query_results_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..281a5c8357e9de6a2ff6db65a41db00b8cbe0757 --- /dev/null +++ b/test/metabase/sync/analyze/query_results_test.clj @@ -0,0 +1,111 @@ +(ns metabase.sync.analyze.query-results-test + (:require [clojure.string :as str] + [expectations :refer :all] + [metabase + [query-processor :as qp] + [util :as u]] + [metabase.models + [card :refer [Card]] + [database :as database]] + [metabase.sync.analyze + [fingerprint :as fprint] + [query-results :as qr :refer :all]] + [metabase.sync.analyze.classifiers.name :as classify-name] + [metabase.test + [data :as data] + [util :as tu]] + [metabase.test.mock.util :as mutil] + [toucan.util.test :as tt])) + +(defn- column->name-keyword [field-or-column-metadata] + (-> field-or-column-metadata + :name + str/lower-case + keyword)) + +(defn- name->fingerprints [field-or-metadata] + (zipmap (map column->name-keyword field-or-metadata) + (map :fingerprint field-or-metadata))) + +(defn- name->special-type [field-or-metadata] + (zipmap (map column->name-keyword field-or-metadata) + (map :special_type field-or-metadata))) + +(defn- query->result-metadata + [query-map] + (->> query-map + qp/process-query + :data + results->column-metadata + (tu/round-all-decimals 2))) + +(defn- query-for-card [card] + {:database database/virtual-id + :type :query + :query {:source-table (str "card__" (u/get-id card))}}) + +(def ^:private venue-name->special-types + {:id :type/PK, + :name :type/Name, + :price :type/Category, + :category_id :type/FK, + :latitude :type/Latitude, + :longitude :type/Longitude}) + +;; Getting the result metadata for a card backed by an MBQL query should use the fingerprints from the related fields +(expect + mutil/venue-fingerprints + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :query + :query {:source-table (data/id :venues)}}}] + (tu/throw-if-called fprint/fingerprint + (name->fingerprints + (query->result-metadata (query-for-card card)))))) + +;; Getting the result metadata for a card backed by an MBQL query should just use the special types from the related fields +(expect + venue-name->special-types + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :query + :query {:source-table (data/id :venues)}}}] + (name->special-type (query->result-metadata (query-for-card card))))) + +;; Native queries don't know what the associated Fields are for the results, we need to compute the fingerprints, but +;; they should sill be the same +(expect + mutil/venue-fingerprints + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :native + :native {:query "select * from venues"}}}] + (name->fingerprints + (query->result-metadata (query-for-card card))))) + +;; Similarly, check that we computed the correct special types. Note that we don't know that the category_id is an FK +;; as it's just an integer flowing through, similarly Price isn't found to be a category as we're inferring by name +;; only +(expect + (assoc venue-name->special-types :category_id nil, :price nil) + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :native + :native {:query "select * from venues"}}}] + (name->special-type + (query->result-metadata (query-for-card card))))) + +;; Limiting to just 1 column on an MBQL query should still get the result metadata from the Field +(expect + (select-keys mutil/venue-fingerprints [:longitude]) + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :query + :query {:source-table (data/id :venues)}}}] + (tu/throw-if-called fprint/fingerprint + (name->fingerprints + (query->result-metadata (assoc-in (query-for-card card) [:query :fields] (data/id :venues :longitude))))))) + +;; Similar query as above, just native so that we need to calculate the fingerprint +(expect + (select-keys mutil/venue-fingerprints [:longitude]) + (tt/with-temp Card [card {:dataset_query {:database (data/id) + :type :native + :native {:query "select longitude from venues"}}}] + (name->fingerprints + (query->result-metadata (query-for-card card))))) diff --git a/test/metabase/sync/sync_metadata/fields_test.clj b/test/metabase/sync/sync_metadata/fields_test.clj index b029c4c51cb6eea6957576f1a26cfd3177c686e0..4f9e3cb54a615df88ae37fd619d6f7398389d599 100644 --- a/test/metabase/sync/sync_metadata/fields_test.clj +++ b/test/metabase/sync/sync_metadata/fields_test.clj @@ -12,7 +12,9 @@ [field :refer [Field]] [table :refer [Table]]] [metabase.sync.util-test :as sut] - [metabase.test.data :as data] + [metabase.test + [data :as data] + [util :as tu]] [metabase.test.data.one-off-dbs :as one-off-dbs] [toucan [db :as db] @@ -181,11 +183,6 @@ (doseq [statement statements] (jdbc/execute! one-off-dbs/*conn* [statement]))) -(defmacro ^:private throw-if-called {:style/indent 1} [fn-var & body] - `(with-redefs [~fn-var (fn [& args#] - (throw (RuntimeException. "Should not be called!")))] - ~@body)) - ;; Validate the changing of a column's type triggers a hash miss and sync (expect [ ;; Original column type @@ -212,7 +209,7 @@ {new-db-type :database_type} (get-field)] ;; Syncing again with no change should not call sync-field-instances! or update the hash - (throw-if-called metabase.sync.sync-metadata.fields/sync-field-instances! + (tu/throw-if-called metabase.sync.sync-metadata.fields/sync-field-instances! (sync/sync-database! (data/db)) [old-db-type new-db-type diff --git a/test/metabase/test/mock/util.clj b/test/metabase/test/mock/util.clj index a3a5b77455e4e2403b2b9684cedfbad71cbb6b19..8683f2615748d1e34420764538c1b2276e3d544e 100644 --- a/test/metabase/test/mock/util.clj +++ b/test/metabase/test/mock/util.clj @@ -43,6 +43,22 @@ :schedule_day nil :enabled true}) +(def venue-fingerprints + "Fingerprints for the full venues table" + {:name {:global {:distinct-count 100}, + :type {:type/Text {:percent-json 0.0, :percent-url 0.0, + :percent-email 0.0, :average-length 15.63}}} + :id {:global {:distinct-count 100}, + :type {:type/Number {:min 1, :max 100, :avg 50.5}}} + :price {:global {:distinct-count 4}, + :type {:type/Number {:min 1, :max 4, :avg 2.03}}} + :latitude {:global {:distinct-count 94}, + :type {:type/Number {:min 10.06, :max 40.78, :avg 35.51}}} + :category_id {:global {:distinct-count 28}, + :type {:type/Number {:min 2, :max 74, :avg 29.98}}} + :longitude {:global {:distinct-count 84}, + :type {:type/Number {:min -165.37, :max -73.95, :avg -116.0}}}}) + ;; This is just a fake implementation that just swoops in and returns somewhat-correct looking results for different ;; queries we know will get ran as part of sync (defn- is-table-row-count-query? [expanded-query] diff --git a/test/metabase/test/util.clj b/test/metabase/test/util.clj index a45f36610b491c8aa85a6f33379ace43b85582c0..006e5cf54a798830e5f89b0e9df7472d8509d03b 100644 --- a/test/metabase/test/util.clj +++ b/test/metabase/test/util.clj @@ -29,6 +29,7 @@ [setting :as setting] [table :refer [Table]] [user :refer [User]]] + [metabase.query-processor.util :as qputil] [metabase.query-processor.middleware.expand :as ql] [metabase.test.data :as data] [metabase.test.data @@ -379,6 +380,13 @@ [:cols])] (update-in query-results maybe-data-cols #(map round-fingerprint %)))) +(defn round-all-decimals + "Uses `walk/postwalk` to crawl `data`, looking for any double values, will round any it finds" + [decimal-place data] + (qputil/postwalk-pred double? + #(u/round-to-decimals decimal-place %) + data)) + ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | SCHEDULER | @@ -558,3 +566,11 @@ ;; This releases the fake query function so it finishes (deliver pause-query true) true)]))) + +(defmacro throw-if-called + "Redefines `fn-var` with a function that throws an exception if it's called" + {:style/indent 1} + [fn-var & body] + `(with-redefs [~fn-var (fn [& args#] + (throw (RuntimeException. "Should not be called!")))] + ~@body))