diff --git a/src/metabase/sync/analyze.clj b/src/metabase/sync/analyze.clj index 2c022f52242da0370d6f8b6be080cb5736744288..a94e677904ae31d5e82da9fee76da82dbee6b448 100644 --- a/src/metabase/sync/analyze.clj +++ b/src/metabase/sync/analyze.clj @@ -3,7 +3,8 @@ This is significantly more expensive than the basic sync-metadata step, and involves things like running MBQL queries and fetching values to do things like determine Table row counts and infer field special types." - (:require [metabase.models.field :refer [Field]] + (:require [clojure.tools.logging :as log] + [metabase.models.field :refer [Field]] [metabase.sync [interface :as i] [util :as sync-util]] @@ -19,25 +20,28 @@ [table :- i/TableInstance] (db/update-where! Field {:table_id (u/get-id table) :active true - :visibility_type [:not= "retired"]} + :visibility_type [:not= "retired"] + :preview_display true + :last_analyzed nil} :last_analyzed (u/new-sql-timestamp))) +(s/defn ^:always-validate analyze-table! + "Perform in-depth analysis for a TABLE." + [table :- i/TableInstance] + (table-row-count/update-row-count! table) + (special-types/infer-special-types! table) + (update-fields-last-analyzed! table)) + + (s/defn ^:always-validate analyze-db! "Perform in-depth analysis on the data for all Tables in a given DATABASE. This is dependent on what each database driver supports, but includes things like cardinality testing and table row counting. This also updates the `:last_analyzed` value for each affected Field." [database :- i/DatabaseInstance] (sync-util/sync-operation :analyze database (format "Analyze data for %s" (sync-util/name-for-logging database)) - (table-row-count/update-table-row-counts! database) - (special-types/infer-special-types! database) - (doseq [table (sync-util/db->sync-tables database)] - (update-fields-last-analyzed! table)))) - - -(s/defn ^:always-validate analyze-table! - "Perform in-depth analysis for a TABLE." - [table :- i/TableInstance] - (table-row-count/update-row-count-for-table! table) - (special-types/infer-special-types-for-table! table) - (update-fields-last-analyzed! table)) + (let [tables (sync-util/db->sync-tables database)] + (sync-util/with-emoji-progress-bar [emoji-progress-bar (count tables)] + (doseq [table tables] + (analyze-table! table) + (log/info (u/format-color 'blue "%s Analyzed %s" (emoji-progress-bar) (sync-util/name-for-logging table)))))))) diff --git a/src/metabase/sync/analyze/special_types.clj b/src/metabase/sync/analyze/special_types.clj index 166aa54f8fc8f37448a445af7fc842a4f896cb55..82a97a09dc6b00a39f1b5b22846763732834a32c 100644 --- a/src/metabase/sync/analyze/special_types.clj +++ b/src/metabase/sync/analyze/special_types.clj @@ -5,8 +5,7 @@ (Note: this namespace is sort of a misnomer, since special type isn't the only thing that can get set by the functions here. `:preview_display` can also get set to `false` if a Field has on average very large (long) values.)" - (:require [clojure.tools.logging :as log] - [metabase.models.field :refer [Field]] + (:require [metabase.models.field :refer [Field]] [metabase.sync [interface :as i] [util :as sync-util]] @@ -19,16 +18,18 @@ (s/defn ^:private ^:always-validate fields-to-infer-special-types-for :- (s/maybe [i/FieldInstance]) "Return a sequences of Fields belonging to TABLE for which we should attempt to determine special type. - This should include fields that are active, visibile, and without an existing special type." + This should include NEW fields that are active, visibile, and without an existing special type." [table :- i/TableInstance] (seq (db/select Field :table_id (u/get-id table) :special_type nil :active true :visibility_type [:not= "retired"] - :preview_display true))) + :preview_display true + :last_analyzed nil))) ; only analyze NEW fields -(s/defn ^:always-validate infer-special-types-for-table! + +(s/defn ^:always-validate infer-special-types! "Infer (and set) the special types and preview display status for Fields belonging to TABLE, and mark the fields as recently analyzed." [table :- i/TableInstance] @@ -39,13 +40,3 @@ ;; Ok, now fetch fields that *still* don't have a special type. Try to infer a type from a sequence of their values. (when-let [fields (fields-to-infer-special-types-for table)] (values/infer-special-types-by-value! table fields)))) - -(s/defn ^:always-validate infer-special-types! - "Infer (and set) the special types and preview display status for all the - Fields belonging to DATABASE, and mark the fields as recently analyzed." - [database :- i/DatabaseInstance] - (let [tables (sync-util/db->sync-tables database)] - (sync-util/with-emoji-progress-bar [emoji-progress-bar (count tables)] - (doseq [table tables] - (infer-special-types-for-table! table) - (log/info (u/format-color 'blue "%s Analyzed %s" (emoji-progress-bar) (sync-util/name-for-logging table))))))) diff --git a/src/metabase/sync/analyze/table_row_count.clj b/src/metabase/sync/analyze/table_row_count.clj index 6a264f34c866364d181ab0bb573e08be18c5446a..a4aa5021f3ea3c79800914241708af3babc1371f 100644 --- a/src/metabase/sync/analyze/table_row_count.clj +++ b/src/metabase/sync/analyze/table_row_count.clj @@ -16,7 +16,7 @@ (sync-util/with-error-handling (format "Unable to determine row count for %s" (sync-util/name-for-logging table)) (queries/table-row-count table))) -(s/defn ^:always-validate update-row-count-for-table! +(s/defn ^:always-validate update-row-count! "Update the cached row count (`rows`) for a single TABLE." [table :- i/TableInstance] (sync-util/with-error-handling (format "Error setting table row count for %s" (sync-util/name-for-logging table)) @@ -24,9 +24,3 @@ (log/debug (format "Set table row count for %s to %d" (sync-util/name-for-logging table) row-count)) (db/update! Table (u/get-id table) :rows row-count)))) - -(s/defn ^:always-validate update-table-row-counts! - "Update the cached row count (`rows`) for all the syncable tables in DATABASE." - [database :- i/DatabaseInstance] - (doseq [table (sync-util/db->sync-tables database)] - (update-row-count-for-table! table))) diff --git a/src/metabase/sync/sync_metadata.clj b/src/metabase/sync/sync_metadata.clj index 91e473d6d34fcddbea95824a22acccbb2f22b806..9975f7c055d480d2eeb6339acebc1027e57ad516 100644 --- a/src/metabase/sync/sync_metadata.clj +++ b/src/metabase/sync/sync_metadata.clj @@ -17,6 +17,7 @@ [schema.core :as s])) (s/defn ^:always-validate sync-db-metadata! + "Sync the metadata for a Metabase DATABASE. This makes sure child Table & Field objects are synchronized." [database :- i/DatabaseInstance] (sync-util/sync-operation :sync-metadata database (format "Sync metadata for %s" (sync-util/name-for-logging database)) ;; Make sure the relevant table models are up-to-date diff --git a/test/metabase/sync/analyze/table_row_count_test.clj b/test/metabase/sync/analyze/table_row_count_test.clj index eb5d6787ec004a8c59f00088e55309016c805ffb..3c2b3decbe72750f5119dfc21ca2fb60c377831c 100644 --- a/test/metabase/sync/analyze/table_row_count_test.clj +++ b/test/metabase/sync/analyze/table_row_count_test.clj @@ -18,5 +18,5 @@ (tt/with-temp Table [venues-copy (let [venues-table (Table (data/id :venues))] (assoc (select-keys venues-table [:schema :name :db_id]) :rows 0))] - (table-row-count/update-row-count-for-table! venues-copy) + (table-row-count/update-row-count! venues-copy) (db/select-one-field :rows Table :id (u/get-id venues-copy)))) diff --git a/test/metabase/sync/analyze_test.clj b/test/metabase/sync/analyze_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..255b3cb61a582bd9e0af20e5fe55a3c2a0e4eb19 --- /dev/null +++ b/test/metabase/sync/analyze_test.clj @@ -0,0 +1,55 @@ +(ns metabase.sync.analyze-test + (:require [expectations :refer :all] + [metabase.models + [database :refer [Database]] + [field :refer [Field]] + [table :refer [Table]]] + [metabase.sync + [analyze :as analyze] + [sync-metadata :as sync-metadata]] + [metabase.test.data :as data] + [metabase.util :as u] + [toucan.db :as db] + [toucan.util.test :as tt])) + +(def ^:private fake-analysis-completion-date + (u/->Timestamp "2017-08-01")) + +;; Check that Fields do *not* get analyzed if they're not newly created +(expect + #{{:name "LONGITUDE", :special_type nil, :last_analyzed fake-analysis-completion-date} + {:name "CATEGORY_ID", :special_type nil, :last_analyzed fake-analysis-completion-date} + {:name "PRICE", :special_type nil, :last_analyzed fake-analysis-completion-date} + {:name "LATITUDE", :special_type nil, :last_analyzed fake-analysis-completion-date} + {:name "NAME", :special_type nil, :last_analyzed fake-analysis-completion-date} + {:name "ID", :special_type :type/PK, :last_analyzed fake-analysis-completion-date}} ; PK is ok because it gets marked as part of metadata sync + (tt/with-temp* [Database [db {:engine "h2", :details (:details (data/db))}] + Table [table {:name "VENUES", :db_id (u/get-id db)}]] + ;; sync the metadata, but DON't do analysis YET + (sync-metadata/sync-table-metadata! table) + ;; now mark all the Tables as analyzed so they won't be subject to analysis + (db/update-where! Field {:table_id (u/get-id table)} + :last_analyzed fake-analysis-completion-date) + ;; ok, NOW run the analysis process + (analyze/analyze-table! table) + ;; check and make sure all the Fields don't have special types and their last_analyzed date didn't change + (set (for [field (db/select [Field :name :special_type :last_analyzed] :table_id (u/get-id table))] + (into {} field))))) + +;; ...but they *SHOULD* get analyzed if they ARE newly created +(expect + #{{:name "LATITUDE", :special_type :type/Latitude, :last_analyzed true} + {:name "ID", :special_type :type/PK, :last_analyzed true} + {:name "PRICE", :special_type :type/Category, :last_analyzed true} + {:name "LONGITUDE", :special_type :type/Longitude, :last_analyzed true} + {:name "CATEGORY_ID", :special_type :type/Category, :last_analyzed true} + {:name "NAME", :special_type :type/Name, :last_analyzed true}} + (tt/with-temp* [Database [db {:engine "h2", :details (:details (data/db))}] + Table [table {:name "VENUES", :db_id (u/get-id db)}]] + ;; sync the metadata, but DON't do analysis YET + (sync-metadata/sync-table-metadata! table) + ;; ok, NOW run the analysis process + (analyze/analyze-table! table) + ;; fields *SHOULD* have special types now + (set (for [field (db/select [Field :name :special_type :last_analyzed] :table_id (u/get-id table))] + (into {} (update field :last_analyzed boolean))))))