Skip to content
Snippets Groups Projects
Commit fe232d5a authored by Cam Saul's avatar Cam Saul Committed by GitHub
Browse files

Merge pull request #5649 from metabase/only-analyze-new-fields

Only analyze newly created Fields
parents 17a6ddec f279ccc5
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,8 @@
This is significantly more expensive than the basic sync-metadata step, and involves things
like running MBQL queries and fetching values to do things like determine Table row counts
and infer field special types."
(:require [metabase.models.field :refer [Field]]
(:require [clojure.tools.logging :as log]
[metabase.models.field :refer [Field]]
[metabase.sync
[interface :as i]
[util :as sync-util]]
......@@ -19,25 +20,28 @@
[table :- i/TableInstance]
(db/update-where! Field {:table_id (u/get-id table)
:active true
:visibility_type [:not= "retired"]}
:visibility_type [:not= "retired"]
:preview_display true
:last_analyzed nil}
:last_analyzed (u/new-sql-timestamp)))
(s/defn ^:always-validate analyze-table!
"Perform in-depth analysis for a TABLE."
[table :- i/TableInstance]
(table-row-count/update-row-count! table)
(special-types/infer-special-types! table)
(update-fields-last-analyzed! table))
(s/defn ^:always-validate analyze-db!
"Perform in-depth analysis on the data for all Tables in a given DATABASE.
This is dependent on what each database driver supports, but includes things like cardinality testing and table row counting.
This also updates the `:last_analyzed` value for each affected Field."
[database :- i/DatabaseInstance]
(sync-util/sync-operation :analyze database (format "Analyze data for %s" (sync-util/name-for-logging database))
(table-row-count/update-table-row-counts! database)
(special-types/infer-special-types! database)
(doseq [table (sync-util/db->sync-tables database)]
(update-fields-last-analyzed! table))))
(s/defn ^:always-validate analyze-table!
"Perform in-depth analysis for a TABLE."
[table :- i/TableInstance]
(table-row-count/update-row-count-for-table! table)
(special-types/infer-special-types-for-table! table)
(update-fields-last-analyzed! table))
(let [tables (sync-util/db->sync-tables database)]
(sync-util/with-emoji-progress-bar [emoji-progress-bar (count tables)]
(doseq [table tables]
(analyze-table! table)
(log/info (u/format-color 'blue "%s Analyzed %s" (emoji-progress-bar) (sync-util/name-for-logging table))))))))
......@@ -5,8 +5,7 @@
(Note: this namespace is sort of a misnomer, since special type isn't the only thing that can get set by
the functions here. `:preview_display` can also get set to `false` if a Field has on average very
large (long) values.)"
(:require [clojure.tools.logging :as log]
[metabase.models.field :refer [Field]]
(:require [metabase.models.field :refer [Field]]
[metabase.sync
[interface :as i]
[util :as sync-util]]
......@@ -19,16 +18,18 @@
(s/defn ^:private ^:always-validate fields-to-infer-special-types-for :- (s/maybe [i/FieldInstance])
"Return a sequences of Fields belonging to TABLE for which we should attempt to determine special type.
This should include fields that are active, visibile, and without an existing special type."
This should include NEW fields that are active, visibile, and without an existing special type."
[table :- i/TableInstance]
(seq (db/select Field
:table_id (u/get-id table)
:special_type nil
:active true
:visibility_type [:not= "retired"]
:preview_display true)))
:preview_display true
:last_analyzed nil))) ; only analyze NEW fields
(s/defn ^:always-validate infer-special-types-for-table!
(s/defn ^:always-validate infer-special-types!
"Infer (and set) the special types and preview display status for Fields
belonging to TABLE, and mark the fields as recently analyzed."
[table :- i/TableInstance]
......@@ -39,13 +40,3 @@
;; Ok, now fetch fields that *still* don't have a special type. Try to infer a type from a sequence of their values.
(when-let [fields (fields-to-infer-special-types-for table)]
(values/infer-special-types-by-value! table fields))))
(s/defn ^:always-validate infer-special-types!
"Infer (and set) the special types and preview display status for all the
Fields belonging to DATABASE, and mark the fields as recently analyzed."
[database :- i/DatabaseInstance]
(let [tables (sync-util/db->sync-tables database)]
(sync-util/with-emoji-progress-bar [emoji-progress-bar (count tables)]
(doseq [table tables]
(infer-special-types-for-table! table)
(log/info (u/format-color 'blue "%s Analyzed %s" (emoji-progress-bar) (sync-util/name-for-logging table)))))))
......@@ -16,7 +16,7 @@
(sync-util/with-error-handling (format "Unable to determine row count for %s" (sync-util/name-for-logging table))
(queries/table-row-count table)))
(s/defn ^:always-validate update-row-count-for-table!
(s/defn ^:always-validate update-row-count!
"Update the cached row count (`rows`) for a single TABLE."
[table :- i/TableInstance]
(sync-util/with-error-handling (format "Error setting table row count for %s" (sync-util/name-for-logging table))
......@@ -24,9 +24,3 @@
(log/debug (format "Set table row count for %s to %d" (sync-util/name-for-logging table) row-count))
(db/update! Table (u/get-id table)
:rows row-count))))
(s/defn ^:always-validate update-table-row-counts!
"Update the cached row count (`rows`) for all the syncable tables in DATABASE."
[database :- i/DatabaseInstance]
(doseq [table (sync-util/db->sync-tables database)]
(update-row-count-for-table! table)))
......@@ -17,6 +17,7 @@
[schema.core :as s]))
(s/defn ^:always-validate sync-db-metadata!
"Sync the metadata for a Metabase DATABASE. This makes sure child Table & Field objects are synchronized."
[database :- i/DatabaseInstance]
(sync-util/sync-operation :sync-metadata database (format "Sync metadata for %s" (sync-util/name-for-logging database))
;; Make sure the relevant table models are up-to-date
......
......@@ -18,5 +18,5 @@
(tt/with-temp Table [venues-copy (let [venues-table (Table (data/id :venues))]
(assoc (select-keys venues-table [:schema :name :db_id])
:rows 0))]
(table-row-count/update-row-count-for-table! venues-copy)
(table-row-count/update-row-count! venues-copy)
(db/select-one-field :rows Table :id (u/get-id venues-copy))))
(ns metabase.sync.analyze-test
(:require [expectations :refer :all]
[metabase.models
[database :refer [Database]]
[field :refer [Field]]
[table :refer [Table]]]
[metabase.sync
[analyze :as analyze]
[sync-metadata :as sync-metadata]]
[metabase.test.data :as data]
[metabase.util :as u]
[toucan.db :as db]
[toucan.util.test :as tt]))
(def ^:private fake-analysis-completion-date
(u/->Timestamp "2017-08-01"))
;; Check that Fields do *not* get analyzed if they're not newly created
(expect
#{{:name "LONGITUDE", :special_type nil, :last_analyzed fake-analysis-completion-date}
{:name "CATEGORY_ID", :special_type nil, :last_analyzed fake-analysis-completion-date}
{:name "PRICE", :special_type nil, :last_analyzed fake-analysis-completion-date}
{:name "LATITUDE", :special_type nil, :last_analyzed fake-analysis-completion-date}
{:name "NAME", :special_type nil, :last_analyzed fake-analysis-completion-date}
{:name "ID", :special_type :type/PK, :last_analyzed fake-analysis-completion-date}} ; PK is ok because it gets marked as part of metadata sync
(tt/with-temp* [Database [db {:engine "h2", :details (:details (data/db))}]
Table [table {:name "VENUES", :db_id (u/get-id db)}]]
;; sync the metadata, but DON't do analysis YET
(sync-metadata/sync-table-metadata! table)
;; now mark all the Tables as analyzed so they won't be subject to analysis
(db/update-where! Field {:table_id (u/get-id table)}
:last_analyzed fake-analysis-completion-date)
;; ok, NOW run the analysis process
(analyze/analyze-table! table)
;; check and make sure all the Fields don't have special types and their last_analyzed date didn't change
(set (for [field (db/select [Field :name :special_type :last_analyzed] :table_id (u/get-id table))]
(into {} field)))))
;; ...but they *SHOULD* get analyzed if they ARE newly created
(expect
#{{:name "LATITUDE", :special_type :type/Latitude, :last_analyzed true}
{:name "ID", :special_type :type/PK, :last_analyzed true}
{:name "PRICE", :special_type :type/Category, :last_analyzed true}
{:name "LONGITUDE", :special_type :type/Longitude, :last_analyzed true}
{:name "CATEGORY_ID", :special_type :type/Category, :last_analyzed true}
{:name "NAME", :special_type :type/Name, :last_analyzed true}}
(tt/with-temp* [Database [db {:engine "h2", :details (:details (data/db))}]
Table [table {:name "VENUES", :db_id (u/get-id db)}]]
;; sync the metadata, but DON't do analysis YET
(sync-metadata/sync-table-metadata! table)
;; ok, NOW run the analysis process
(analyze/analyze-table! table)
;; fields *SHOULD* have special types now
(set (for [field (db/select [Field :name :special_type :last_analyzed] :table_id (u/get-id table))]
(into {} (update field :last_analyzed boolean))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment