diff --git a/src/metabase/sync/sync_metadata/fields.clj b/src/metabase/sync/sync_metadata/fields.clj index cb75e3756d3844489d76c8fd5182fc14b6b485e6..1403f619693db5f111f61882663ecc8c3ee27f4c 100644 --- a/src/metabase/sync/sync_metadata/fields.clj +++ b/src/metabase/sync/sync_metadata/fields.clj @@ -1,379 +1,117 @@ (ns metabase.sync.sync-metadata.fields "Logic for updating Metabase Field models from metadata fetched from a physical DB. + The basic idea here is to look at the metadata we get from calling `describe-table` on a connected database, then construct an identical set of metadata from what we have about that Table in the Metabase DB. Then we iterate over both sets of Metadata and perform whatever steps are needed to make sure the things in the DB match the things that - came back from `describe-table`." - (:require [clojure.string :as str] - [clojure.tools.logging :as log] - [medley.core :as m] - [metabase.models - [field :as field :refer [Field]] - [humanization :as humanization] - [table :as table :refer [Table]]] - [metabase.sync - [fetch-metadata :as fetch-metadata] - [interface :as i] - [util :as sync-util]] - [metabase.util :as u] - [metabase.util.schema :as su] - [schema.core :as s] - [toucan.db :as db])) - -(def ^:private ParentID (s/maybe su/IntGreaterThanZero)) + came back from `describe-table`. These steps are broken out into three main parts: -(def ^:private TableMetadataFieldWithID - "Schema for `TableMetadataField` with an included ID of the corresponding Metabase Field object. - `our-metadata` is always returned in this format. (The ID is needed in certain places so we know which Fields to - retire, and the parent ID of any nested-fields.)" - (assoc i/TableMetadataField - :id su/IntGreaterThanZero - (s/optional-key :nested-fields) #{(s/recursive #'TableMetadataFieldWithID)})) + * Fetch Metadata - logic is in `metabase.sync.sync-metadata.fields.fetch-metadata`. Construct a map of metadata from + the Metabase application database that matches the form of DB metadata about Fields in a Table. This metadata is + used to next two steps to determine what sync operations need to be performed by comparing the differences in the + two sets of Metadata. -(def ^:private TableMetadataFieldWithOptionalID - "Schema for either `i/TableMetadataField` (`db-metadata`) or `TableMetadataFieldWithID` (`our-metadata`)." - (assoc i/TableMetadataField - (s/optional-key :id) su/IntGreaterThanZero - (s/optional-key :nested-fields) #{(s/recursive #'TableMetadataFieldWithOptionalID)})) - - -(s/defn ^:private field-metadata-name-for-logging :- s/Str - "Return a 'name for logging' for a map that conforms to the `TableMetadataField` schema. - - (field-metadata-name-for-logging table field-metadata) ; -> \"Table 'venues' Field 'name'\"" - [table :- i/TableInstance, field-metadata :- TableMetadataFieldWithOptionalID] - (format "%s Field '%s'" (sync-util/name-for-logging table) (:name field-metadata))) - -(defn- canonical-name [field] - (str/lower-case (:name field))) - -(s/defn ^:private special-type [field :- (s/maybe i/TableMetadataField)] - (and field - (or (:special-type field) - (when (:pk? field) :type/PK)))) - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | CREATING / REACTIVATING FIELDS | -;;; +----------------------------------------------------------------------------------------------------------------+ + * Sync Field instances -- logic is in `metabase.sync.sync-metadata.fields.sync-instances`. Make sure the `Field` + instances in the Metabase application database match up with those in the DB metadata, creating new Fields as + needed, and marking existing ones as active or inactive as appropriate. -(s/defn ^:private matching-inactive-fields :- (s/maybe [i/FieldInstance]) - "Return an inactive metabase Field that matches NEW-FIELD-METADATA, if any such Field existis." - [table :- i/TableInstance, new-field-metadatas :- [i/TableMetadataField], parent-id :- ParentID] - (when (seq new-field-metadatas) - (db/select Field - :table_id (u/get-id table) - :%lower.name [:in (map canonical-name new-field-metadatas)] - :parent_id parent-id - :active false))) + * Update instance metadata -- logic is in `metabase.sync.sync-metadata.fields.sync-metadata`. Update metadata + properties of `Field` instances in the application database as needed -- this includes the base type, database type, + special type, and comment/remark (description) properties. This primarily affects Fields that were not newly + created; newly created Fields are given appropriate metadata when first synced (by `sync-instances`). -(s/defn ^:private insert-fields-if-needed! :- (s/maybe [s/Int]) - [table :- i/TableInstance, new-fields :- [i/TableMetadataField], parent-id :- ParentID] - (when (seq new-fields) - (db/insert-many! Field - (for [{:keys [database-type base-type field-comment], field-name :name :as field} new-fields] - {:table_id (u/get-id table) - :name field-name - :display_name (humanization/name->human-readable-name field-name) - :database_type (or database-type "NULL") ; placeholder for Fields w/ no type info (e.g. Mongo) & all NULL - :base_type base-type - :special_type (special-type field) - :parent_id parent-id - :description field-comment})))) + A note on terminology used in `metabase.sync.sync-metadata.fields.*` namespaces: -(s/defn ^:private ->metabase-fields! :- [i/FieldInstance] - "Return an active Metabase Field instance that matches NEW-FIELD-METADATA. This object will be created or - reactivated as a side effect of calling this function." - [table :- i/TableInstance, new-field-metadata-chunk :- [i/TableMetadataField], parent-id :- ParentID] - (let [fields-to-reactivate (matching-inactive-fields table new-field-metadata-chunk parent-id)] + * `db-metadata` is a set of `field-metadata` maps coming back from the DB (e.g. from something like JDBC + `DatabaseMetaData`) describing the columns (or equivalent) currently present in the table (or equivalent) that we're + syncing. - ;; if the field already exists but was just marked inactive then reäctivate it - (when (seq fields-to-reactivate) - (db/update-where! Field {:id [:in (map u/get-id fields-to-reactivate)]} - :active true)) + * `field-metadata` is a map of information describing a single columnn currently present in the table being synced - (let [reactivated-pred (comp (set (map canonical-name fields-to-reactivate)) canonical-name) - ;; If we reactivated the fields, no need to insert them - new-field-ids (insert-fields-if-needed! table (remove reactivated-pred new-field-metadata-chunk) parent-id)] + * `our-metadata` is a set of maps of Field metadata reconstructed from the Metabase application database. - ;; now return the Field in question - (when-let [new-and-updated-fields (seq (map u/get-id (concat fields-to-reactivate new-field-ids)))] - (db/select Field :id [:in new-and-updated-fields]))))) + * `metabase-field` is a single map of Field metadata reconstructed from the Metabase application database; there is + a 1:1 correspondance between this metadata and a row in the `Field` table. Unlike `field-metadata`, these entries + always have an `:id` associated with them (because they are present in the Metabase application DB). -(s/defn ^:private create-or-reactivate-field-chunk! - "Create (or reactivate) a Metabase Field object(s) for NEW-FIELD-METABASE and any nested fields." - [table :- i/TableInstance, new-field-metadata-chunk :- [i/TableMetadataField], parent-id :- ParentID] - ;; Create (or reactivate) the Metabase Field entry for NEW-FIELD-METADATA... - (let [updated-fields (->metabase-fields! table new-field-metadata-chunk parent-id) - name->updated-field (u/key-by canonical-name updated-fields)] - ;; ...then recursively do the same for any nested fields that belong to it. - (doseq [{nested-fields :nested-fields, :as new-field} new-field-metadata-chunk - :when (seq nested-fields) - :let [new-parent-id (u/get-id (get name->updated-field (canonical-name new-field)))]] - (create-or-reactivate-field-chunk! table (seq nested-fields) new-parent-id)))) - - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | UPDATING FIELD TYPE INFO | -;;; +----------------------------------------------------------------------------------------------------------------+ - -(s/defn ^:private update-field-metadata-if-needed! - "Update the metadata for a Metabase Field as needed if any of the info coming back from the DB has changed." - [table :- i/TableInstance, metabase-field :- TableMetadataFieldWithID, field-metadata :- i/TableMetadataField] - (let [{old-database-type :database-type, - old-base-type :base-type, - old-field-comment :field-comment} metabase-field - {new-database-type :database-type, - new-base-type :base-type, - new-field-comment :field-comment} field-metadata - new-db-type? (not= old-database-type new-database-type) - new-base-type? (not= old-base-type new-base-type)] - ;; If the driver is reporting a different `database-type` than what we have recorded in the DB, update it - (when new-db-type? - (log/info (format "Database type of %s has changed from '%s' to '%s'." - (field-metadata-name-for-logging table metabase-field) - old-database-type new-database-type)) - (db/update! Field (u/get-id metabase-field), :database_type new-database-type)) - ;; Now do the same for `base-type` - (when new-base-type? - (log/info (format "Base type of %s has changed from '%s' to '%s'." - (field-metadata-name-for-logging table metabase-field) - old-base-type new-base-type)) - (db/update! Field (u/get-id metabase-field), :base_type new-base-type)) - ;; And field comment, but only if the existing description is blank - (when (and (str/blank? old-field-comment) (not (str/blank? new-field-comment))) - (log/info (format "Comment has been added for %s." - (field-metadata-name-for-logging table metabase-field))) - (db/update! Field (u/get-id metabase-field), :description new-field-comment)) - - (or new-db-type? new-base-type?))) - - - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | "RETIRING" INACTIVE FIELDS | -;;; +----------------------------------------------------------------------------------------------------------------+ - -(s/defn ^:private retire-field! - "Mark an OLD-FIELD belonging to TABLE as inactive if corresponding Field object exists." - [table :- i/TableInstance, old-field :- TableMetadataFieldWithID] - (log/info (format "Marking %s as inactive." (field-metadata-name-for-logging table old-field))) - (db/update! Field (:id old-field) - :active false) - ;; Now recursively mark and nested fields as inactive - (doseq [nested-field (:nested-fields old-field)] - (retire-field! table nested-field))) - - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | SYNCING FIELDS IN DB (CREATING, REACTIVATING, OR RETIRING) | -;;; +----------------------------------------------------------------------------------------------------------------+ - -(s/defn ^:private matching-field-metadata :- (s/maybe TableMetadataFieldWithOptionalID) - "Find Metadata that matches FIELD-METADATA from a set of OTHER-METADATA, if any exists." - [field-metadata :- TableMetadataFieldWithOptionalID, other-metadata :- #{TableMetadataFieldWithOptionalID}] - (some (fn [other-field-metadata] - (when (= (canonical-name field-metadata) - (canonical-name other-field-metadata)) - other-field-metadata)) - other-metadata)) - -(declare sync-field-instances!) - -(s/defn ^:private update-field-chunk! - [table :- i/TableInstance - known-fields :- {s/Str TableMetadataFieldWithID} - our-field :- i/TableMetadataField] - (let [db-field (get known-fields (canonical-name our-field)) - ;; if field exists in both metadata sets then make sure the data recorded about the Field such as - ;; database_type is up-to-date... - updated-count (if (update-field-metadata-if-needed! table db-field our-field) - ;; One field was updated - 1 - ;; No fields were updated - 0)] - ;; ...and then recursively check the nested fields. - (if-let [db-nested-fields (seq (:nested-fields our-field))] - ;; Any nested fields that were updated should be included in this chunk's count - (+ updated-count (sync-field-instances! table (set db-nested-fields) (:nested-fields db-field) (:id our-field))) - ;; No nested fields, so just the updated count from above - updated-count))) - -(s/defn ^:private sync-field-instances! - "Make sure the instances of Metabase Field are in-sync with the DB-METADATA." - [table :- i/TableInstance, db-metadata :- #{i/TableMetadataField}, our-metadata :- #{TableMetadataFieldWithID} - parent-id :- ParentID] - (let [known-fields (u/key-by canonical-name our-metadata)] - (+ - ;; Loop thru fields in DB-METADATA. Create/reactivate any fields that don't exist in OUR-METADATA. - (sync-util/sum-numbers - (fn [db-field-chunk] - (sync-util/with-error-handling (format "Error checking if Fields '%s' needs to be created or reactivated" - (pr-str (map :name db-field-chunk))) - (let [known-field-pred (comp known-fields canonical-name) - fields-to-update (filter known-field-pred db-field-chunk) - new-fields (remove known-field-pred db-field-chunk) - updated-chunk-count (sync-util/sum-numbers #(update-field-chunk! table known-fields %) fields-to-update)] - - ;; otherwise if field doesn't exist, create or reactivate it - (when (seq new-fields) - (create-or-reactivate-field-chunk! table new-fields parent-id)) - ;; Add the updated number of fields with the number of newly created fields - (+ updated-chunk-count (count new-fields))))) - (partition-all 1000 db-metadata)) - - ;; ok, loop thru Fields in OUR-METADATA. Mark Fields as inactive if they don't exist in DB-METADATA. - (sync-util/sum-numbers - (fn [our-field] - (sync-util/with-error-handling (format "Error checking if '%s' needs to be retired" (:name our-field)) - (if-let [db-field (matching-field-metadata our-field db-metadata)] - ;; if field exists in both metadata sets we just need to recursively check the nested fields - (if-let [our-nested-fields (seq (:nested-fields our-field))] - (sync-field-instances! table (:nested-fields db-field) (set our-nested-fields) (:id our-field)) - ;; No fields were updated - 0) - ;; otherwise if field exists in our metadata but not DB metadata time to make it inactive - (do - (retire-field! table our-field) - ;; 1 field was updated (retired) - 1)))) - our-metadata)))) - - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | UPDATING FIELD METADATA | -;;; +----------------------------------------------------------------------------------------------------------------+ - -(s/defn ^:private update-metadata! - "Make sure things like PK status and base-type are in sync with what has come back from the DB." - [table :- i/TableInstance, db-metadata :- #{i/TableMetadataField}, parent-id :- ParentID] - (let [existing-fields (db/select [Field :base_type :special_type :name :id :description] - :table_id (u/get-id table) - :active true - :parent_id parent-id) - field-name->db-metadata (u/key-by canonical-name db-metadata)] - ;; Make sure special types are up-to-date for all the fields - (sync-util/sum-numbers - (fn [field] - (let [db-field (get field-name->db-metadata (canonical-name field)) - new-special-type (special-type db-field)] - (if (and db-field - (or - ;; If the base_type has changed, we need to updated it - (not= (:base_type field) (:base-type db-field)) - ;; If the base_type hasn't changed, but we now have a special_type, we should - ;; update it. We should not overwrite a special_type that is already present - ;; (could have been specified by the user). - (and (not (:special_type field)) new-special-type))) - (do - ;; update special type if one came back from DB metadata but Field doesn't - ;; currently have one - (db/update! Field (u/get-id field) - (merge {:base_type (:base-type db-field)} - (when-not (:special_type field) - {:special_type new-special-type}))) - ;; now recursively do the same for any nested fields - (if-let [db-nested-fields (seq (:nested-fields db-field))] - ;; This field was updated + any nested fields - (+ 1 (update-metadata! table (set db-nested-fields) (u/get-id field))) - ;; No nested fields, so just this field was updated - 1)) - ;; The field was not updated - 0))) - existing-fields))) - - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | FETCHING OUR CURRENT METADATA | -;;; +----------------------------------------------------------------------------------------------------------------+ - -(s/defn ^:private add-nested-fields :- TableMetadataFieldWithID - "Recursively add entries for any nested-fields to FIELD." - [field-metadata :- TableMetadataFieldWithID, parent-id->fields :- {ParentID #{TableMetadataFieldWithID}}] - (let [nested-fields (get parent-id->fields (u/get-id field-metadata))] - (if-not (seq nested-fields) - field-metadata - (assoc field-metadata :nested-fields (set (for [nested-field nested-fields] - (add-nested-fields nested-field parent-id->fields))))))) - -(s/defn ^:private parent-id->fields :- {ParentID #{TableMetadataFieldWithID}} - "Build a map of the Metabase Fields we have for TABLE, keyed by their parent id (usually `nil`)." - [table :- i/TableInstance] - (->> (for [field (db/select [Field :name :database_type :base_type :special_type :parent_id :id :description] - :table_id (u/get-id table) - :active true)] - {:parent-id (:parent_id field) - :id (:id field) - :name (:name field) - :database-type (:database_type field) - :base-type (:base_type field) - :special-type (:special_type field) - :pk? (isa? (:special_type field) :type/PK) - :field-comment (:description field)}) - ;; make a map of parent-id -> set of - (group-by :parent-id) - ;; remove the parent ID because the Metadata from `describe-table` won't have it. Save the results as a set - (m/map-vals (fn [fields] - (set (for [field fields] - (dissoc field :parent-id))))))) - -(s/defn ^:private our-metadata :- #{TableMetadataFieldWithID} - "Return information we have about Fields for a TABLE currently in the application database - in (almost) exactly the same `TableMetadataField` format returned by `describe-table`." - [table :- i/TableInstance] - ;; Fetch all the Fields for this TABLE. Then group them by their parent ID, which we'll use to construct our - ;; metadata in the correct format - (let [parent-id->fields (parent-id->fields table)] - ;; get all the top-level fields, then call `add-nested-fields` to recursively add the fields - (set (for [field (get parent-id->fields nil)] - (add-nested-fields field parent-id->fields))))) - - -;;; +----------------------------------------------------------------------------------------------------------------+ -;;; | FETCHING METADATA FROM CONNECTED DB | -;;; +----------------------------------------------------------------------------------------------------------------+ - -(s/defn ^:private db-metadata :- #{i/TableMetadataField} - "Fetch metadata about Fields belonging to a given TABLE directly from an external database by calling its - driver's implementation of `describe-table`." - [database :- i/DatabaseInstance, table :- i/TableInstance] - (:fields (fetch-metadata/table-metadata database table))) + Other notes: + * In general the methods in these namespaces return the number of rows updated; these numbers are summed and used + for logging purposes by higher-level sync logic." + (:require [clojure.tools.logging :as log] + [metabase.models.table :as table :refer [Table]] + [metabase.sync + [interface :as i] + [util :as sync-util]] + [metabase.sync.sync-metadata.fields + [fetch-metadata :as fetch-metadata] + [sync-instances :as sync-instances] + [sync-metadata :as sync-metadata]] + [metabase.util :as u] + [metabase.util + [i18n :refer [trs]] + [schema :as su]] + [schema.core :as s] + [toucan.db :as db])) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | PUTTING IT ALL TOGETHER | ;;; +----------------------------------------------------------------------------------------------------------------+ -(defn- calculate-table-hash [db-metadata] +(s/defn ^:private calculate-table-hash :- su/NonBlankString + "Calculate a hash of the `db-field-metadata` (metadata about the Fields in a given Table); this hash is saved after + sync is completed; if it is the same next time we attempt to sync a Table, we can skip the Table entirely; since the + metadata coming back from the DB/drivers is the same as last timw." + [db-metadata :- #{i/TableMetadataField}] (->> db-metadata (map (juxt :name :database-type :base-type :special-type :pk? :nested-fields :custom :field-comment)) ;; We need a predictable sort order as the hash will be different if the order is different (sort-by first) sync-util/calculate-hash)) -(s/defn sync-fields-for-table! - "Sync the Fields in the Metabase application database for a specific TABLE." +(s/defn ^:private sync-and-update! :- su/IntGreaterThanOrEqualToZero + "Sync Field instances (i.e., rows in the Field table in the Metabase application DB) for a Table, and update metadata + properties (e.g. base type and comment/remark) as needed. Returns number of Fields synced." + [table :- i/TableInstance, db-metadata :- #{i/TableMetadataField}] + (+ (sync-instances/sync-instances! table db-metadata (fetch-metadata/our-metadata table)) + ;; Now that tables are synced and fields created as needed make sure field properties are in sync. + ;; Re-fetch our metadata because there might be somethings that have changed after calling + ;; `sync-instances` + (sync-metadata/update-metadata! table db-metadata (fetch-metadata/our-metadata table)))) + + +(s/defn sync-fields-for-table! :- {:updated-fields su/IntGreaterThanOrEqualToZero + :total-fields su/IntGreaterThanOrEqualToZero} + "Sync the Fields in the Metabase application database for a specific `table`." ([table :- i/TableInstance] (sync-fields-for-table! (table/database table) table)) - ([database :- i/DatabaseInstance, {:keys [fields_hash] :as table} :- i/TableInstance] - (sync-util/with-error-handling (format "Error syncing fields for %s" (sync-util/name-for-logging table)) - (let [db-field-metadata (db-metadata database table) - total-fields (count db-field-metadata) - db-hash (calculate-table-hash db-field-metadata)] - (if (and fields_hash (= db-hash fields_hash)) - (do - (log/debugf "Hash of '%s' matches stored hash, skipping fields sync for table" (sync-util/name-for-logging table)) - {:updated-fields 0 :total-fields total-fields}) - ;; make sure the instances of Field are in-sync - (let [updated-fields (+ (sync-field-instances! table db-field-metadata (our-metadata table) nil) - ;; now that tables are synced and fields created as needed make sure field properties are in sync - (update-metadata! table db-field-metadata nil))] - ;; Either there was no hash or there has been some change, update the hash too - (db/update! Table (u/get-id table) :fields_hash (calculate-table-hash (db-metadata database table))) - {:updated-fields updated-fields - :total-fields total-fields})))))) -(s/defn sync-fields! + ([database :- i/DatabaseInstance, {old-hash :fields_hash, :as table} :- i/TableInstance] + (sync-util/with-error-handling (trs "Error syncing Fields for Table ''{0}''" (sync-util/name-for-logging table)) + (let [db-metadata (fetch-metadata/db-metadata database table) + total-fields (count db-metadata) + ;; hash the metadata about Fields in the Table; if it mashes the hash from last time we synced then we know + ;; there's nothing to do here and we can skip iterating over the Fields + new-hash (calculate-table-hash db-metadata) + hash-changed? (or (not old-hash) (not= new-hash old-hash))] + ;; if hash is unchanged we can skip the rest of the sync process + (when-not hash-changed? + (log/debug (trs "Hash of {0} matches stored hash, skipping Fields sync" (sync-util/name-for-logging table)))) + ;; Ok, sync Fields if needed + (let [num-synced-fields (or (when hash-changed? + (sync-and-update! table db-metadata)) + 0)] + ;; Now that Fields sync has completed successfully, save updated hash in the application DB... + (when hash-changed? + (db/update! Table (u/get-id table) :fields_hash new-hash)) + ;;; ...and return the results + {:total-fields total-fields, :updated-fields num-synced-fields}))))) + + +(s/defn sync-fields! :- (s/maybe {:updated-fields su/IntGreaterThanOrEqualToZero + :total-fields su/IntGreaterThanOrEqualToZero}) "Sync the Fields in the Metabase application database for all the Tables in a DATABASE." [database :- i/DatabaseInstance] (let [tables (sync-util/db->sync-tables database)] - (apply merge-with + (map #(sync-fields-for-table! database %) tables)))) + (apply merge-with + (for [table tables] + (sync-fields-for-table! database table))))) diff --git a/src/metabase/sync/sync_metadata/fields/common.clj b/src/metabase/sync/sync_metadata/fields/common.clj new file mode 100644 index 0000000000000000000000000000000000000000..ac9e076624213688bfff82f2dfcb4e3076d9b6e1 --- /dev/null +++ b/src/metabase/sync/sync_metadata/fields/common.clj @@ -0,0 +1,61 @@ +(ns metabase.sync.sync-metadata.fields.common + "Schemas and functions shared by different `metabase.sync.sync-metadata.fields.*` namespaces." + (:require [clojure.string :as str] + [metabase.sync + [interface :as i] + [util :as sync-util]] + [metabase.util + [i18n :refer [trs]] + [schema :as su]] + [schema.core :as s])) + +(def ParentID + "Schema for the `parent-id` of a Field, i.e. an optional ID." + (s/maybe su/IntGreaterThanZero)) + +(def TableMetadataFieldWithID + "Schema for `TableMetadataField` with an included ID of the corresponding Metabase Field object. + `our-metadata` is always returned in this format. (The ID is needed in certain places so we know which Fields to + retire, and the parent ID of any nested-fields.)" + (assoc i/TableMetadataField + :id su/IntGreaterThanZero + (s/optional-key :nested-fields) #{(s/recursive #'TableMetadataFieldWithID)})) + +(def TableMetadataFieldWithOptionalID + "Schema for either `i/TableMetadataField` (`db-metadata`) or `TableMetadataFieldWithID` (`our-metadata`)." + (assoc i/TableMetadataField + (s/optional-key :id) su/IntGreaterThanZero + (s/optional-key :nested-fields) #{(s/recursive #'TableMetadataFieldWithOptionalID)})) + + +(s/defn field-metadata-name-for-logging :- s/Str + "Return a 'name for logging' for a map that conforms to the `TableMetadataField` schema. + + (field-metadata-name-for-logging table field-metadata) ; -> \"Table 'venues' Field 'name'\"" + [table :- i/TableInstance, field-metadata :- TableMetadataFieldWithOptionalID] + (format "%s %s '%s'" (sync-util/name-for-logging table) (trs "Field") (:name field-metadata))) + +(defn canonical-name + "Return the lower-cased 'canonical' name that should be used to uniquely identify `field` -- this is done to ignore + case differences when syncing, e.g. we will consider `FIELD` and `field` to mean the same thing." + [field] + (str/lower-case (:name field))) + +(s/defn special-type :- (s/maybe su/FieldType) + "Determine a the appropriate `special-type` for a Field with `field-metadata`." + [field-metadata :- (s/maybe i/TableMetadataField)] + (and field-metadata + (or (:special-type field-metadata) + (when (:pk? field-metadata) :type/PK)))) + +(s/defn matching-field-metadata :- (s/maybe TableMetadataFieldWithOptionalID) + "Find Metadata that matches `field-metadata` from a set of `other-metadata`, if any exists. Useful for finding the + corresponding Metabase Field for field metadata from the DB, or vice versa." + [field-metadata :- TableMetadataFieldWithOptionalID + other-metadata :- #{TableMetadataFieldWithOptionalID}] + (some + (fn [other-field-metadata] + (when (= (canonical-name field-metadata) + (canonical-name other-field-metadata)) + other-field-metadata)) + other-metadata)) diff --git a/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj b/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj new file mode 100644 index 0000000000000000000000000000000000000000..bf8694108e3dd1853ec0202a2ec1fb02ffbe14b7 --- /dev/null +++ b/src/metabase/sync/sync_metadata/fields/fetch_metadata.clj @@ -0,0 +1,82 @@ +(ns metabase.sync.sync-metadata.fields.fetch-metadata + "Logic for constructing a map of metadata from the Metabase application database that matches the form of DB metadata + about Fields in a Table, and for fetching the DB metadata itself. This metadata is used by the logic in other + `metabase.sync.sync-metadata.fields.*` namespaces to determine what sync operations need to be performed by + comparing the differences in the two sets of Metadata." + (:require [medley.core :as m] + [metabase.models.field :as field :refer [Field]] + [metabase.sync + [fetch-metadata :as fetch-metadata] + [interface :as i]] + [metabase.sync.sync-metadata.fields.common :as common] + [metabase.util :as u] + [schema.core :as s] + [toucan.db :as db])) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | FETCHING OUR CURRENT METADATA | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(s/defn ^:private fields->parent-id->fields :- {common/ParentID #{common/TableMetadataFieldWithID}} + [fields :- (s/maybe [i/FieldInstance])] + (->> (for [field fields] + {:parent-id (:parent_id field) + :id (:id field) + :name (:name field) + :database-type (:database_type field) + :base-type (:base_type field) + :special-type (:special_type field) + :pk? (isa? (:special_type field) :type/PK) + :field-comment (:description field)}) + ;; make a map of parent-id -> set of child Fields + (group-by :parent-id) + ;; remove the parent ID because the Metadata from `describe-table` won't have it. Save the results as a set + (m/map-vals (fn [fields] + (set (for [field fields] + (dissoc field :parent-id))))))) + +(s/defn ^:private add-nested-fields :- common/TableMetadataFieldWithID + "Recursively add entries for any nested-fields to `field`." + [metabase-field :- common/TableMetadataFieldWithID + parent-id->fields :- {common/ParentID #{common/TableMetadataFieldWithID}}] + (let [nested-fields (get parent-id->fields (u/get-id metabase-field))] + (if-not (seq nested-fields) + metabase-field + (assoc metabase-field :nested-fields (set (for [nested-field nested-fields] + (add-nested-fields nested-field parent-id->fields))))))) + +(s/defn fields->our-metadata :- #{common/TableMetadataFieldWithID} + "Given a sequence of Metabase Fields, format them and return them in a hierachy so the format matches the one + `db-metadata` comes back in." + ([fields :- (s/maybe [i/FieldInstance])] + (fields->our-metadata fields nil)) + + ([fields :- (s/maybe [i/FieldInstance]), top-level-parent-id :- common/ParentID] + (let [parent-id->fields (fields->parent-id->fields fields)] + ;; get all the top-level fields, then call `add-nested-fields` to recursively add the fields + (set (for [metabase-field (get parent-id->fields top-level-parent-id)] + (add-nested-fields metabase-field parent-id->fields)))))) + +(s/defn ^:private table->fields :- [i/FieldInstance] + "Fetch active Fields from the Metabase application database for a given `table`." + [table :- i/TableInstance] + (db/select [Field :name :database_type :base_type :special_type :parent_id :id :description] + :table_id (u/get-id table) + :active true)) + +(s/defn our-metadata :- #{common/TableMetadataFieldWithID} + "Return information we have about Fields for a `table` in the application database in (almost) exactly the same + `TableMetadataField` format returned by `describe-table`." + [table :- i/TableInstance] + (-> table table->fields fields->our-metadata)) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | FETCHING METADATA FROM CONNECTED DB | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(s/defn db-metadata :- #{i/TableMetadataField} + "Fetch metadata about Fields belonging to a given TABLE directly from an external database by calling its + driver's implementation of `describe-table`." + [database :- i/DatabaseInstance, table :- i/TableInstance] + (:fields (fetch-metadata/table-metadata database table))) diff --git a/src/metabase/sync/sync_metadata/fields/sync_instances.clj b/src/metabase/sync/sync_metadata/fields/sync_instances.clj new file mode 100644 index 0000000000000000000000000000000000000000..3b67b74e7b051525b5a0e7695f0e190f8675157f --- /dev/null +++ b/src/metabase/sync/sync_metadata/fields/sync_instances.clj @@ -0,0 +1,196 @@ +(ns metabase.sync.sync-metadata.fields.sync-instances + "Logic for syncing the instances of `Field` in the Metabase application DB with the set of Fields in the DB metadata. + Responsible for creating new instances of `Field` as needed, and marking existing ones as active or inactive as + needed. Recursively handles nested Fields. + + All nested Fields recursion is handled in one place, by the main entrypoint (`sync-instances!`) and helper + functions `sync-nested-field-instances!` and `sync-nested-fields-of-one-field!`. All other functions in this + namespace should ignore nested fields entirely; the will be invoked with those Fields as appropriate." + (:require [clojure.tools.logging :as log] + [metabase.models + [field :as field :refer [Field]] + [humanization :as humanization]] + [metabase.sync + [interface :as i] + [util :as sync-util]] + [metabase.sync.sync-metadata.fields + [common :as common] + [fetch-metadata :as fetch-metadata]] + [metabase.util :as u] + [metabase.util + [i18n :refer [trs]] + [schema :as su]] + [schema.core :as s] + [toucan.db :as db])) + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | CREATING / REACTIVATING FIELDS | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(s/defn ^:private matching-inactive-fields :- (s/maybe [i/FieldInstance]) + "Return inactive Metabase Fields that match any of the Fields described by `new-field-metadatas`, if any such Fields + exist." + [table :- i/TableInstance, new-field-metadatas :- [i/TableMetadataField], parent-id :- common/ParentID] + (when (seq new-field-metadatas) + (db/select Field + :table_id (u/get-id table) + :%lower.name [:in (map common/canonical-name new-field-metadatas)] + :parent_id parent-id + :active false))) + +(s/defn ^:private insert-new-fields! :- (s/maybe [s/Int]) + "Insert new Field rows for for all the Fields described by `new-field-metadatas`." + [table :- i/TableInstance, new-field-metadatas :- [i/TableMetadataField], parent-id :- common/ParentID] + (when (seq new-field-metadatas) + (db/insert-many! Field + (for [{:keys [database-type base-type field-comment], field-name :name :as field} new-field-metadatas] + {:table_id (u/get-id table) + :name field-name + :display_name (humanization/name->human-readable-name field-name) + :database_type (or database-type "NULL") ; placeholder for Fields w/ no type info (e.g. Mongo) & all NULL + :base_type base-type + :special_type (common/special-type field) + :parent_id parent-id + :description field-comment})))) + +(s/defn ^:private create-or-reactivate-fields! :- (s/maybe [i/FieldInstance]) + "Create (or reactivate) Metabase Field object(s) for any Fields in `new-field-metadatas`. Does *NOT* recursively + handle nested Fields." + [table :- i/TableInstance, new-field-metadatas :- [i/TableMetadataField], parent-id :- common/ParentID] + (let [fields-to-reactivate (matching-inactive-fields table new-field-metadatas parent-id)] + ;; if the fields already exist but were just marked inactive then reäctivate them + (when (seq fields-to-reactivate) + (db/update-where! Field {:id [:in (map u/get-id fields-to-reactivate)]} + :active true)) + (let [reactivated? (comp (set (map common/canonical-name fields-to-reactivate)) + common/canonical-name) + ;; If we reactivated the fields, no need to insert them; insert new rows for any that weren't reactivated + new-field-ids (insert-new-fields! table (remove reactivated? new-field-metadatas) parent-id)] + ;; now return the newly created or reactivated Fields + (when-let [new-and-updated-fields (seq (map u/get-id (concat fields-to-reactivate new-field-ids)))] + (db/select Field :id [:in new-and-updated-fields]))))) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | SYNCING INSTANCES OF 'ACTIVE' FIELDS (FIELDS IN DB METADATA) | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(def ^:private Updates + "Schema for the value returned by `sync-active-instances!`. Because we need to know about newly-inserted/reactivated + parent Fields when recursively syncing nested Fields, we need to propogate the updates to `our-metadata` made by + this function and pass them to other steps of the `sync-instances!` process." + {:num-updates su/IntGreaterThanOrEqualToZero + :our-metadata #{common/TableMetadataFieldWithID}}) + +(s/defn ^:private sync-active-instances! :- Updates + "Sync instances of `Field` in the application database with 'active' Fields in the DB being synced (i.e., ones that + are returned as part of the `db-metadata`). Creates or reactivates Fields as needed. Returns number of Fields + synced and updated `our-metadata` including the new Fields and their IDs." + [table :- i/TableInstance + db-metadata :- #{i/TableMetadataField} + our-metadata :- #{common/TableMetadataFieldWithID} + parent-id :- common/ParentID] + (let [known-fields (u/key-by common/canonical-name our-metadata) + our-metadata (atom our-metadata)] + {:num-updates + ;; Field sync logic below is broken out into chunks of 1000 fields for huge star schemas or other situations + ;; where we don't want to be updating way too many rows at once + (sync-util/sum-for [db-field-chunk (partition-all 1000 db-metadata)] + (sync-util/with-error-handling (trs "Error checking if Fields {0} need to be created or reactivated" + (pr-str (map :name db-field-chunk))) + (let [known-field? (comp known-fields common/canonical-name) + fields-to-update (filter known-field? db-field-chunk) + new-fields (remove known-field? db-field-chunk) + new-field-instances (create-or-reactivate-fields! table new-fields parent-id)] + ;; save any updates to `our-metadata` + (swap! our-metadata into (fetch-metadata/fields->our-metadata new-field-instances parent-id)) + ;; now return count of rows updated + (count new-fields)))) + + :our-metadata + @our-metadata})) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | "RETIRING" INACTIVE FIELDS | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(s/defn ^:private retire-field! :- (s/maybe (s/eq 1)) + "Mark an `old-field` belonging to `table` as inactive if corresponding Field object exists. Does *NOT* recurse over + nested Fields. Returns `1` if a Field was marked inactive, `nil` otherwise." + [table :- i/TableInstance, metabase-field :- common/TableMetadataFieldWithID] + (log/info (trs "Marking Field ''{0}'' as inactive." (common/field-metadata-name-for-logging table metabase-field))) + (when (db/update! Field (u/get-id metabase-field) :active false) + 1)) + +(s/defn ^:private retire-fields! :- su/IntGreaterThanOrEqualToZero + "Mark inactive any Fields in the application database that are no longer present in the DB being synced. These + Fields are ones that are in `our-metadata`, but not in `db-metadata`. Does *NOT* recurse over nested Fields. + Returns `1` if a Field was marked inactive." + [table :- i/TableInstance + db-metadata :- #{i/TableMetadataField} + our-metadata :- #{common/TableMetadataFieldWithID}] + ;; retire all the Fields not present in `db-metadata`, and count how many rows were actually affected + (sync-util/sum-for [metabase-field our-metadata + :when (not (common/matching-field-metadata metabase-field db-metadata))] + (sync-util/with-error-handling (trs "Error retiring {0}" + (common/field-metadata-name-for-logging table metabase-field)) + (retire-field! table metabase-field)))) + + +;;; +----------------------------------------------------------------------------------------------------------------+ +;;; | HIGH-LEVEL INSTANCE SYNCING LOGIC (CREATING/REACTIVATING/RETIRING/UPDATING) | +;;; +----------------------------------------------------------------------------------------------------------------+ + +(declare sync-instances!) + +(s/defn ^:private sync-nested-fields-of-one-field! :- (s/maybe su/IntGreaterThanOrEqualToZero) + "Recursively sync Field instances (i.e., rows in application DB) for nested Fields of a single Field, one or both + `field-metadata` (from synced DB) and `metabase-field` (from application DB)." + [table :- i/TableInstance + field-metadata :- (s/maybe i/TableMetadataField) + metabase-field :- (s/maybe common/TableMetadataFieldWithID)] + (let [nested-fields-metadata (:nested-fields field-metadata) + metabase-nested-fields (:nested-fields metabase-field)] + (when (or (seq nested-fields-metadata) + (seq metabase-nested-fields)) + (sync-instances! + table + (set nested-fields-metadata) + (set metabase-nested-fields) + (some-> metabase-field u/get-id))))) + +(s/defn ^:private sync-nested-field-instances! :- (s/maybe su/IntGreaterThanOrEqualToZero) + "Recursively sync Field instances (i.e., rows in application DB) for *all* the nested Fields of all Fields in + `db-metadata` and `our-metadata`." + [table :- i/TableInstance + db-metadata :- #{i/TableMetadataField} + our-metadata :- #{common/TableMetadataFieldWithID}] + (let [name->field-metadata (u/key-by common/canonical-name db-metadata) + name->metabase-field (u/key-by common/canonical-name our-metadata) + all-field-names (set (concat (keys name->field-metadata) + (keys name->metabase-field)))] + (sync-util/sum-for [field-name all-field-names + :let [field-metadata (get name->field-metadata field-name) + metabase-field (get name->metabase-field field-name)]] + (sync-nested-fields-of-one-field! table field-metadata metabase-field)))) + +(s/defn sync-instances! :- su/IntGreaterThanOrEqualToZero + "Sync rows in the Field table with `db-metadata` describing the current schema of the Table currently being synced, + creating Field objects or marking them active/inactive as needed." + ([table :- i/TableInstance + db-metadata :- #{i/TableMetadataField} + our-metadata :- #{common/TableMetadataFieldWithID}] + (sync-instances! table db-metadata our-metadata nil)) + + ([table :- i/TableInstance + db-metadata :- #{i/TableMetadataField} + our-metadata :- #{common/TableMetadataFieldWithID} + parent-id :- common/ParentID] + ;; syncing the active instances makes important changes to `our-metadata` that need to be passed to recursive + ;; calls, such as adding new Fields or making inactive ones active again. Keep updated version returned by + ;; `sync-active-instances!` + (let [{:keys [num-updates our-metadata]} (sync-active-instances! table db-metadata our-metadata parent-id)] + (+ num-updates + (retire-fields! table db-metadata our-metadata) + (sync-nested-field-instances! table db-metadata our-metadata))))) diff --git a/src/metabase/sync/sync_metadata/fields/sync_metadata.clj b/src/metabase/sync/sync_metadata/fields/sync_metadata.clj new file mode 100644 index 0000000000000000000000000000000000000000..903e1ec3c691ef4bc6c580393c3e39a2eb9dca7c --- /dev/null +++ b/src/metabase/sync/sync_metadata/fields/sync_metadata.clj @@ -0,0 +1,100 @@ +(ns metabase.sync.sync-metadata.fields.sync-metadata + "Logic for updating metadata properties of `Field` instances in the application database as needed -- this includes + the base type, database type, special type, and comment/remark (description) properties. This primarily affects + Fields that were not newly created; newly created Fields are given appropriate metadata when first synced." + (:require [clojure.string :as str] + [clojure.tools.logging :as log] + [metabase.models.field :as field :refer [Field]] + [metabase.sync + [interface :as i] + [util :as sync-util]] + [metabase.sync.sync-metadata.fields.common :as common] + [metabase.util :as u] + [metabase.util + [i18n :refer [trs]] + [schema :as su]] + [schema.core :as s] + [toucan.db :as db])) + +(s/defn ^:private update-field-metadata-if-needed! :- (s/enum 0 1) + "Update the metadata for a Metabase Field as needed if any of the info coming back from the DB has changed. Syncs + base type, database type, special type, and comments/remarks; returns `1` if the Field was updated; `0` otherwise." + [table :- i/TableInstance, field-metadata :- i/TableMetadataField, metabase-field :- common/TableMetadataFieldWithID] + (let [{old-database-type :database-type + old-base-type :base-type + old-field-comment :field-comment + old-special-type :special-type} metabase-field + {new-database-type :database-type + new-base-type :base-type + new-field-comment :field-comment} field-metadata + new-special-type (common/special-type field-metadata) + + new-db-type? + (not= old-database-type new-database-type) + + new-base-type? + (not= old-base-type new-base-type) + + new-special-type? + (and new-special-type + (not= old-special-type new-special-type)) + + ;; only sync comment if old Field description was blank + new-comment? + (and (str/blank? old-field-comment) + (not (str/blank? new-field-comment))) + + ;; calculate combined updates + updates + (merge + (when new-db-type? + (log/info (trs "Database type of {0} has changed from ''{1}'' to ''{2}''." + (common/field-metadata-name-for-logging table metabase-field) + old-database-type + new-database-type)) + {:database_type new-database-type}) + (when new-base-type? + (log/info (trs "Base type of {0} has changed from ''{1}'' to ''{2}''." + (common/field-metadata-name-for-logging table metabase-field) + old-base-type + new-base-type)) + {:base_type new-base-type}) + (when new-special-type? + (log/info (trs "Special type of {0} has changed from ''{1}'' to ''{2}''." + (common/field-metadata-name-for-logging table metabase-field) + old-special-type + new-special-type)) + {:special_type new-special-type}) + (when new-comment? + (log/info (trs "Comment has been added for {0}." + (common/field-metadata-name-for-logging table metabase-field))) + {:description new-field-comment}))] + ;; if any updates need to be done, do them and return 1 (because 1 Field was updated), otherwise return 0 + (if (and (seq updates) + (db/update! Field (u/get-id metabase-field) updates)) + 1 + 0))) + +(declare update-metadata!) + +(s/defn ^:private update-nested-fields-metadata! :- su/IntGreaterThanOrEqualToZero + "Recursively call `update-metadata!` for all the nested Fields in a `metabase-field`." + [table :- i/TableInstance, field-metadata :- i/TableMetadataField, metabase-field :- common/TableMetadataFieldWithID] + (let [nested-fields-metadata (:nested-fields field-metadata) + metabase-nested-fields (:nested-fields metabase-field)] + (if (seq metabase-nested-fields) + (update-metadata! table (set nested-fields-metadata) (set metabase-nested-fields)) + 0))) + +(s/defn update-metadata! :- su/IntGreaterThanOrEqualToZero + "Make sure things like PK status and base-type are in sync with what has come back from the DB. Recursively updates + nested Fields. Returns total number of Fields updated." + [table :- i/TableInstance + db-metadata :- #{i/TableMetadataField} + our-metadata :- #{common/TableMetadataFieldWithID}] + (sync-util/sum-for [metabase-field our-metadata] + ;; only update metadata for 'existing' Fields that are present in our Metadata (i.e., present in the application + ;; database) and that are still considered active (i.e., present in DB metadata) + (when-let [field-metadata (common/matching-field-metadata metabase-field db-metadata)] + (+ (update-field-metadata-if-needed! table field-metadata metabase-field) + (update-nested-fields-metadata! table field-metadata metabase-field))))) diff --git a/src/metabase/sync/util.clj b/src/metabase/sync/util.clj index a0c8b8f590f488e27f59864bb893cbb2eb581132..89260d16608eb99ff5664a49b2b3cfa63b13f27c 100644 --- a/src/metabase/sync/util.clj +++ b/src/metabase/sync/util.clj @@ -444,9 +444,28 @@ (log-sync-summary operation database sync-metadata))) (defn sum-numbers - "Similar to a 2-arg call to `map`, but will add all numbers that result from the invocations of `f`" + "Similar to a 2-arg call to `map`, but will add all numbers that result from the invocations of `f`. Used mainly for + logging purposes, such as to count and log the number of Fields updated by a sync operation. See also + `sum-for`, a `for`-style macro version." [f coll] (reduce + (for [item coll :let [result (f item)] :when (number? result)] result))) + +(defn sum-for* + "Impl for `sum-for` macro; see its docstring;" + [results] + (reduce + (filter number? results))) + +(defmacro sum-for + "Basically the same as `for`, but sums the results of each iteration of `body` that returned a number. See also + `sum-numbers`. + + As an added bonus, unlike normal `for`, this wraps `body` in an implicit `do`, so you can have more than one form + inside the loop. Nice" + {:style/indent 1} + [[item-binding coll & more-for-bindings] & body] + `(sum-for* (for [~item-binding ~coll + ~@more-for-bindings] + (do ~@body)))) diff --git a/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj b/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..a6f8a5605db2f427c3b113fcbbb2eece47b70089 --- /dev/null +++ b/test/metabase/sync/sync_metadata/fields/fetch_metadata_test.clj @@ -0,0 +1,66 @@ +(ns metabase.sync.sync-metadata.fields.fetch-metadata-test + (:require [clojure.walk :as walk] + [expectations :refer [expect]] + [medley.core :as m] + [metabase.models + [database :refer [Database]] + [table :refer [Table]]] + [metabase.sync.sync-metadata :as sync-metadata] + [metabase.sync.sync-metadata.fields.fetch-metadata :as sync-fields.fetch-metadata] + [metabase.test.mock.toucanery :as toucanery] + [metabase.util :as u] + [toucan.db :as db] + [toucan.util.test :as tt])) + +;; `our-metadata` should match up with what we have in the DB +(expect + #{{:name "id" + :database-type "SERIAL" + :base-type :type/Integer + :special-type :type/PK + :pk? true} + {:name "buyer" + :database-type "OBJECT" + :base-type :type/Dictionary + :pk? false + :nested-fields #{{:name "name" + :database-type "VARCHAR" + :base-type :type/Text + :pk? false} + {:name "cc" + :database-type "VARCHAR" + :base-type :type/Text + :pk? false}}} + {:name "ts" + :database-type "BIGINT" + :base-type :type/BigInteger + :special-type :type/UNIXTimestampMilliseconds + :pk? false} + {:name "toucan" + :database-type "OBJECT" + :base-type :type/Dictionary + :pk? false + :nested-fields #{{:name "name" + :database-type "VARCHAR" + :base-type :type/Text + :pk? false} + {:name "details" + :database-type "OBJECT" + :base-type :type/Dictionary + :pk? false + :nested-fields #{{:name "weight" + :database-type "DECIMAL" + :base-type :type/Decimal + :special-type :type/Category + :pk? false} + {:name "age" + :database-type "INT" + :base-type :type/Integer + :pk? false}}}}}} + (tt/with-temp* [Database [db {:engine ::toucanery/toucanery}]] + (sync-metadata/sync-db-metadata! db) + (let [transactions-table-id (u/get-id (db/select-one-id Table :db_id (u/get-id db), :name "transactions")) + remove-ids-and-nil-vals (partial walk/postwalk #(if-not (map? %) + % + (m/filter-vals some? (dissoc % :id))))] + (remove-ids-and-nil-vals (#'sync-fields.fetch-metadata/our-metadata (Table transactions-table-id)))))) diff --git a/test/metabase/sync/sync_metadata/fields/sync_instances_test.clj b/test/metabase/sync/sync_metadata/fields/sync_instances_test.clj new file mode 100644 index 0000000000000000000000000000000000000000..f476941e2457f5eeac21cc293cd98a332f5c35fa --- /dev/null +++ b/test/metabase/sync/sync_metadata/fields/sync_instances_test.clj @@ -0,0 +1,56 @@ +(ns metabase.sync.sync-metadata.fields.sync-instances-test + (:require [expectations :refer [expect]] + [metabase.models + [database :refer [Database]] + [field :refer [Field]] + [table :refer [Table]]] + [metabase.sync.sync-metadata.fields :as sync-fields] + [metabase.test.mock.toucanery :as toucanery] + [metabase.util :as u] + [toucan.db :as db] + [toucan.util.test :as tt])) + +(def ^:private toucannery-transactions-expected-fields-hierarchy + {"ts" nil + "id" nil + "buyer" {"cc" nil + "name" nil} + "toucan" {"details" {"age" nil + "weight" nil} + "name" nil}}) + +(defn- actual-fields-hierarchy [table-or-id] + (let [parent-id->children (group-by :parent_id (db/select [Field :id :parent_id :name] :table_id (u/get-id table-or-id))) + format-fields (fn format-fields [fields] + (into {} (for [field fields] + [(:name field) (when-let [nested-fields (seq (parent-id->children (:id field)))] + (format-fields nested-fields))])))] + (format-fields (get parent-id->children nil)))) + +(expect + toucannery-transactions-expected-fields-hierarchy + (tt/with-temp* [Database [db {:engine ::toucanery/toucanery}] + Table [table {:name "transactions", :db_id (u/get-id db)}]] + ;; do the initial sync + (sync-fields/sync-fields-for-table! table) + (let [transactions-table-id (u/get-id (db/select-one-id Table :db_id (u/get-id db), :name "transactions"))] + (actual-fields-hierarchy transactions-table-id)))) + +;; If you delete a nested Field, and re-sync a Table, it should recreate the Field as it was before! It should not +;; create any duplicate Fields (#8950) +(expect + toucannery-transactions-expected-fields-hierarchy + (tt/with-temp* [Database [db {:engine ::toucanery/toucanery}] + Table [table {:name "transactions", :db_id (u/get-id db)}]] + ;; do the initial sync + (sync-fields/sync-fields-for-table! table) + (let [transactions-table-id (u/get-id (db/select-one-id Table :db_id (u/get-id db), :name "transactions"))] + ;; Give the Table a new Hash, and delete `toucan.details.age` + (db/update! Table transactions-table-id :fields_hash "something new") + (db/delete! Field :table_id transactions-table-id, :name "age") + ;; ok, resync the Table. `toucan.details.age` should be recreated, but only one. We should *not* have a + ;; `toucan.age` Field as well, which was happening before the bugfix in this PR + (sync-fields/sync-fields-for-table! table) + ;; Fetch all the Fields in the `transactions` Table (name & parent name) after the sync, format them in a + ;; hierarchy for easy comparison + (actual-fields-hierarchy transactions-table-id)))) diff --git a/test/metabase/sync/sync_metadata/fields_test.clj b/test/metabase/sync/sync_metadata/fields_test.clj index b764114fc96a8d54422cf6b56a07c81c171c1f7d..5c7ce98ce72bb73f24f9725018a131c541f70741 100644 --- a/test/metabase/sync/sync_metadata/fields_test.clj +++ b/test/metabase/sync/sync_metadata/fields_test.clj @@ -2,7 +2,7 @@ "Tests for the logic that syncs Field models with the Metadata fetched from a DB. (There are more tests for this behavior in the namespace `metabase.sync-database.sync-dynamic-test`, which is sort of a misnomer.)" (:require [clojure.java.jdbc :as jdbc] - [expectations :refer :all] + [expectations :refer [expect]] [metabase [query-processor :as qp] [sync :as sync] @@ -11,6 +11,7 @@ [database :refer [Database]] [field :refer [Field]] [table :refer [Table]]] + [metabase.sync.sync-metadata.fields.sync-instances :as sync-fields.sync-instances] [metabase.sync.util-test :as sut] [metabase.test [data :as data] @@ -216,7 +217,7 @@ {new-db-type :database_type} (get-field)] ;; Syncing again with no change should not call sync-field-instances! or update the hash - (tu/throw-if-called metabase.sync.sync-metadata.fields/sync-field-instances! + (tu/throw-if-called sync-fields.sync-instances/sync-instances! (sync/sync-database! (data/db)) [old-db-type new-db-type @@ -302,4 +303,4 @@ [(no-fields-hash before-table-md) (no-fields-hash after-table-md) (= (:fields-hash before-table-md) - (:fields-hash after-table-md))])) ) + (:fields-hash after-table-md))]))) diff --git a/test/metabase/sync_database/sync_dynamic_test.clj b/test/metabase/sync_database/sync_dynamic_test.clj index e23034fa806611d467e57983bd6f8e7b2120162d..2f285494aaeb4d78dc4fecf4a64aae459f60c806 100644 --- a/test/metabase/sync_database/sync_dynamic_test.clj +++ b/test/metabase/sync_database/sync_dynamic_test.clj @@ -1,7 +1,7 @@ (ns metabase.sync-database.sync-dynamic-test "Tests for databases with a so-called 'dynamic' schema, i.e. one that is not hard-coded somewhere. A Mongo database is an example of such a DB. " - (:require [expectations :refer :all] + (:require [expectations :refer [expect]] [metabase [sync :as sync] [util :as u]] @@ -47,7 +47,7 @@ ;; TODO - At some point these tests should be moved into a `sync-metadata-test` or `sync-metadata.fields-test` -;; namespace +;; namespace. Actually I think they might belong in `metabase.sync.sync-metadata.fields.sync-instances-test` ;; make sure nested fields get resynced correctly if their parent field didn't change (expect diff --git a/test/metabase/test/mock/toucanery.clj b/test/metabase/test/mock/toucanery.clj index cbf295a78e7d70c8e8a0ea2b07910c3a5de75b9d..63f57bf70548997d53d678c393c1f35e94e8165a 100644 --- a/test/metabase/test/mock/toucanery.clj +++ b/test/metabase/test/mock/toucanery.clj @@ -5,7 +5,7 @@ [metabase.test.mock.util :as mock-util])) -(def ^:private ^:const toucanery-tables +(def toucanery-tables {"transactions" {:name "transactions" :schema nil :fields #{{:name "id"