diff --git a/resources/migrations/000_migrations.yaml b/resources/migrations/000_migrations.yaml
index 1927dce96d75ea0166612ce6beb37fbcd8df1d99..cc4aadbceafab5c518d433881b462c68c5fe8eca 100644
--- a/resources/migrations/000_migrations.yaml
+++ b/resources/migrations/000_migrations.yaml
@@ -3649,6 +3649,7 @@ databaseChangeLog:
   - changeSet:
       id: 56
       author: wwwiiilll
+      comment: 'Added 0.25.0'
       changes:
         - addColumn:
             tableName: core_user
@@ -3662,6 +3663,7 @@ databaseChangeLog:
   - changeSet:
       id: 57
       author: camsaul
+      comment: 'Added 0.25.0'
       changes:
         - addColumn:
             tableName: report_card
@@ -3673,6 +3675,7 @@ databaseChangeLog:
   - changeSet:
       id: 58
       author: senior
+      comment: 'Added 0.25.0'
       changes:
         - createTable:
             tableName: dimension
@@ -3744,6 +3747,7 @@ databaseChangeLog:
   - changeSet:
       id: 59
       author: camsaul
+      comment: 'Added 0.26.0'
       changes:
         - addColumn:
             tableName: metabase_field
@@ -3752,3 +3756,25 @@ databaseChangeLog:
                   name: fingerprint
                   type: text
                   remarks: 'Serialized JSON containing non-identifying information about this Field, such as min, max, and percent JSON. Used for classification.'
+  - changeSet:
+      id: 60
+      author: camsaul
+      comment: 'Added 0.26.0'
+      changes:
+        - addColumn:
+            tableName: metabase_database
+            columns:
+              - column:
+                  name: metadata_sync_schedule
+                  type: varchar(254)
+                  remarks: 'The cron schedule string for when this database should undergo the metadata sync process (and analysis for new fields).'
+                  defaultValue: '0 50 * * * ? *' # run at the end of every hour
+                  constraints:
+                    nullable: false
+              - column:
+                  name: cache_field_values_schedule
+                  type: varchar(254)
+                  remarks: 'The cron schedule string for when FieldValues for eligible Fields should be updated.'
+                  defaultValue: '0 50 0 * * ? *' # run at 12:50 AM
+                  constraints:
+                    nullable: false
diff --git a/src/metabase/sync/interface.clj b/src/metabase/sync/interface.clj
index 6ba02b7b9e4fa469644df39aed82ac7a3ec91b32..9ef49ee06f84003ac007cd56a32aba2f64db2ef3 100644
--- a/src/metabase/sync/interface.clj
+++ b/src/metabase/sync/interface.clj
@@ -97,6 +97,40 @@
 (def Fingerprint
   "Schema for a Field 'fingerprint' generated as part of the analysis stage. Used to power the 'classification' sub-stage of
    analysis. Stored as the `fingerprint` column of Field."
-  {(s/optional-key :global)       GlobalFingerprint
+  {(s/optional-key :version)      su/IntGreaterThanZero ; Fingerprints with no version key are assumed to have version of 1
+   (s/optional-key :global)       GlobalFingerprint
    (s/optional-key :type)         TypeSpecificFingerprint
    (s/optional-key :experimental) {s/Keyword s/Any}})
+
+
+;;; +------------------------------------------------------------------------------------------------------------------------+
+;;; |                                                 FINGERPRINT VERSIONING                                                 |
+;;; +------------------------------------------------------------------------------------------------------------------------+
+;; Occasionally we want to update the schema of our Field fingerprints and add new logic to populate the additional keys.
+;; However, by default, analysis (which includes fingerprinting) only runs on *NEW* Fields, meaning *EXISTING* Fields won't
+;; get new fingerprints with the updated info.
+;;
+;; To work around this, we can use a versioning system. Fields whose Fingerprint's version is lower than the current version
+;; should get updated during the next sync/analysis regardless of whether they are or are not new Fields. However, this could
+;; be quite inefficient: if we add a new fingerprint field for `:type/Number` Fields, why should we re-fingerprint `:type/Text`
+;; Fields? Ideally, we'd only re-fingerprint the numeric Fields.
+;;
+;; Thus, our implementation below. Each new fingerprint version lists a set of types that should be upgraded to it. Our
+;; fingerprinting logic will calculate whether a fingerprint needs to be recalculated based on its version and the changes
+;; that have been made in subsequent versions. Only the Fields that would benefit from the new Fingerprint info need be
+;; re-fingerprinted.
+;;
+;; Thus, if Fingerprint v2 contains some new info for numeric Fields, only Fields that derive from `:type/Number` need be upgraded
+;; to v2. Textual Fields with a v1 fingerprint can stay at v1 for the time being. Later, if we introduce a v3 that includes new
+;; "global" fingerprint info, both the v2-fingerprinted numeric Fields and the v1-fingerprinted textual Fields can be upgraded
+;; to v3.
+
+(def ^:const ^Long current-fingerprint-version
+  "The current version of our Field fingerprint. See comments above for explanation of how this is used.
+   Fingerprints that lack a `:version` key are treated as version 1 (see the `Fingerprint` schema)."
+  2) ; Clojure reads integer literals as java.lang.Long, hence the ^Long (not ^Integer) hint
+
+(def fingerprint-version->types-that-should-be-re-fingerprinted
+  "Map of fingerprint version to the set of Field base types that need to be upgraded to this version the next
+   time we do analysis."
+  {2 #{:type/Number}})