Skip to content
Snippets Groups Projects
Unverified Commit e3c211f3 authored by Ryan Senior's avatar Ryan Senior Committed by GitHub
Browse files

Merge pull request #8836 from metabase/serialize-with-sorted-maps

Ensure consistent ordering of maps/sets when hashing metadata
parents 9a45b6a9 382f814e
No related branches found
No related tags found
No related merge requests found
......@@ -23,13 +23,32 @@
(db/update! 'Card card-id
:result_metadata metadata)))
(defn- prepare-for-serialization
"Return version of `node` that will hash consistently"
[node]
(cond
;; Integers get converted to floats by the frontend and will hash differently. Convert all integers to floats so
;; that they hash the same before being sent to the FE and after
(integer? node)
(double node)
;; Hashmaps are not guaranteed to hash the same values (be stored in the same order) across machines or versions
;; of the JDK. Array maps will be automatically converted ot hashmaps once they are large enough. Convert maps to
;; sorted maps so that we can get a consistent ordering regardless of map implementation and whether or not the FE
;; changes the order of the keys
(map? node)
(into (sorted-map) node)
;; We probably don't have any sets in our result metadata. If we did, those are hashed and would not have a
;; predictable order. Putting this check/conversion in as it's easy to do and we might have sets in the future.
(set? node)
(into (sorted-set) node)
;; If it's not one of the above, it's a noop
:else
node))
(defn- serialize-metadata-for-hashing
[metadata]
(->> metadata
(walk/postwalk (fn [node]
(if (integer? node)
(double node)
node)))
(walk/postwalk prepare-for-serialization)
json/generate-string))
(defn- metadata-checksum
......
......@@ -97,6 +97,37 @@
false
(results-metadata/valid-checksum? "ABCD" (#'results-metadata/metadata-checksum "ABCDE")))
(def ^:private example-metadata
[{:base_type "type/Text"
:display_name "Date"
:name "DATE"
:unit nil
:special_type nil
:fingerprint {:global {:distinct-count 618 :nil% 0.0}, :type {:type/DateTime {:earliest "2013-01-03T00:00:00.000Z"
:latest "2015-12-29T00:00:00.000Z"}}}}
{:base_type "type/Integer"
:display_name "count"
:name "count"
:special_type "type/Quantity"
:fingerprint {:global {:distinct-count 3
:nil% 0.0},
:type {:type/Number {:min 235.0, :max 498.0, :avg 333.33 :q1 243.0, :q3 440.0 :sd 143.5}}}}])
(defn- array-map->hash-map
"Calling something like `(into (hash-map) ...)` will only return a hash-map if there are enough elements to push it
over the limit of an array-map. By passing the keyvals into `hash-map`, you can be sure it will be a hash-map."
[m]
(apply hash-map (apply concat m)))
;; tests that the checksum is consistent when an array-map is switched to a hash-map
(expect
(#'results-metadata/metadata-checksum example-metadata)
(#'results-metadata/metadata-checksum (mapv array-map->hash-map example-metadata)))
;; tests that the checksum is consistent with an integer and with a double
(expect
(#'results-metadata/metadata-checksum example-metadata)
(#'results-metadata/metadata-checksum (update-in example-metadata [1 :fingerprint :type :type/Number :min] int)))
;; make sure that queries come back with metadata
(expect
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment