Skip to content
Snippets Groups Projects
Commit 382f814e authored by Ryan Senior's avatar Ryan Senior
Browse files

Ensure consistent ordering of maps/sets when hashing metadata

This commit will ensure all hashes and sets are ordered before
serializing them to JSON and hashing that string. This will prevent
surprising behavior when the number of items in a map reach 9 or if
the order of elements changes between machines or JDK versions.

Fixes #8826
parent 9a45b6a9
No related branches found
No related tags found
No related merge requests found
......@@ -23,13 +23,32 @@
(db/update! 'Card card-id
:result_metadata metadata)))
(defn- prepare-for-serialization
"Return version of `node` that will hash consistently"
[node]
(cond
;; Integers get converted to floats by the frontend and will hash differently. Convert all integers to floats so
;; that they hash the same before being sent to the FE and after
(integer? node)
(double node)
;; Hashmaps are not guaranteed to hash the same values (be stored in the same order) across machines or versions
;; of the JDK. Array maps will be automatically converted ot hashmaps once they are large enough. Convert maps to
;; sorted maps so that we can get a consistent ordering regardless of map implementation and whether or not the FE
;; changes the order of the keys
(map? node)
(into (sorted-map) node)
;; We probably don't have any sets in our result metadata. If we did, those are hashed and would not have a
;; predictable order. Putting this check/conversion in as it's easy to do and we might have sets in the future.
(set? node)
(into (sorted-set) node)
;; If it's not one of the above, it's a noop
:else
node))
(defn- serialize-metadata-for-hashing
[metadata]
(->> metadata
(walk/postwalk (fn [node]
(if (integer? node)
(double node)
node)))
(walk/postwalk prepare-for-serialization)
json/generate-string))
(defn- metadata-checksum
......
......@@ -97,6 +97,37 @@
false
(results-metadata/valid-checksum? "ABCD" (#'results-metadata/metadata-checksum "ABCDE")))
(def ^:private example-metadata
[{:base_type "type/Text"
:display_name "Date"
:name "DATE"
:unit nil
:special_type nil
:fingerprint {:global {:distinct-count 618 :nil% 0.0}, :type {:type/DateTime {:earliest "2013-01-03T00:00:00.000Z"
:latest "2015-12-29T00:00:00.000Z"}}}}
{:base_type "type/Integer"
:display_name "count"
:name "count"
:special_type "type/Quantity"
:fingerprint {:global {:distinct-count 3
:nil% 0.0},
:type {:type/Number {:min 235.0, :max 498.0, :avg 333.33 :q1 243.0, :q3 440.0 :sd 143.5}}}}])
(defn- array-map->hash-map
"Calling something like `(into (hash-map) ...)` will only return a hash-map if there are enough elements to push it
over the limit of an array-map. By passing the keyvals into `hash-map`, you can be sure it will be a hash-map."
[m]
(apply hash-map (apply concat m)))
;; tests that the checksum is consistent when an array-map is switched to a hash-map
(expect
(#'results-metadata/metadata-checksum example-metadata)
(#'results-metadata/metadata-checksum (mapv array-map->hash-map example-metadata)))
;; tests that the checksum is consistent with an integer and with a double
(expect
(#'results-metadata/metadata-checksum example-metadata)
(#'results-metadata/metadata-checksum (update-in example-metadata [1 :fingerprint :type :type/Number :min] int)))
;; make sure that queries come back with metadata
(expect
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment