Skip to content
Snippets Groups Projects
Unverified Commit 8d5e27be authored by Robert Roland's avatar Robert Roland Committed by GitHub
Browse files

Fix broken datetime fingerprinters (#12856)

Comparing a LocalDateTime to any other time requires assigning a
timezone to the datetime. This change removes any reference to the query
processor store during the fingerprinting.

Adds tests that the fingerprints are also present in the database.

[ci all]
parent 70c993d8
No related branches found
No related tags found
No related merge requests found
......@@ -8,7 +8,6 @@
[math :as math]]
[medley.core :as m]
[metabase.models.field :as field]
[metabase.query-processor.timezone :as qp.tz]
[metabase.sync.analyze.classifiers.name :as classify.name]
[metabase.sync.util :as sync-util]
[metabase.util :as u]
......@@ -19,7 +18,8 @@
(:import com.bigml.histogram.Histogram
com.clearspring.analytics.stream.cardinality.HyperLogLogPlus
[java.time.chrono ChronoLocalDateTime ChronoZonedDateTime]
java.time.temporal.Temporal))
java.time.temporal.Temporal
java.time.ZoneOffset))
(defn col-wise
"Apply reducing functinons `rfs` coll-wise to a seq of seqs."
......@@ -189,17 +189,7 @@
String (->temporal [this] (->temporal (u.date/parse this)))
Long (->temporal [this] (->temporal (t/instant this)))
Integer (->temporal [this] (->temporal (t/instant this)))
;; this is challenging, because ChronoLocalDate requires a ZoneOffset
;; to work with. Ideally, we would use the database's ZoneOffset, but
;; we don't have access to that here. Use the JVM's systemDefault to
;; convert this.
ChronoLocalDateTime (->temporal [this] (.toInstant this
(.. (if-let [tz (or (qp.tz/report-timezone-id-if-supported)
(qp.tz/database-timezone-id))]
(java.time.ZoneId/of tz)
(java.time.ZoneId/systemDefault))
(getRules)
(getOffset (java.time.Instant/now)))))
ChronoLocalDateTime (->temporal [this] (.toInstant this (ZoneOffset/UTC)))
ChronoZonedDateTime (->temporal [this] (.toInstant this))
Temporal (->temporal [this] this))
......
(ns metabase.sync.analyze.fingerprint.fingerprinters-test
(:require [clojure.test :refer :all]
[metabase.models.field :as field]
[metabase.models.field :as field :refer [Field]]
[metabase.sync.analyze.fingerprint.fingerprinters :refer :all]
[metabase.test :as mt]))
[metabase.test :as mt]
[schema.core :as s]
[toucan.db :as db]))
(deftest fingerprint-temporal-values-test
;; we want to test h2 and postgres, because h2 doesn't
;; support overriding the timezone for a session / report
(mt/test-drivers #{:h2 :postgres}
(doseq [tz ["UTC" nil]]
(mt/with-temporary-setting-values [report-timezone tz]
(mt/with-database-timezone-id "UTC"
(mt/with-everything-store
(is (= {:global {:distinct-count 4
:nil% 0.5}
:type {:type/DateTime {:earliest "2013-01-01"
:latest "2018-01-01"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/DateTime}))
[#t "2013" nil #t "2018" nil nil #t "2015"])))
(testing "handle ChronoLocalDateTime"
(is (= {:global {:distinct-count 2
:nil% 0.0}
:type {:type/DateTime {:earliest "2013-01-01T20:04:00Z"
:latest "2018-01-01T04:04:00Z"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/Temporal}))
[(java.time.LocalDateTime/of 2013 01 01 20 04 0 0)
(java.time.LocalDateTime/of 2018 01 01 04 04 0 0)]))))
(testing "handle comparing explicit Instant with ChronoLocalDateTime"
(is (= {:global {:distinct-count 2
:nil% 0.0}
:type {:type/DateTime {:earliest "2007-12-03T10:15:30Z"
:latest "2018-01-01T04:04:00Z"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/Temporal}))
[(java.time.Instant/parse "2007-12-03T10:15:30.00Z")
(java.time.LocalDateTime/of 2018 01 01 04 04 0 0)]))))
(testing "mixing numbers and strings"
(is (= {:global {:distinct-count 2
:nil% 0.0}
:type {:type/DateTime {:earliest "1970-01-01T00:00:01.234Z"
:latest "2007-12-03T10:15:30Z"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/Temporal}))
["2007-12-03T10:15:30.00Z" 1234]))))
(testing "nil temporal values"
(is (= {:global {:distinct-count 1
:nil% 1.0}
:type {:type/DateTime {:earliest nil
:latest nil}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/DateTime}))
(repeat 10 nil)))))))))))
(testing "handle LocalDate"
(is (= {:global {:distinct-count 4
:nil% 0.5}
:type {:type/DateTime {:earliest "2013-01-01"
:latest "2018-01-01"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/DateTime}))
[#t "2013" nil #t "2018" nil nil #t "2015"]))))
(testing "handle ChronoLocalDateTime"
(is (= {:global {:distinct-count 2
:nil% 0.0}
:type {:type/DateTime {:earliest "2013-01-01T20:04:00Z"
:latest "2018-01-01T04:04:00Z"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/Temporal}))
[(java.time.LocalDateTime/of 2013 01 01 20 04 0 0)
(java.time.LocalDateTime/of 2018 01 01 04 04 0 0)]))))
(testing "handle comparing explicit Instant with ChronoLocalDateTime"
(is (= {:global {:distinct-count 2
:nil% 0.0}
:type {:type/DateTime {:earliest "2007-12-03T10:15:30Z"
:latest "2018-01-01T04:04:00Z"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/Temporal}))
[(java.time.Instant/parse "2007-12-03T10:15:30.00Z")
(java.time.LocalDateTime/of 2018 01 01 04 04 0 0)]))))
(testing "mixing numbers and strings"
(is (= {:global {:distinct-count 2
:nil% 0.0}
:type {:type/DateTime {:earliest "1970-01-01T00:00:01.234Z"
:latest "2007-12-03T10:15:30Z"}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/Temporal}))
["2007-12-03T10:15:30.00Z" 1234]))))
(testing "nil temporal values"
(is (= {:global {:distinct-count 1
:nil% 1.0}
:type {:type/DateTime {:earliest nil
:latest nil}}}
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/DateTime}))
(repeat 10 nil))))))
(deftest disambiguate-test
(testing "We should correctly disambiguate multiple competing multimethods (DateTime and FK in this case)"
......@@ -100,3 +96,31 @@
(transduce identity
(fingerprinter (field/map->FieldInstance {:base_type :type/Text}))
["metabase" "more" "like" "metabae" "[1, 2, 3]"]))))
(deftest fingerprints-in-db-test
(mt/test-drivers (mt/normal-drivers)
(testing "Fingerprints should actually get saved with the correct values"
(testing "Text fingerprints"
(is (schema= {:global {:distinct-count (s/eq 100)
:nil% (s/eq 0.0)}
:type {:type/Text {:percent-json (s/eq 0.0)
:percent-url (s/eq 0.0)
:percent-email (s/eq 0.0)
:average-length (s/pred #(< 15 % 16) "between 15 and 16")}}}
(db/select-one-field :fingerprint Field :id (mt/id :venues :name)))))
(testing "date fingerprints"
(is (schema= {:global {:distinct-count (s/eq 618)
:nil% (s/eq 0.0)}
:type {:type/DateTime {:earliest (s/pred #(.startsWith % "2013-01-03"))
:latest (s/pred #(.startsWith % "2015-12-29"))}}}
(db/select-one-field :fingerprint Field :id (mt/id :checkins :date)))))
(testing "number fingerprints"
(is (schema= {:global {:distinct-count (s/eq 4)
:nil% (s/eq 0.0)}
:type {:type/Number {:min (s/eq 1.0)
:q1 (s/pred #(< 1.44 % 1.46) "approx 1.4591129021415095")
:q3 (s/pred #(< 2.4 % 2.5) "approx 2.493086095768049")
:max (s/eq 4.0)
:sd (s/pred #(< 0.76 % 0.78) "between 0.76 and 0.78")
:avg (s/eq 2.03)}}}
(db/select-one-field :fingerprint Field :id (mt/id :venues :price))))))))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment