From 538d98104314c505d7b980df466ab49bb399f510 Mon Sep 17 00:00:00 2001
From: Ryan Senior <ryan@metabase.com>
Date: Tue, 5 Dec 2017 11:11:55 -0600
Subject: [PATCH] Switch infer spaces from doubles to floats

Storing the word costs in a float array instead of a double array
uses about half the memory. The reduced precision shouldn't affect
the results of word splitting.
---
 src/metabase/util/infer_spaces.clj | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/metabase/util/infer_spaces.clj b/src/metabase/util/infer_spaces.clj
index ea4fb660888..ebae56da49e 100644
--- a/src/metabase/util/infer_spaces.clj
+++ b/src/metabase/util/infer_spaces.clj
@@ -20,7 +20,7 @@
   (let [log-count (Math/log (count words))]
     (into (sorted-map)
           (map-indexed (fn [idx word]
-                         [(hash word) (Math/log (* (inc idx) log-count))])
+                         [(hash word) (float (Math/log (* (inc idx) log-count)))])
                        words))))
 
 ;; # Build arrays for a cost lookup, assuming Zipf's law and cost = -math.log(probability).
@@ -35,9 +35,9 @@
     "Array of word hash values, ordered by that hash value"
     (int-array (keys sorted-words)))
 
-  (def ^:private ^"[D" word-cost
-    "Array of word cost doubles, ordered by the hash value for that word"
-    (double-array (vals sorted-words)))
+  (def ^:private ^"[F" word-cost
+    "Array of word cost floats, ordered by the hash value for that word"
+    (float-array (vals sorted-words)))
 
   ;; maxword = max(len(x) for x in words)
   (def ^:private max-word
@@ -97,7 +97,7 @@
   [input]
   (let [s (s/lower-case input)
         cost (build-cost-array s)]
-    (loop [i (double (count s))
+    (loop [i (float (count s))
            out []]
       (if-not (pos? i)
         (reverse out)
-- 
GitLab