From eb04c931a6ae2de24c4021911cb5a21a19b80a39 Mon Sep 17 00:00:00 2001
From: Cam Saul <cammsaul@gmail.com>
Date: Tue, 4 Sep 2018 15:32:44 -0700
Subject: [PATCH] Fix uberjar caching

---
 .circleci/config.yml           |  28 ++--
 .circleci/skip-driver-tests.sh |   7 +
 .gitignore                     |   1 +
 bin/build-for-test             |  43 -----
 bin/ci                         | 282 ---------------------------------
 5 files changed, 25 insertions(+), 336 deletions(-)
 delete mode 100755 bin/build-for-test
 delete mode 100755 bin/ci

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 18726592dd1..e64f346ac1e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -13,12 +13,6 @@ restore-fe-deps-cache: &restore-fe-deps-cache
     - fe-deps-{{ checksum "yarn.lock" }}
     - fe-deps-
 
-restore-uberjar-cache: &restore-uberjar-cache
-  keys:
-    - uberjar-{{ .Branch }}-{{ .Revision }}
-    - uberjar-{{ .Branch }}
-    - uberjar-
-
 version: 2.1
 
 jobs:
@@ -39,6 +33,15 @@
           key: source-{{ .Branch }}-{{ .Revision }}
           paths:
             - .git
+      # The basic idea here is to generate a file with checksums for all the backend source files, and save it as
+      # `./backend-checksums.txt`. Then we'll use the checksum of that file for uberjar caching; thus we can reuse
+      # the same uberjar for integration tests across any build where the backend files are the same.
+      - run:
+          name: Generate checksums of all backend source files to use as Uberjar cache key
+          command: >
+            for file in `find ./src -type f -name '*.clj' | sort`;
+            do echo `md5sum $file` >> backend-checksums.txt;
+            done
       - persist_to_workspace:
           root: /home/circleci/
           paths:
@@ -428,13 +431,15 @@
       - restore_cache:
           <<: *restore-be-deps-cache
       - restore_cache:
-          <<: *restore-uberjar-cache
+          keys:
+            - uberjar-{{ checksum "./backend-checksums.txt" }}
       - run:
-          name: Build uberjar
-          command: ./bin/build-for-test
+          name: Build uberjar if needed
+          command: >
+            if [ ! -f './target/uberjar/metabase.jar' ]; then ./bin/build version uberjar; fi
           no_output_timeout: 5m
       - save_cache:
-          key: uberjar-{{ .Branch }}-{{ .Revision }}
+          key: uberjar-{{ checksum "./backend-checksums.txt" }}
           paths:
             - /home/circleci/metabase/metabase/target/uberjar/metabase.jar
 
@@ -446,7 +451,8 @@
       - restore_cache:
           <<: *restore-fe-deps-cache
       - restore_cache:
-          <<: *restore-uberjar-cache
+          keys:
+            - uberjar-{{ checksum "./backend-checksums.txt" }}
       - run:
           name: Generate version file
           command: ./bin/build version
diff --git a/.circleci/skip-driver-tests.sh b/.circleci/skip-driver-tests.sh
index d998d27469f..d0ee1ff9fdd 100755
--- a/.circleci/skip-driver-tests.sh
+++ b/.circleci/skip-driver-tests.sh
@@ -1,5 +1,12 @@
 #!/usr/bin/env bash
 
+# Determines whether we should skip tests for a driver. Usage:
+#
+#     ./.circleci/skip-driver-tests.sh oracle
+#
+# Returns false if the commit message contains [ci all], [ci drivers], or [ci <driver-name>],
+# or if the current branch is master or a release branch.
+
 set -eu
 
 COMMIT_MESSAGE=`git log -1 --oneline`
diff --git a/.gitignore b/.gitignore
index f82f949b170..70e402a9dfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,4 @@ coverage-summary.json
 bin/node_modules/
 *.log
 *.trace.db
+/backend-checksums.txt
diff --git a/bin/build-for-test b/bin/build-for-test
deleted file mode 100755
index d6f3dd6d0d3..00000000000
--- a/bin/build-for-test
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-
-set -eu
-
-VERSION_PROPERTY_NAME="src_hash"
-
-source-hash() {
-    # hash all the files that might change a backend-only uberjar build (for integration tests)
-    (
-        find src project.clj resources/sample-dataset.db.mv.db -type f -print0 | xargs -0 shasum ;
-        find resources -type f \( -iname \*.clj -o -iname \*.edn -o -iname \*.yaml -o -iname \*.properties \) -not -name "version.properties" -print0 | xargs -0 shasum ;
-    ) | shasum | awk '{ print $1 }'
-}
-
-uberjar-hash() {
-    # java -jar target/uberjar/metabase.jar version | grep -oE 'source_hash [a-f0-9]+' | awk '{ print $2 }'
-    # pulling the version.properties directly from the jar is much faster
-    unzip -c target/uberjar/metabase.jar version.properties | grep "$VERSION_PROPERTY_NAME" | cut -f2 -d=
-}
-
-check-uberjar-hash() {
-    expected_hash=$(source-hash)
-    actual_hash=$(uberjar-hash)
-    if [ "$expected_hash" == "$actual_hash" ]; then
-        return 0
-    else
-        return 1
-    fi
-}
-
-build-uberjar-for-test() {
-    echo "$VERSION_PROPERTY_NAME=$(source-hash)" >> resources/version.properties
-    ./bin/build uberjar
-}
-
-./bin/build version
-
-if [ ! -f "target/uberjar/metabase.jar" ] || ! check-uberjar-hash; then
-    echo "Building uberjar for testing"
-    build-uberjar-for-test
-else
-    echo "Uberjar already up to date for testing"
-fi
diff --git a/bin/ci b/bin/ci
deleted file mode 100755
index 91d024cecde..00000000000
--- a/bin/ci
+++ /dev/null
@@ -1,282 +0,0 @@
-#!/usr/bin/env bash
-
-# this ensures any failures along the way result in a CI failure
-set -eu
-
-node-0() {
-    is_enabled "drivers" && export ENGINES="h2,mongo,mysql,bigquery,sparksql" || export ENGINES="h2"
-    if is_engine_enabled "mongo"; then
-        run_step install-mongodb
-    fi
-    if is_engine_enabled "sparksql"; then
-        run_step install-sparksql
-    fi
-    MB_MYSQL_TEST_USER=ubuntu run_step lein-test
-}
-node-1() {
-    run_step lein with-profile +ci docstring-checker
-
-    is_enabled "drivers" && export ENGINES="h2,sqlserver,oracle" || export ENGINES="h2"
-    if is_engine_enabled "oracle"; then
-        run_step install-oracle
-    fi
-    MB_DB_TYPE=postgres MB_DB_DBNAME=circle_test MB_DB_PORT=5432 MB_DB_USER=ubuntu MB_DB_HOST=localhost \
-        run_step lein-test
-}
-node-2() {
-    run_step lein with-profile +ci bikeshed
-
-    is_enabled "drivers" && export ENGINES="h2,postgres,sqlite,presto" || export ENGINES="h2"
-    if is_engine_enabled "crate"; then
-        run_step install-crate
-    fi
-    if is_engine_enabled "presto"; then
-        run_step install-presto
-    fi
-    MB_ENCRYPTION_SECRET_KEY='Orw0AAyzkO/kPTLJRxiyKoBHXa/d6ZcO+p+gpZO/wSQ=' MB_DB_TYPE=mysql MB_DB_DBNAME=circle_test MB_DB_PORT=3306 MB_DB_USER=ubuntu MB_DB_HOST=localhost \
-        MB_PRESTO_TEST_HOST=localhost MB_PRESTO_TEST_PORT=8080 MB_POSTGRESQL_TEST_USER=ubuntu \
-        run_step lein-test
-}
-node-3() {
-    run_step yarn run lint
-    run_step yarn run flow
-
-    is_enabled "drivers" && export ENGINES="h2,redshift,druid,vertica" || export ENGINES="h2"
-    if is_engine_enabled "vertica"; then
-        run_step install-vertica
-    fi
-    # this is redundant with node 0 unless one of the non-H2 driver tests is enabled
-    if [ ENGINES != "h2" ]; then
-        run_step lein-test
-    fi
-}
-node-4() {
-    run_step ./bin/reflection-linter
-
-    run_step ./bin/build version frontend sample-dataset uberjar
-    report-frontend-size
-    report-uberjar-size
-}
-node-5() {
-    run_step lein with-profile +ci eastwood
-
-    run_step yarn run test-karma
-    run_step yarn run test-unit --coverage
-    report-frontend-coverage
-}
-node-6() {
-    run_step ./bin/build-for-test
-    run_step check-uberjar-file-count
-    run_step yarn run test-integrated-no-build
-}
-
-report() {
-    timestamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
-    name="$1"
-    value="$2"
-    if ! [ -z ${STATS_DB+x} ]; then
-        psql "$STATS_DB" -c "INSERT INTO build_stats (timestamp, name, value, build_number, node_index, branch, hash) VALUES ('$timestamp', '$name', $value, $CIRCLE_BUILD_NUM, $CIRCLE_NODE_INDEX, '$CIRCLE_BRANCH', '$CIRCLE_SHA1');" > /dev/null
-    fi
-}
-
-report-frontend-coverage() {
-    report "frontend-coverage-lines" $(node -e "console.log(require('./coverage-summary.json').total.lines.pct)")
-    report "frontend-coverage-functions" $(node -e "console.log(require('./coverage-summary.json').total.functions.pct)")
-    report "frontend-coverage-branches" $(node -e "console.log(require('./coverage-summary.json').total.branches.pct)")
-    report "frontend-coverage-statements" $(node -e "console.log(require('./coverage-summary.json').total.statements.pct)")
-
-    report "frontend-loc" $(node -e "console.log(require('./coverage-summary.json').total.lines.total)")
-}
-report-frontend-size() {
-    report "frontend-size" "$(wc -c < resources/frontend_client/app/dist/app-main.bundle.js)"
-}
-report-uberjar-size() {
-    report "uberjar-size" "$(wc -c < target/uberjar/metabase.jar)"
-}
-
-install-crate() {
-    sudo add-apt-repository ppa:crate/stable -y
-    sudo apt-get update
-    sudo apt-get install -y crate
-    # ulimit setting refused Crate service to start on CircleCI container - so comment it
-    sudo sed -i '/MAX_LOCKED_MEMORY/s/^/#/' /etc/init/crate.conf
-    echo "psql.port: 5200" | sudo tee -a /etc/crate/crate.yml
-    sudo service crate restart
-}
-
-install-mongodb() {
-    sudo apt-get purge mongodb-org*
-    sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
-    echo "deb http://repo.mongodb.org/apt/ubuntu precise/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
-    sudo apt-get update
-    sudo apt-get install -y mongodb-org
-    sudo service mongod restart
-}
-
-install-oracle() {
-    wget --output-document=plugins/ojdbc7.jar $ORACLE_JDBC_JAR
-}
-
-install-vertica() {
-    wget --output-document=plugins/vertica-jdbc-7.1.2-0.jar $VERTICA_JDBC_JAR
-    docker run --detach --publish 5433:5433 sumitchawla/vertica
-    sleep 60
-}
-
-install-presto() {
-    docker run --detach --publish 8080:8080 metabase/presto-mb-ci
-    sleep 10
-}
-
-install-sparksql() {
-    # first, download the Spark Deps JAR and put it in the plugins/ dir
-    wget --output-document=plugins/spark-deps.jar https://s3.amazonaws.com/sparksql-deps/metabase-sparksql-deps-1.2.1.spark2-standalone.jar
-
-    # next, download Spark and run it
-    spark_version='2.1.1' # Java 7 support was removed in Spark 2.2 so don't upgrade until we upgrade CI
-    hadoop_version='2.7'
-
-    spark_archive="spark-${spark_version}-bin-hadoop${hadoop_version}.tgz"
-    wget --progress dot -e dotbytes=250K "https://archive.apache.org/dist/spark/spark-${spark_version}/${spark_archive}"
-    tar -xf $spark_archive
-    rm $spark_archive
-
-    spark_dir="$(pwd)/spark-${spark_version}-bin-hadoop${hadoop_version}"
-    java -Duser.timezone=Etc/UTC \
-         -Xmx512m \
-         -cp "${spark_dir}/conf:${spark_dir}/jars/*" \
-         org.apache.spark.deploy.SparkSubmit \
-         --master local[8] \
-         --conf spark.executor.extraJavaOptions=-Duser.timezone=Etc/UTC \
-         --conf spark.cores.max=1 \
-         --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 \
-         --name "Thrift JDBC/ODBC Server" \
-         --executor-memory 512m \
-         spark-internal &>/dev/null &
-}
-
-lein-test() {
-    lein with-profile +ci test
-}
-
-if [ -z ${CIRCLE_BRANCH_REGEX+x} ]; then
-    CIRCLE_BRANCH_REGEX='^master|release-.+$'
-fi
-
-is_enabled() {
-    (echo "$CIRCLE_BRANCH" | grep -qE "$CIRCLE_BRANCH_REGEX") ||
-        [[ "$CIRCLE_COMMIT_MESSAGE" == *"[ci $1]"* ]] ||
-        [[ "$CIRCLE_COMMIT_MESSAGE" == *"[ci all]"* ]]
-}
-
-is_engine_enabled() {
-    [[ "$ENGINES" == *"$1"* ]]
-}
-
-# Make sure uberjar has less than 64k files because that is the Java 7 LIMIT
-check-uberjar-file-count() {
-    if [ ! -f ./target/uberjar/metabase.jar ]; then
-        echo "Missing uberjar."
-        exit 1
-    fi
-
-    file_count=$(unzip -l target/uberjar/metabase.jar | wc -l)
-    echo "Uberjar has ${file_count} files."
-
-    if [ $file_count -gt 65535 ]; then
-        echo "Uberjar exceeds the 64k Java 7 file limit! We can't allow this. ¡Lo siento!"
-        exit 1
-    fi
-}
-
-# print a summary on exit
-status=0
-summary=""
-
-# records the time and exit code of each step
-run_step() {
-    status=0
-    start=$(date +%s)
-    # run in the background then `wait` so fail_fast can interrupt it
-    "$@" &
-    wait $! || status=$?
-    elapsed=$(expr $(date +%s) - $start || true)
-    summary="${summary}status=$status time=$elapsed command=$@\n"
-    report "run-status \"$*\"" "$status"
-    report "run-time \"$*\"" "$elapsed"
-    return $status
-}
-
-summary() {
-    # if last status was failure then fail the rest of the nodes
-    if [ $status != 0 ]; then
-        fail_fast
-    fi
-    echo -e "========================================"
-    echo -en "$summary"
-    echo -e "========================================"
-}
-
-trap summary EXIT
-
-fail_fast() {
-    if [ -z ${CIRCLE_NODE_TOTAL+x} ]; then
-        return 0
-    fi
-
-    echo -e "========================================"
-    echo -e "Failing fast! Stopping other nodes..."
-    # Touch a file to differentiate between a local failure and a
-    # failure triggered by another node
-    touch '/tmp/local-fail'
-    # ssh to the other CircleCI nodes and send SIGUSR1 to tell them to exit early
-    for (( i = 0; i < $CIRCLE_NODE_TOTAL; i++ )); do
-        if [ $i != $CIRCLE_NODE_INDEX ]; then
-            ssh node$i 'touch /tmp/fail; pkill -SIGUSR1 -f "bash ./bin/ci"' 2> /dev/null || true
-        fi
-    done
-}
-
-exit_early() {
-    echo -e "========================================"
-    echo -e "Exited early! Did not necesssarily pass!"
-    pkill -TERM -P $$ || true
-    exit 0
-}
-
-trap exit_early SIGUSR1
-
-if [ -z ${CIRCLE_BUILD_NUM+x} ]; then
-    export CIRCLE_BUILD_NUM="-1"
-fi
-
-if [ -z ${CIRCLE_SHA1+x} ]; then
-    export CIRCLE_SHA1="$(git rev-parse HEAD)"
-fi
-
-if [ -z ${CIRCLE_BRANCH+x} ]; then
-    export CIRCLE_BRANCH="$(git rev-parse --abbrev-ref HEAD)"
-fi
-
-export CIRCLE_COMMIT_MESSAGE="$(git log --format=oneline -n 1 $CIRCLE_SHA1)"
-
-# This local-fail check is to guard against two nodes failing at the
-# same time. Both nodes ssh to each node and drop /tmp/fail. Those
-# failing nodes then get here and see and the other node has told it
-# to exit early. This results in both nodes exiting early, and thus
-# not failing, causing the build to succeed
-if [[ -f "/tmp/fail" && ! -f "/tmp/local-fail" ]]; then
-    exit_early
-fi
-
-if [ -z ${CIRCLE_NODE_INDEX+x} ]; then
-    # If CIRCLE_NODE_INDEX isn't set, read node numbers from the args
-    # Useful for testing locally.
-    for i in "$@"; do
-        export CIRCLE_NODE_INDEX="$i"
-        node-$i
-    done
-else
-    # Normal mode on CircleCI
-    node-$CIRCLE_NODE_INDEX
-fi
-- 
GitLab
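
For anyone sanity-checking the new caching scheme locally, here is a minimal sketch (not part of the patch) that reproduces the cache-key computation from the `checkout` job above. It assumes GNU md5sum, as on the CircleCI Linux images, and that it is run from the repo root:

    # Regenerate the checksum file the same way the CI job does.
    # (The CI job runs in a fresh checkout, so it has no rm step.)
    rm -f backend-checksums.txt
    for file in `find ./src -type f -name '*.clj' | sort`;
    do echo `md5sum $file` >> backend-checksums.txt;
    done
    # CircleCI's {{ checksum "./backend-checksums.txt" }} hashes this file's
    # contents, so as long as no .clj file under ./src changes, the uberjar
    # cache key stays the same and the cached metabase.jar is reused.
    md5sum backend-checksums.txt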