Configuration

Configuration is supported by Typesafe Config, enabling multiple ways to pass in options. Most commonly, configuration is provided via either Java system properties (passed as command-line options) or via a HOCON config file. HOCON is a JSON-like format that is very flexible and human-readable. The reference config below is in HOCON format.

# Example of overriding configuration via system properties
java \
  -Dthatdot.connect.web-interface-port=9000 \
  -Dthatdot.connect.id.type=uuid-3 \
  -jar connect.jar

# Example of overriding configuration via configuration file
java \
  -Dconfig.file=thatdot-connect.conf \
  -jar connect.jar

Uncommented values are the defaults, unless otherwise noted. Unexpected configuration keys or values in the thatdot.connect block will report an error at startup.

An underscore _ is used to indicate a required property with no default value. There are none of these in the default configuration.

thatdot {
  connect {
    # address of the interface to which the HTTP and BOLT servers should be
    # bound - 0.0.0.0 means "all interfaces"
    address = "0.0.0.0"

    # port to which the HTTP server should be bound
    web-interface-port = 8080

    # how long to wait when starting up or shutting down the application
    timeout = "2m"

    cluster {
      # the "canonical" hostname of this host from the perspective of other
      # cluster members. This must be unambiguously resolvable by all cluster
      # members
      hostname = localhost

      # which port this host will listen on for cluster communication.
      # defaults to a random free port
      port = 65301

      # the name used for the underlying akka cluster
      name = single-host-cluster

      # List of hosts that form the Connect cluster, one object per host. One
      # of these must have first-shard = 0, and the ranges of shards across
      # all hosts must be contiguous and non-overlapping. Defaults to a
      # single-host cluster with 4 shards listening on a random free port.
      # The "topology" array must be the same on all clustered hosts
      topology = [
        {
          # index of first shard on this host (inclusive and non-negative)
          first-shard = 0

          # index of last shard on this host (inclusive and non-negative)
          last-shard = 3

          # the canonical hostname of the host -- must be resolvable by every
          # other cluster member
          hostname = localhost

          # the port number used for clustered communication to the host
          port = 65301
        }
      ]
    }


    # the minimum amount of time a node must stay in the cache after being updated
    decline-sleep-when-write-within = 100 ms

    # the minimum amount of time a node must stay in the cache after being accessed
    decline-sleep-when-access-within = 0 ms

    # whether the application should log its current config at startup
    dump-config = no

    # configuration for the id-assignment scheme the application should use.
    # This must be the same on all clustered hosts
    id {
      # one of [uuid-3, uuid-4, uuid-5, long, byte-array, uuid]
      # - uuid-3:     generate UUIDv3s according to the RFC specification
      # - uuid-4:     generate UUIDs labelled as v4, with id() and strId()
      #               returning random UUIDs, and idFrom returning
      #               deterministic UUIDs with version 4 identifying bytes
      # - uuid-5:     generate UUIDv5s according to the RFC specification
      # - long:       generate random integer IDs in the range
      #               [-(2^53-1), 2^53-1] -- these may be safely used as IEEE
      #               double-precision floating-point values without loss of
      #               precision. This id scheme is not appropriate for
      #               large-scale datasets because of the high likelihood of
      #               a collision
      # - byte-array: generate unstructured byte arrays as IDs
      # - uuid:       generate UUIDs with a mix of versions and namespaces
      type = uuid-5

      # whether the id scheme should be extended with a host-aware
      # partitioning scheme. When "true", ids will be prefixed with a
      # "partition" key, and two IDs with the same partition key will always
      # be managed by the same shard
      partitioned = false

      # for uuid-5 and uuid-3 configuration, a UUID namespace may also be
      # set. This must be the same on all clustered hosts
      # namespace = "00000000-0000-0000-0000-000000000000"
    }

    # how many nodes must be present in a shard's cache before that shard
    # will attempt to expire nodes from its cache
    in-memory-soft-node-limit = 10000

    # how many nodes must be present in a shard's cache before that shard
    # will throw an error when creating a new node
    in-memory-hard-node-limit = 75000

    # configuration for which data to save about nodes and when to do so
    persistence {
      # whether to save node journals. "true" uses more disk space and
      # enables more functionality, such as historical queries
      journal-enabled = true

      # one of [on-node-sleep, on-node-update, never]. When to save a
      # snapshot of a node's current state, including any SingleId Standing
      # Queries registered on the node
      snapshot-schedule = on-node-sleep

      # whether only a single snapshot should be retained per-node. If false,
      # one snapshot will be saved at each timestamp against which a
      # historical query is made
      snapshot-singleton = false

      # when to save Standing Query partial result (only applies for the
      # `MultipleValues` mode -- `SingleId` Standing Queries always save when
      # a node saves a snapshot, regardless of this setting)
      standing-query-schedule = on-node-sleep
    }

    # storage backend / "persistor" configuration. There are several possible
    # "type"s, non-default options are listed (commented out) below
    store {
      # store data in a memory-mapped local file using MapDB. This is not
      # supported in a multi-host cluster
      type = local

      # base filename from which MapDB filenames will be created. For
      # example, "connect.db", "connect.db.part.3", etc
      filepath = "connect.db"

      # whether to create any directories in "filepath" that do not yet exist
      create-parent-dir = no

      # how many files to use. MapDB performance slows dramatically above
      # around 2GB per file
      number-partitions = 1

      # whether to use a write-ahead log. Does not support Windows hosts.
      write-ahead-log = off

      # if write-ahead-log is enabled, how often to commit the write ahead log
      commit-interval = "10s"

      # if set, the number of nodes for which to optimize node creation
      # latency
      # bloom-filter-size =
    }
    # store {
    #   # store data in an Apache Cassandra instance
    #   type = cassandra
    #
    #   # "host:port" strings at which Cassandra nodes can be accessed from
    #   # the application
    #   endpoints = [
    #     "localhost:9042"
    #   ]
    #
    #   # the keyspace to use
    #   keyspace = thatdot
    #
    #   # whether the application should create the keyspace if it does not
    #   # yet exist
    #   should-create-keyspace = true
    #
    #   # whether the application should create tables in the keyspace if
    #   # they do not yet exist
    #   should-create-tables = true
    #
    #   # how many copies of each datum the Cassandra cluster should retain
    #   replication-factor = 1
    #
    #   # how many hosts must agree on a datum for Connect to consider that
    #   # datum written/read
    #   write-consistency = local-quorum
    #   read-consistency = local-quorum
    #
    #   # passed through to Cassandra
    #   local-datacenter = "datacenter1"
    #
    #   # how long to wait before considering an insert operation failed
    #   insert-timeout = "10s"
    #
    #   # how long to wait before considering a select operation failed
    #   select-timeout = "10s"
    #
    #   # if set, the number of nodes for which to optimize node creation
    #   # latency
    #   # bloom-filter-size =
    # }
    # store {
    #   # store data in a local filesystem using RocksDB. This is not
    #   # supported in a multi-host cluster
    #   type = rocks-db
    #
    #   # base folder in which RocksDB data will be stored
    #   filepath = "connect.db"
    #
    #   # whether to create any directories in "filepath" that do not yet
    #   # exist
    #   create-parent-dir = no
    #
    #   # whether to use a write-ahead log.
    #   write-ahead-log = on
    #
    #   # whether to force all writes to be fully confirmed to disk. This
    #   # is substantially slower, but maintains data integrity even under
    #   # power loss (write-ahead-log is enough to maintain integrity due to
    #   # process crashes).
    #   sync-all-writes = off
    #
    #   # if set, the number of nodes for which to optimize node creation
    #   # latency
    #   # bloom-filter-size =
    # }
    # store {
    #   # do not store any data, only use the in-memory node cache
    #   type = empty
    # }
    # store {
    #   # store data in hydrolix. This feature is experimental.
    #   type = hydrolix
    #
    #   # hydrolix project name
    #   project-name = thatdot
    #
    #   # hydrolix API host to which to connect
    #   host = "thatdot.hydrolix.live"
    #
    #   # table in which to store node journals
    #   journals-table = journals
    #
    #   # maximum number of concurrent connections to the hydrolix API
    #   max-concurrent = 2
    #
    #   # interval within which to batch up requests to the hydrolix API
    #   batch-window = "100ms"
    #
    #   # enable HTTP/2 (relies on experimental akka features)
    #   use-http2 = false
    #
    #   # if set, the number of nodes for which to optimize node creation
    #   # latency
    #   # bloom-filter-size =
    # }
    # store {
    #   # store data in an s3 bucket. This feature is experimental, and
    #   # supports only limited "persistence" configuration. This is not
    #   # supported in a multi-host cluster.
    #   type = s3
    #
    #   # the name of the S3 bucket to use
    #   bucket-name = _
    #
    #   # if set, the number of nodes for which to optimize node creation
    #   # latency
    #   # bloom-filter-size =
    # }

    # where metrics collected by the application should be reported
    metrics-reporters = [
      {
        # one of [jmx, csv, influxdb, slf4j]
        # jmx will report metrics as namespaced MBeans. Other alternatives
        # are listed (commented out) below
        type = jmx
      }
      # {
      #   # create a csv file for each reported metric
      #   type = csv
      #
      #   # required by csv - the interval at which new rows will be written
      #   # to the CSV file (for example, 200ms)
      #   period = _
      #
      #   # required by csv - the directory in which the csv files should be
      #   # created and written
      #   log-directory = _
      # }
      # {
      #   # report metrics to an influxdb database
      #   type = influxdb
      #
      #   # required by influxdb - the interval at which new records will be
      #   # written to the database
      #   period = _
      #
      #   # connection information for the influxdb database
      #   database = metrics
      #   scheme = http
      #   host = localhost
      #   port = 8086
      #
      #   # authentication information for the influxdb database. Both fields
      #   # may be omitted
      #   # user =
      #   # password =
      # }
      # {
      #   # log metrics via an slf4j logger
      #   type = slf4j
      #
      #   # required by slf4j - the interval at which new records will be
      #   # logged
      #   period = _
      #
      #   # logger to which metrics will be logged
      #   logger-name = metrics
      # }
    ]

    # the property on a node reserved to store that node's labels. This must
    # be the same on all clustered hosts
    labels-property = "__LABEL"

    # experimental: the indexing scheme this application should use
    index {
      # one of [empty, historical-node-index, current-in-memory-index,
      # b-tree].
      # Only "empty" is supported on a multi-host cluster
      type = empty
    }

    bolt {
      # whether the BOLT protocol should be enabled
      enabled = false

      # one of [optional, required, disabled]. Whether TLS should be used for
      # BOLT connections. See the security section to configure TLS
      # certificates
      encryption = optional

      # the port on which the BOLT protocol handler should listen
      port = 7687
    }

    # configuration of TLS certificates used by BOLT
    security {
      # the SSL keystore to use. If omitted, this will default to the
      # keystore specified by the javax.net.ssl.keyStore system property.
      # keystore =

      # the password for the SSL keystore to use. If omitted, this will
      # default to the password specified by the
      # javax.net.ssl.keyStorePassword system property
      # password =
    }
  }
}