Configuration

Configuration is supported by Typesafe Config, enabling multiple ways to pass in options. Most commonly, configuration is provided via either Java system properties (passed as command-line options) or via a HOCON config file. HOCON is a JSON-like format that is very flexible and human-readable. The reference config below is in HOCON format.


# Example of setting configuration via configuration file
java \
  -Dconfig.file=quine.conf \
  -jar streaming-graph.jar

# Example of overriding configuration via system properties
java \
  -Dquine.webserver.port=9000 \
  -Dquine.id.type=uuid-3 \
  -jar streaming-graph.jar

# Example of overriding configuration via environment variables
CONFIG_FORCE_quine_webserver_port=9000 \
CONFIG_FORCE_quine_id_type=uuid-5 \
java \
  -Dconfig.override_with_env_vars=true \
  -jar streaming-graph.jar

Uncommented values are the defaults, unless otherwise noted. Unexpected configuration keys or values in the quine block will report an error at startup.

An underscore _ is used to indicate a required property with no default value. There are none of these in the default configuration.

quine {

  # webserver binding configuration
  webserver {
    # whether the webserver should be enabled
    enabled = true

    # Hostname or address of the interface to which the HTTP server should
    # be bound - 0.0.0.0 means "all interfaces"
    # There are two special values which are interpreted dynamically:
    #   1.) "<getHostAddress>" uses the host IP found at runtime
    #   2.) "<getHostName>" uses the host DNS name found at runtime
    address = "0.0.0.0"

    # port to which the HTTP server should be bound
    # setting to `0` will choose an available port at random.
    port = 8080
  }
  # (optional) Configuration to use when advertising this server
  # (e.g., canonical address), if different than bind
  # configuration (e.g., when deployed behind a reverse proxy).
  # webserver-advertise {
    # Hostname or address using which the application should generate
    # user-facing hyperlinks to itself. This should be uniquely
    # resolvable from the end-users' client.
    # address = "localhost"

    # port (on `address`) via which the HTTP server can be reached
    # port = 8080
  # }

  cluster {
    # An arbitrary name to use for this cluster. It is used for actor
    # addresses and will appear in logs. All members of the same cluster
    # must share the same cluster name.
    name = quine-cluster

    # Minimum number of cluster members which must join before the
    # cluster is operational. This value is ignored on all nodes other
    # than the seed. Default is `1`
    target-size = 1

    # Duration determining how long to wait before an unreachable member
    # is removed from the cluster. A short value will heal a cluster
    # faster, at the expense of prematurely ejecting a member which might
    # become reachable again. This value should be set to a number of
    # seconds that is NOT LOWER than:  `10 * log(cluster_size)`  where
    # the `cluster_size` includes the number of hot-spares.
    # If not set, this value is estimated automatically.
    # member-down-timeout = 10 seconds

    # The clustering service will be bound to a specific address and port.
    # The combination of address and port serves to uniquely identify
    # each cluster member. This is also the address/port where incoming
    # traffic from other cluster members is directed.
    this-member {
      # Hostname or interface IP address to which the clustering service
      # is bound.
      # There are two special values which are interpreted dynamically:
      #   1.) "<getHostAddress>" uses the host IP found at runtime
      #   2.) "<getHostName>" uses the host DNS name found at runtime
      address = <getHostAddress>

      # The port to which the clustering service is bound.
      # setting to `0` will choose an available port at random.
      port = 25520

      # Optionally define a specific position in the cluster, which might
      # be overridden by the cluster. Cluster position is 0-indexed.
      # Setting to a negative number will let the cluster assign the
      # position of this member. If the specified position is already
      # occupied, this member will be assigned to another location or
      # used as a hot-spare.
      preferred-position = -1

      # When the cluster enters an Operating state, this value determines
      # whether this particular member will automatically resume any ingest
      # streams that were previously running by a member formerly at this
      # cluster position. This is relevant for restarts of the entire cluster
      # and when a single member takes over for another
      # failed member. This setting will be retained if this member joins the
      # cluster as a spare, so that when/if it is needed, this setting will
      # apply no matter which position it fills in the cluster.
      restore-ingest = true
    }

    # Seeds define the address/port of at least one member of the cluster.
    # Any member of the cluster can function as a seed node (including
    # hot-spares). It is strongly recommended to specify multiple seed
    # nodes; if a member goes down and restarts, it will contact the seed
    # nodes listed here. If a seed node restarts, it will need to contact
    # other seed nodes in order to rejoin the cluster.
    cluster-join = {
      type = static-seed-addresses
      seed-addresses = []
    }
      #
      # Example seed-addresses entry:
      # {
      #   Hostname or IP address where the cluster seed node is accessed.
      #   It must be uniquely resolvable on the network. The default value
      #   assumes this-member is the seed.
      #   address = localhost
      #   Port where the cluster seed node can be accessed.
      #   port = 25520
      # }
      # Seed addresses may also be passed in as a comma-separated list of host:port pairs in the
      # $QUINE_SEED_ADDRESSES environment variable. If the port is left off, it defaults to 25520.

    # Can use a DNS address (e.g. `quine` in this example) that resolves to one or more quine hosts
    # as the cluster discovery mechanism, instead of statically specifying seed addresses.
    # cluster-join = {
    #   type = dns-entry
    #   name = quine
    # }
    # The DNS entry to resolve can also be specified with the $QUINE_SEED_DNS environment variable.
  }

  # configuration for the id-assignment scheme the application should use.
  # This must be the same on all clustered hosts
  id {
    # one of [uuid-3, uuid-4, uuid-5, long, byte-array, uuid]
    # - uuid-3:     generate UUIDv3s according to the RFC specification
    # - uuid-4:     generate UUIDs labelled as v4, with id() and strId()
    #               returning random UUIDs, and idFrom returning
    #               deterministic UUIDs with version 4 identifying bytes
    # - uuid-5:     generate UUIDv5s according to the RFC specification
    # - long:       generate random integer IDs in the range
    #               [-(2^53-1), 2^53-1] -- these may be safely used as
    #               IEEE double-precision floating-point values without
    #               loss of precision. This id scheme is not appropriate
    #               for large-scale datasets because of the high
    #               likelihood of a collision
    # - byte-array: generate unstructured byte arrays as IDs
    # - uuid:       generate UUIDs with a mix of versions and namespaces
    type = uuid

    # whether the id scheme should be extended with a host-aware
    # partitioning schema. When "true", ids will be prefixed with a
    # "partition" key, and two IDs with the same partition key will
    # always be managed by the same shard
    partitioned = false

    # for uuid-5 and uuid-3 configuration, a UUID namespace may also be
    # set. This must be the same on all clustered hosts
    # namespace = "00000000-0000-0000-0000-000000000000"
  }

  # Selects the order in which edges between nodes are returned in queries
  # one of [reverse-insertion, unordered]
  # reverse-insertion means the edges are returned in the reverse
  # of the order they were added (that is, from newest to oldest).
  edge-iteration = reverse-insertion

  # (optional) The number of nodes in a shard's cache before that shard
  # will begin to expire nodes from its cache.
  in-memory-soft-node-limit = 10000

  # (optional) A limit to the total number of nodes in a shard's cache.
  # Attempts to create a node that would exceed this limit will return
  # an error. This value must always be higher than
  # `in-memory-soft-node-limit`
  in-memory-hard-node-limit = 75000

  # configuration for which data to save about nodes and when to do so
  persistence {
    # whether to save node journals. "true" uses more disk space and
    # enables more functionality, such as historical queries
    journal-enabled = true

    # one of [on-node-sleep, on-node-update, never]. When to save a
    # snapshot of a node's current state, including any DistinctId Standing
    # Queries registered on the node
    snapshot-schedule = on-node-sleep

    # whether only a single snapshot should be retained per-node. If
    # false, one snapshot will be saved at each timestamp against which
    # a historical query is made
    snapshot-singleton = false

    # when to save Standing Query partial result (only applies for the
    # `MultipleValues` mode -- `DistinctId` Standing Queries always save
    # when a node saves a snapshot, regardless of this setting)
    standing-query-schedule = on-node-sleep

    # whether effects in-memory occur before or after updates are confirmed
    # persisted to disk.
    # Possible values: memory-first, persistor-first
    effect-order = persistor-first
  }

  # storage backend / "persistor" configuration. There are several
  # possible "type"s, non-default options are below (commented out)
  store {
    # store data in a local filesystem using RocksDB. This is not
    # supported in a multi-host cluster
    type = rocks-db

    # base folder in which RocksDB data will be stored
    filepath = "quine.db"

    # whether to create any directories in "filepath" that do not yet
    # exist
    create-parent-dir = no

    # whether to use a write-ahead log.
    write-ahead-log = on

    # whether to force all writes to be fully confirmed to disk. This
    # is substantially slower, but maintains data integrity even under
    # power loss (write-ahead-log is enough to maintain integrity due
    # to process crashes).
    sync-all-writes = off

    # if set, the number of nodes for which to optimize node creation
    # latency
    # bloom-filter-size =
  }
  # store {
  #   # store data in an Apache Cassandra instance
  #   type = cassandra
  #
  #   # "host:port" strings at which Cassandra nodes can be accessed from
  #   # the application
  #   endpoints = [
  #     "localhost:9042"
  #   ]
  #
  #   # the keyspace to use
  #   keyspace = quine
  #
  #   # whether the application should create the keyspace if it does not
  #   # yet exist
  #   should-create-keyspace = true
  #
  #   # whether the application should create tables in the keyspace if
  #   # they do not yet exist
  #   should-create-tables = true
  #
  #   # how many copies of each datum the Cassandra cluster should retain
  #   replication-factor = 1
  #
  #   # how many hosts must agree on a datum for Quine to consider that
  #   # datum written/read
  #   write-consistency = LOCAL_QUORUM
  #   read-consistency = LOCAL_QUORUM
  #
  #   # passed through to Cassandra
  #   local-datacenter = "datacenter1"
  #
  #   # how long to wait before considering a write operation failed
  #   write-timeout = "10s"
  #
  #   # how long to wait before considering a read operation failed
  #   read-timeout = "10s"
  #
  #   # if set, the number of nodes for which to optimize node creation
  #   # latency
  #   # bloom-filter-size =
  # }
  # store {
  #   # store data in a memory-mapped local file using MapDB. This is not
  #   # supported in a multi-host cluster
  #   type = map-db
  #
  #   # base filename from which MapDB filenames will be created. For
  #   # example, "quine.db", "quine.db.part.3", etc
  #   filepath = "quine.db"
  #
  #   # whether to create any directories in "filepath" that don't yet exist
  #   create-parent-dir = no
  #
  #   # how many files to use. MapDB performance slows dramatically above
  #   # around 2GB per file
  #   number-partitions = 1
  #
  #   # whether to use a write-ahead log. Does not support Windows hosts.
  #   write-ahead-log = off
  #
  #   # if write-ahead-log = true, how often to commit the write ahead log
  #   commit-interval = "10s"
  #
  #   # if set, the number of nodes for which to optimize node creation
  #   # latency
  #   # bloom-filter-size =
  # }
  # store {
  #   # store data in a ClickHouse cluster. Each thatDot Streaming Graph cluster member
  #   # must connect to a different ClickHouse shard, and the number of Streaming Graph
  #   # cluster members must be the same as the number of ClickHouse shards. Additionally,
  #   # the ClickHouse cluster should be preconfigured with the quine database schema.
  #   type = click-house
  #
  #   # uri of the ClickHouse shard this Streaming Graph cluster member should connect to
  #   url = "http://localhost:8123"
  #
  #   # ClickHouse username
  #   username = "quine"
  #
  #   # ClickHouse password
  #   password = "quine"
  #
  #   # if set, the number of nodes for which to optimize node creation
  #   # latency
  #   # bloom-filter-size =
  # }
  # store {
  #   # do not store any data, only use the temporary node cache
  #   # all writes to the persistor will be a no-op.
  #   type = empty
  # }
  # store {
  #   # Use in-memory maps to simulate a local persistor.
  #   type = in-memory
  # }

  # where metrics collected by the application should be reported
  metrics-reporters = [
    {
      # one of [jmx, csv, influxdb, slf4j]
      # jmx will report metrics as namespaced MBeans. Other alternatives
      # are listed (commented out) below
      type = jmx
    }
    # {
    #   # create a csv file for each reported metric
    #   type = csv
    #
    #   # required by csv - the interval at which new rows will be
    #   # written to the CSV file (for example, 200ms)
    #   period = _
    #
    #   # required by csv - the directory in which the csv files should
    #   # be created and written
    #   log-directory = _
    # }
    # {
    #   # report metrics to an influxdb (version 1) database
    #   type = influxdb
    #
    #   # required by influxdb - the interval at which new records will
    #   # be written to the database
    #   period = _
    #
    #   # connection information for the influxdb database
    #   database = metrics
    #   scheme = http
    #   host = localhost
    #   port = 8086
    #
    #   # authentication information for the influxdb database. Both
    #   # fields may be omitted
    #   # user =
    #   # password =
    # }
    # {
    #   # log metrics via an slf4j logger
    #   type = slf4j
    #
    #   # required by slf4j - the interval at which new records will be
    #   # logged
    #   period = _
    #
    #   # logger to which metrics will be logged
    #   logger-name = metrics
    # }
  ]

  # Startup and shutdown timeout for the Quine Application
  # The system will throw an error and exit if any component required
  # to start or shutdown Quine takes longer than this time
  timeout = 2 m

  # the property on a node reserved to store that node's labels. This
  # must be the same on all clustered hosts
  labels-property = "__LABEL"

  # the minimum amount of time a node must stay in the cache after
  # being updated
  decline-sleep-when-write-within = 100 ms

  # the minimum amount of time a node must stay in the cache after
  # being accessed
  decline-sleep-when-access-within = 0 ms

  # nodes will wait up to this amount of time before processing messages
  # when at-time is in the future (occurs when there is a difference in
  # the system clock across nodes in the cluster)
  max-catch-up-sleep = 2000 ms  

  # whether the application should log its current config at startup
  dump-config = no

  bolt {
    # whether the BOLT protocol should be enabled
    enabled = false

    # one of [optional, required, disabled]. Whether TLS should be used
    # for BOLT connections. See the security section to configure TLS
    # certificates
    encryption = optional

    # The interface to which the bolt service should be bound.
    # There are two special values which are interpreted dynamically:
    #   1.) "<getHostAddress>" uses the host IP found at runtime
    #   2.) "<getHostName>" uses the host DNS name found at runtime
    address = "0.0.0.0"

    # the port on which the BOLT protocol handler should listen
    # setting to `0` will choose an available port at random.
    port = 7687
  }

  # configuration of TLS certificates used by BOLT
  security {
    # the SSL keystore to use. If omitted, this will default to the
    # keystore specified by the javax.net.ssl.keyStore system property.
    # keystore =

    # the password for the SSL keystore to use. If omitted, this will
    # default to the password specified by the
    # javax.net.ssl.keyStorePassword system property
    # password =
  }

  # which metrics are enabled and their configurations
  metrics {
    # whether to enable debug metrics (i.e., metrics whose collection may slow down
    # the operation of Quine Enterprise)
    enable-debug-metrics = no
  }

  # trial permissions (Streaming Graph trial version only)
  #trial {
  #  email = "test_trial_version@thatdot.com"
  #  api-key = TRIAL_API_KEY
  #}
}