Cluster Install Walk-through

This is a step-by-step sequence of API calls that simulates a typical zero-to-cluster installation workflow.

  1. Create a Repository.

  2. Create Machine Credentials with appropriate settings for your cluster. You may need to create more than one set of Machine Credentials if different machines in your cluster use different credentials.

  3. Create a Config Profile with the configuration settings for which you wish to override the defaults. If you wish to vary your configuration per datacenter, or even per node, you need to create multiple Config Profiles.

  4. Create a Cluster model, including datacenters and nodes (see Datacenter and Node). Assign repository, machine credentials, and config profiles to the appropriate cluster model objects.

  5. Create an Install job.

  6. Watch the job (see Jobs) for status.


#!/usr/bin/env python
# (C) DataStax, 2015.  All Rights Reserved
# Demonstrates installation of DataStax Community on existing empty servers
# using the LCM API
# Example Usage:
#   python <this_script>.py <target node ip one> <target node ip two> <...>

import sys
import os
import requests
import json
import threading

# Host that serves the LCM API. It is blank in this sample and must be filled
# in before running: an empty value yields the URL "http://:8888/api/v2/lcm/".
server_ip = ""
# Every endpoint path passed to the helpers below is appended to this prefix.
base_url = 'http://{0}:8888/api/v2/lcm/'.format(server_ip)
homedir = os.environ.get("HOME")
# Uncomment to read an SSH private key for the key-based machine credential
# example further down:
# private_key_path = os.path.join(homedir, ".ssh/id_rsa")
# private_key = open(private_key_path).read()

def do_post(url, post_data):
    """Convenience function to simplify posting data to the LCM api.

    Serializes ``post_data`` to JSON, POSTs it to ``base_url + url`` with a
    JSON content type, echoes the request target and the raw response body,
    and returns the decoded response.

    Args:
        url: Endpoint path relative to ``base_url`` (e.g. "clusters/").
        post_data: JSON-serializable object sent as the request body.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        ValueError: If the response body is not valid JSON.
    """
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Posting to: %s" % url)
    result = requests.post(base_url + url,
                           data=json.dumps(post_data),
                           headers={'Content-Type': 'application/json'})
    # Show the raw body before decoding so a non-JSON error page is still
    # visible when json.loads fails.
    print("Response is: %s" % result.text)
    result_data = json.loads(result.text)
    return result_data

# Machine credentials tell LCM how to log in to the DSE nodes over SSH and how
# to obtain root privileges. Three variants are shown; only the last is active.

# Variant 1: log in as root with a password and run commands directly,
# without su or sudo.
# machine_credential = do_post("machine_credentials/",
#                              {"name": "mc11",
#                               "login-user": "root",
#                               "login-password": "root",
#                               "become-mode": "direct"})

# Variant 2: log in as vagrant with an ssh-key and run commands through su
# when root privileges are necessary (requires private_key to be loaded).
# machine_credential = do_post("machine_credentials/",
#                              {"name": "mc11",
#                               "login-user": "vagrant",
#                               "ssh-private-key": private_key,
#                               "become-mode": "su",
#                               "become-user": "root",
#                               "become-password": "password"})

# Variant 3 (active): log in as johndoe with a password and run commands
# through sudo when root privileges are necessary.
machine_credential = do_post(
    "machine_credentials/",
    {
        "name": "mc11",
        "login-user": "johndoe",
        "login-password": "foobie",
        "become-mode": "sudo",
        "become-password": "foobie",
    },
)

# Describes many DSE settings and LCM options to the LCM api.
# Other than the name and datastax-version, all attributes are optional
# and have sensible defaults.
# NOTE(review): the endpoint argument was missing from this call;
# "config_profiles/" is assumed from the LCM API naming -- confirm against the
# API reference.
cluster_profile = do_post(
    "config_profiles/",
    {"name": "cp1",
     "datastax-version": "4.8.1",
     "json": {'cassandra-yaml': {"start-native-transport": True},
              'logback-xml': {'file-appender-min-index': 2,
                              'loggers': [{'name': 'com.thinkaurelius.thrift',
                                           'level': 'ERROR'}]},
              # Trailing commas let the optional keys below be uncommented
              # without further edits.
              'java-setup': {'major-version': "1.8.0",
                             # 'update-version': "77",
                             # 'build-version': '02',
                             },
              'package-proxy': {
                  'enabled': False,
                  # 'protocol': 'http',
                  # 'host': 'proxy.lan',
                  # 'port': '3128',
                  # 'authentication-required': False,
                  # 'username': 'user',
                  # 'password': 'pw',
              }},
     "comment": "no comment"})

# Describes the package repository (and the credentials used to access it) to
# the LCM api.
# NOTE(review): the endpoint argument was missing from this call;
# "repositories/" is assumed from the LCM API naming -- confirm against the
# API reference.
repository = do_post(
    "repositories/",
    {"name": "my-repo-credentials",
     "username": "johndoe",
     "password": "foobie"})

# Register the cluster with the LCM api, attaching the machine credential,
# repository and config profile created above by their ids.
cluster = do_post(
    "clusters/",
    {
        "name": "cluster01",
        "machine-credential-id": machine_credential["id"],
        "repository-id": repository["id"],
        "config-profile-id": cluster_profile["id"],
    },
)

# Register a data-center inside the cluster. Machine-credentials,
# repository-credentials and config-profiles may also be set on a datacenter;
# when left unset (as here) they are inherited from the cluster.
datacenter = do_post(
    "datacenters/",
    {
        "name": "datacenter01",
        "cluster-id": cluster["id"],
    },
)

# Register one node per ip-address given on the command line (every argument
# after the script name). Each ip doubles as the node name and as its
# ssh-management-address.
ip_addresses = sys.argv[1:]
nodes = []
for ip in ip_addresses:
    nodes.append(do_post(
        "nodes/",
        {
            "name": ip,
            "ssh-management-address": ip,
            "datacenter-id": datacenter["id"],
        },
    ))

# Everything up to this point only described the desired future state.
# Requesting an install job asks LCM to execute the installation and
# configuration changes needed to bring the cluster to that state.
install_job = do_post(
    "actions/install",
    {
        "job-scope": "cluster",
        "resource-id": cluster["id"],
    },
)

# Poll the LCM api for updates on the status of the job and progress events
displayed_events = {}

def check_job_status(job_id, displayed_events):
    """Print unseen events for a job and keep polling while it is running.

    Fetches the job's event listing, prints every event whose id is not yet
    recorded in ``displayed_events``, then reads the job itself. While the job
    status is 'RUNNING' the function re-schedules itself on a 5 second timer.

    Args:
        job_id: Id of the job to watch.
        displayed_events: Dict keyed by event id; mutated in place to record
            which events have already been printed across polls.
    """
    # Fetch the most recent 200 job-events
    url = "job_events/?job-id=%s&per-page=200" % job_id
    result = requests.get(base_url + url)

    # Display events if we haven't seen them before
    search_results = json.loads(result.text)
    for search_result in search_results['results']:
        # Each search result only links to the event; fetch the full record.
        job_event_result = requests.get(search_result['href'])
        job_event = json.loads(job_event_result.text)
        if job_event['id'] not in displayed_events:
            displayed_events[job_event['id']] = True
            # NOTE(review): the original hand-picked a few human-friendly
            # fields here, but that argument list was lost to truncation.
            # Until it is restored from the Job Events API reference, print
            # the node id followed by the whole event.
            print("Node %s: %s" % (job_event['node-id'],
                                   json.dumps(job_event, sort_keys=True)))

    # Check if the entire job has been completed
    job_url = "jobs/%s" % job_id
    job_response = requests.get(base_url + job_url)
    job_results = json.loads(job_response.text)
    status = job_results['status']

    # Poll every 5 seconds, displaying status until the job is complete
    if status == 'RUNNING':
        t = threading.Timer(5.0, check_job_status,
                            [job_id, displayed_events])
        # A Timer does nothing until it is started.
        t.start()
    elif status == 'COMPLETE':
        print("Job completed successfully.")
    else:
        # Any other status ends polling (as before); report it rather than
        # exiting silently.
        print("Polling stopped; job status is: %s" % status)

# Initiate status polling: run check_job_status for the install job after an
# initial 5 second delay. The Timer needs both its callback and an explicit
# start() call to fire.
t = threading.Timer(5.0, check_job_status,
                    [install_job["id"], displayed_events])
t.start()

Was this helpful?

Give Feedback

How can we improve the documentation?

© 2024 DataStax | Privacy policy | Terms of use

Apache, Apache Cassandra, Cassandra, Apache Tomcat, Tomcat, Apache Lucene, Apache Solr, Apache Hadoop, Hadoop, Apache Pulsar, Pulsar, Apache Spark, Spark, Apache TinkerPop, TinkerPop, Apache Kafka and Kafka are either registered trademarks or trademarks of the Apache Software Foundation or its subsidiaries in Canada, the United States and/or other countries. Kubernetes is the registered trademark of the Linux Foundation.

General Inquiries: +1 (650) 389-6000,