Running a spark-submit job with internal authentication

Example of running a spark-submit job with internal authentication.

This example shows how to run a spark-submit job with internal authentication.

When you use dse spark-submit to submit a Spark job, the Spark Master URL and the Spark Cassandra Connection URL are set automatically, so the only setting your code needs to supply through SparkConf is the application name. For example:
package simpleSpark;
import com.datastax.spark.connector.cql.CassandraConnector;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
/**
 * Factory helpers for the Spark objects used by the SimpleSpark example.
 *
 * <p>The configuration built here sets only the application name.
 */
public interface SparkConfSetup {

  /** Returns a new SparkConf whose application name is "SimpleSpark". */
  public static SparkConf getSparkConf() {
    SparkConf conf = new SparkConf();
    return conf.setAppName("SimpleSpark");
  }

  /** Returns a JavaSparkContext wrapping a new SparkContext built from getSparkConf(). */
  public static JavaSparkContext getJavaSparkContext() {
    return new JavaSparkContext(new SparkContext(getSparkConf()));
  }

  /** Returns the CassandraConnector obtained by applying the configuration from getSparkConf(). */
  public static CassandraConnector getCassandraConnector() {
    SparkConf conf = getSparkConf();
    return CassandraConnector.apply(conf);
  }
}
  1. Clone the source files from GitHub.
  2. Install Apache Maven.
  3. Add the DataStax Enterprise Spark JAR file to the local repository:
    mvn install:install-file -Dfile=/usr/share/dse/dse-spark-version.jar -DgroupId=com.datastax -DartifactId=dse -Dversion=5.0.3 -Dpackaging=jar
    
  4. Configure the project for Maven so that pom.xml looks like this:
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

        <modelVersion>4.0.0</modelVersion>
        <groupId>BasicSparkDemo</groupId>
        <artifactId>BasicSparkDemo</artifactId>
        <packaging>jar</packaging>
        <version>0.1</version>
        <name>BasicSparkDemo</name>
        <url>https://www.datastax.com/</url>

        <build>
            <plugins>
                <!-- Compile the example for Java 8; the code uses static interface methods. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>2.3.2</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
            </plugins>
        </build>

        <dependencies>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.10</artifactId>
                <version>1.6.2</version>
                <scope>provided</scope>
            </dependency>
            <!-- Connector artifacts carry a Scala-version suffix; _2.10 matches spark-core_2.10 above.
                 NOTE(review): confirm that this connector version is compatible with Spark 1.6.2. -->
            <dependency>
                <groupId>com.datastax.spark</groupId>
                <artifactId>spark-cassandra-connector_2.10</artifactId>
                <version>2.0.0</version>
            </dependency>
            <!-- Coordinates match the JAR installed into the local repository with mvn install:install-file. -->
            <dependency>
                <groupId>com.datastax</groupId>
                <artifactId>dse</artifactId>
                <version>5.0.3</version>
                <scope>provided</scope>
            </dependency>
        </dependencies>

        <properties>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        </properties>

    </project>
  5. Edit the code to create the SparkConf object:
    package simpleSpark;
    
    import com.datastax.spark.connector.cql.CassandraConnector;
    import org.apache.spark.SparkConf;
    import org.apache.spark.SparkContext;
    import org.apache.spark.api.java.JavaSparkContext;
    
    /**
    *  When you submit a Spark Job using dse spark-submit it automatically sets the Spark Master URL and the Spark Cassandra Connection URL.
    *  The Spark Conf then just needs to set the app name.
    **/
    public interface SparkConfSetup {
    
        static public SparkConf getSparkConf() {
            return new SparkConf()
                .setAppName("SimpleSpark");
        }
    
        static public JavaSparkContext getJavaSparkContext() {
            SparkContext usingSparkContext = new SparkContext(getSparkConf());
            return new JavaSparkContext(usingSparkContext);
        }
    
        static public CassandraConnector getCassandraConnector() {
            return CassandraConnector.apply((getSparkConf()));
        }
  6. Build the package with Maven:
    mvn clean package
  7. To use authentication with spark-submit, specify the credentials to authenticate against the configured Cassandra authentication schema:
    dse -u cassandra -p cassandra spark-submit --class simpleSpark.SparkWordCount ./target/BasicSparkDemo-0.1.jar
    
    You can provide authentication credentials in several ways; see Credentials for authentication.