How to programmatically access a Hadoop cluster where Kerberos is enabled

Question

Here is the code I am using to fetch a file from a Hadoop filesystem. On my local single-node setup, this code fetches the file successfully.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

public class HDFSdownloader{

/**
 * Command-line entry point.
 *
 * Expects exactly three arguments and hands them, in order, to
 * {@code copy2local(hdfs, src, dst)}. With any other argument count it
 * prints a usage line and exits with status 1.
 *
 * @param args the three values: hdfs, src, dst
 * @throws Exception anything propagated from {@code copy2local}
 */
public static void main(String[] args) throws Exception {

    // The returned value is discarded, so this lookup does not influence the run.
    System.getProperty("java.classpath");

    final boolean wrongArgCount = args.length != 3;
    if (wrongArgCount) {
        System.out.println("use: HDFSdownloader hdfs src dst");
        System.exit(1);
    }

    System.out.println(HDFSdownloader.class.getName());

    final HDFSdownloader downloader = new HDFSdownloader();
    final String hdfs = args[0];
    final String src = args[1];
    final String dst = args[2];
    downloader.copy2local(hdfs, src, dst);

}

/**
 * Copies {@code src} from the filesystem described by {@code hdfs} to the
 * local path {@code dst} using {@code FileSystem.copyToLocalFile}.
 *
 * @param hdfs value stored under the "fs.default.name" configuration key
 * @param src  path handed to copyToLocalFile as the source
 * @param dst  path handed to copyToLocalFile as the destination
 * @throws IOException declared for the filesystem calls made below
 */
private void copy2local(String hdfs, String src, String dst) throws IOException {

    System.out.println("!! Entering function !!");
    Configuration conf = new Configuration();
    // Store the implementation class names under the fs.hdfs.impl and fs.file.impl keys.
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // NOTE(review): no authentication-related property is set on conf anywhere in this method.
    conf.set("fs.default.name", hdfs);

    // Obtain the FileSystem for conf and perform the copy in a single chained call.
    FileSystem.get(conf).copyToLocalFile(new Path(src), new Path(dst));

    System.out.println("!! copytoLocalFile Reached!!");

I then bundled the same code into a jar and ran it on another node (say, B). This time the code had to fetch a file from a proper distributed Hadoop cluster, and that cluster has Kerberos enabled.

Here I got the following error:

Exception in thread "main" org.apache.hadoop.security.AccessControlException: SIMPLE authentication is not enabled.  Available:[TOKEN, KERBEROS]
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:2115)
at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1305)
at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1301)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1317)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:337)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:289)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2030)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:1999)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:1975)
at com.hdfs.test.hdfs_util.HDFSdownloader.copy2local(HDFSdownloader.java:49)
at com.hdfs.test.hdfs_util.HDFSdownloader.main(HDFSdownloader.java:35)

Can you please help me out here?

coldcode · Answer 1 · Mar 27, 2018

Okay, here's a code snippet that works in the scenario you have described above. Let me point out a few important points before you look at the code:

Provide the keytab file location to UserGroupInformation
Provide the Kerberos realm details through a JVM argument that points to the krb5.conf file
Set the Hadoop security authentication mode to kerberos

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

public class KerberosHDFSIO {

public static void main(String[] args) throws IOException {

    Configuration conf = new Configuration();
    //The following property is enough for a non-kerberized setup
    //      conf.set("fs.defaultFS", "localhost:9000");

    //need following set of properties to access a kerberized cluster
    conf.set("fs.defaultFS", "hdfs://devha:8020");
    conf.set("hadoop.security.authentication", "kerberos");

    //The location of krb5.conf file needs to be provided in the VM arguments for the JVM
    //-Djava.security.krb5.conf=/Users/user/Desktop/utils/cluster/dev/krb5.conf

    UserGroupInformation.setConfiguration(conf);
    UserGroupInformation.loginUserFromKeytab("user@HADOOP_DEV.ABC.COM",
            "/Users/user/Desktop/utils/cluster/dev/.user.keytab");

    try (FileSystem fs = FileSystem.get(conf);) {
        FileStatus[] fileStatuses = fs.listStatus(new Path("/user/username/dropoff"));
        for (FileStatus fileStatus : fileStatuses) {
            System.out.println(fileStatus.getPath().getName());
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }