kerberos相关问题记录
主要记录本人在工作中碰到的一些kerberos问题,防止下次继续踩坑。
[toc]
org.apache.hadoop.ipc.RemoteException(java.io.IOException): Delegation Token can be issued only with kerberos or web authentication
具体报错如下:
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getDelegationToken(FSNamesystem.java:7614)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getDelegationToken(NameNodeRpcServer.java:557)
at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.getDelegationToken(AuthorizationProviderProxyClientProtocol.java:666)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getDelegationToken(ClientNamenodeProtocolServerSideTranslatorPB.java:1003)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1554)
at org.apache.hadoop.ipc.Client.call(Client.java:1498)
at org.apache.hadoop.ipc.Client.call(Client.java:1398)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at com.sun.proxy.$Proxy169.getDelegationToken(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getDelegationToken(ClientNamenodeProtocolTranslatorPB.java:985)
at sun.reflect.GeneratedMethodAccessor1218.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:291)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:203)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:185)
at com.sun.proxy.$Proxy170.getDelegationToken(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.getDelegationToken(DFSClient.java:1042)
at org.apache.hadoop.hdfs.DistributedFileSystem.getDelegationToken(DistributedFileSystem.java:1689)
at org.apache.hadoop.fs.FileSystem.collectDelegationTokens(FileSystem.java:549)
at org.apache.hadoop.fs.FileSystem.addDelegationTokens(FileSystem.java:527)
at org.apache.hadoop.hdfs.DistributedFileSystem.addDelegationTokens(DistributedFileSystem.java:2400)
at org.apache.spark.deploy.yarn.security.HDFSCredentialProvider$$anonfun$getTokenRenewalInterval$1$$anonfun$apply$2.apply(HDFSCredentialProvider.scala:79)
at org.apache.spark.deploy.yarn.security.HDFSCredentialProvider$$anonfun$getTokenRenewalInterval$1$$anonfun$apply$2.apply(HDFSCredentialProvider.scala:77)
at scala.collection.immutable.Set$Set1.foreach(Set.scala:94)
at org.apache.spark.deploy.yarn.security.HDFSCredentialProvider$$anonfun$getTokenRenewalInterval$1.apply(HDFSCredentialProvider.scala:77)
at org.apache.spark.deploy.yarn.security.HDFSCredentialProvider$$anonfun$getTokenRenewalInterval$1.apply(HDFSCredentialProvider.scala:75)
at scala.Option.flatMap(Option.scala:171)
at org.apache.spark.deploy.yarn.security.HDFSCredentialProvider.getTokenRenewalInterval(HDFSCredentialProvider.scala:75)
at org.apache.spark.deploy.yarn.security.HDFSCredentialProvider.obtainCredentials(HDFSCredentialProvider.scala:55)
at org.apache.spark.deploy.yarn.security.ConfigurableCredentialManager$$anonfun$obtainCredentials$2.apply(ConfigurableCredentialManager.scala:82)
at org.apache.spark.deploy.yarn.security.ConfigurableCredentialManager$$anonfun$obtainCredentials$2.apply(ConfigurableCredentialManager.scala:80)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.MapLike$DefaultValuesIterable.foreach(MapLike.scala:206)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.deploy.yarn.security.ConfigurableCredentialManager.obtainCredentials(ConfigurableCredentialManager.scala:80)
at org.apache.spark.deploy.yarn.DtClient.prepareLocalResources(DtClient.scala:421)
at org.apache.spark.deploy.yarn.DtClient.createContainerLaunchContext(DtClient.scala:919)
at org.apache.spark.deploy.yarn.DtClient.submitApplication(DtClient.scala:178)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSparkSqlJobForBatch(SparkYarnClient.java:391)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSqlJob(SparkYarnClient.java:514)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.lambda$processSubmitJobWithType$0(SparkYarnClient.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at com.dtstack.engine.base.util.KerberosUtils.loginKerberosWithCallBack(KerberosUtils.java:144)
at com.dtstack.engine.base.util.KerberosUtils.login(KerberosUtils.java:129)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.processSubmitJobWithType(SparkYarnClient.java:156)
at com.dtstack.engine.common.client.AbstractClient.submitJob(AbstractClient.java:76)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:81)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:77)
at com.dtstack.engine.common.callback.ClassLoaderCallBackMethod.callbackAndReset(ClassLoaderCallBackMethod.java:12)
at com.dtstack.engine.common.client.ClientProxy.lambda$submitJob$1(ClientProxy.java:77)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
原因:
报这个错的原因是:client在从服务器获取delegation token时,连接所使用的认证方式是TOKEN,而不是KERBEROS、KERBEROS_SSL或者CERTIFICATE。源码如下:
/**
* Issues a new delegation token owned by the remote caller.
*
* @param renewer Renewer information
* @return delegation token, or null when the secret manager is absent or not running
* @throws IOException on error, including when the caller's connection is not
* allowed to perform delegation token operations
*/
Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
throws IOException {
Token<DelegationTokenIdentifier> token;
// The operation category is checked once before taking the write lock
// and once more after the lock has been acquired.
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot issue delegation token");
// This is the check that produces the error discussed in this note:
// the request is rejected when isAllowedDelegationTokenOp() returns false.
if (!isAllowedDelegationTokenOp()) {
throw new IOException(
"Delegation Token can be issued only with kerberos or web authentication");
}
if (dtSecretManager == null || !dtSecretManager.isRunning()) {
LOG.warn("trying to get DT with no secret manager running");
return null;
}
// The remote user becomes the token owner; the real user is only
// recorded when the UGI reports one.
UserGroupInformation ugi = getRemoteUser();
String user = ugi.getUserName();
Text owner = new Text(user);
Text realUser = null;
if (ugi.getRealUser() != null) {
realUser = new Text(ugi.getRealUser().getUserName());
}
DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
renewer, realUser);
token = new Token<DelegationTokenIdentifier>(
dtId, dtSecretManager);
// Record the new token together with its expiry time in the edit log.
long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
getEditLog().logGetDelegationToken(dtId, expiryTime);
} finally {
writeUnlock();
}
// The edit log sync is performed after the write lock has been released.
getEditLog().logSync();
return token;
}
/**
* Decides whether the current connection may perform delegation token
* operations. When security is enabled, only connections authenticated with
* KERBEROS, KERBEROS_SSL or CERTIFICATE are accepted; any other method
* (for example TOKEN) is rejected. When security is disabled the operation
* is always allowed.
*
* @return true if delegation token operation is allowed
* @throws IOException declared by the signature; presumably propagated from
* getConnectionAuthenticationMethod() - TODO confirm
*/
private boolean isAllowedDelegationTokenOp() throws IOException {
AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
if (UserGroupInformation.isSecurityEnabled()
&& (authMethod != AuthenticationMethod.KERBEROS)
&& (authMethod != AuthenticationMethod.KERBEROS_SSL)
&& (authMethod != AuthenticationMethod.CERTIFICATE)) {
return false;
}
return true;
}
其根本原因是什么呢?其实这个问题是在spark sql任务提交时发生的。spark任务在解决kerberos认证问题时,用到了HDFS DELEGATION TOKEN这个机制:每次提交之前会通过当前ugi的credentials去获取token,后面任务在运行的时候会拿着获取到的token和其他组件进行认证。但是如果每次提交用的都是同一个已经认证过的ugi,那么再次获取token的时候,因为ugi中已经包含了上一次获取的token,连接就会以TOKEN的认证方式去获取新的token。而获取token时允许的认证方式只有KERBEROS、KERBEROS_SSL和CERTIFICATE三种,因此会报如上的错。
解决办法:
每次提交spark任务时都用新的ugi去提交,不要复用之前已经提交过任务(已经持有token)的ugi。
2020-12-15 17:03:38.337 [pool-SparkYarnClient_ClientProxy-32-thread-1] DEBUG org.apache.hadoop.ipc.Client [Client.java:1188] - closing ipc connection to sjh-kf-cm01/172.29.51.33:8020: token (token for hive: HDFS_DELEGATION_TOKEN owner=hive@HADOOP.COM, renewer=yarn, realUser=, issueDate=1608019170252, maxDate=1608623970252, sequenceNumber=1147276, masterKeyId=905) can’t be found in cache
具体报错如下:
2020-12-15 17:03:38.337 [pool-SparkYarnClient_ClientProxy-32-thread-1] DEBUG org.apache.hadoop.ipc.Client [Client.java:1188] - closing ipc connection to sjh-kf-cm01/172.29.51.33:8020: token (token for hive: HDFS_DELEGATION_TOKEN owner=hive@HADOOP.COM, renewer=yarn, realUser=, issueDate=1608019170252, maxDate=1608623970252, sequenceNumber=1147276, masterKeyId=905) can't be found in cache
org.apache.hadoop.ipc.RemoteException: token (token for hive: HDFS_DELEGATION_TOKEN owner=hive@HADOOP.COM, renewer=yarn, realUser=, issueDate=1608019170252, maxDate=1608623970252, sequenceNumber=1147276, masterKeyId=905) can't be found in cache
at org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:376)
at org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:560)
at org.apache.hadoop.ipc.Client$Connection.access$1900(Client.java:375)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:729)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:725)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:725)
at org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:375)
at org.apache.hadoop.ipc.Client.getConnection(Client.java:1528)
at org.apache.hadoop.ipc.Client.call(Client.java:1451)
at org.apache.hadoop.ipc.Client.call(Client.java:1412)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
at com.sun.proxy.$Proxy176.mkdirs(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.mkdirs(ClientNamenodeProtocolTranslatorPB.java:558)
at sun.reflect.GeneratedMethodAccessor1094.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy177.mkdirs(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3000)
at org.apache.hadoop.hdfs.DFSClient.mkdirs(DFSClient.java:2970)
at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1047)
at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1043)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirsInternal(DistributedFileSystem.java:1061)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirs(DistributedFileSystem.java:1036)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:1881)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:600)
at org.apache.spark.deploy.yarn.DtClient.prepareLocalResources(DtClient.scala:469)
at org.apache.spark.deploy.yarn.DtClient.createContainerLaunchContext(DtClient.scala:929)
at org.apache.spark.deploy.yarn.DtClient.submitApplication(DtClient.scala:178)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSparkSqlJobForBatch(SparkYarnClient.java:390)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSqlJob(SparkYarnClient.java:513)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.lambda$processSubmitJobWithType$0(SparkYarnClient.java:161)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at com.dtstack.engine.base.util.KerberosUtils.loginKerberosWithCallBack(KerberosUtils.java:144)
at com.dtstack.engine.base.util.KerberosUtils.login(KerberosUtils.java:129)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.processSubmitJobWithType(SparkYarnClient.java:155)
at com.dtstack.engine.common.client.AbstractClient.submitJob(AbstractClient.java:76)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:81)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:77)
at com.dtstack.engine.common.callback.ClassLoaderCallBackMethod.callbackAndReset(ClassLoaderCallBackMethod.java:12)
at com.dtstack.engine.common.client.ClientProxy.lambda$submitJob$1(ClientProxy.java:77)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
原因:
这跟Delegation Token的生命周期有关系。ResourceManager会在每个Job运行完成后去移除该任务所产生的Delegation Token。如果之后的提交仍然拿着这个已经被移除的token去认证,服务端就找不到它了,于是报token can't be found in cache。
解决办法:
可以通过ApplicationSubmissionContext.setCancelTokensWhenComplete(false)禁止每次任务完成后清理token。
2020-12-23 19:51:43:submit job get unknown error java.io.IOException: Login failure for hdfs/eng-cdh1@DTSTACK.COM from keytab /home/admin/app/dt-center-engine/kerberos/keytab/data/sftp/CONSOLE_kerberos_principal/SPARK/kerberos/hdfs_pure.keytab: javax.security.auth.login.LoginException: Checksum failed
具体报错如下:
2020-12-23 19:51:43:submit job get unknown error
java.io.IOException: Login failure for hdfs/eng-cdh1@DTSTACK.COM from keytab /home/admin/app/dt-center-engine/kerberos/keytab/data/sftp/CONSOLE_kerberos_principal/SPARK/kerberos/hdfs_pure.keytab: javax.security.auth.login.LoginException: Checksum failed
at org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab(UserGroupInformation.java:986)
at org.apache.spark.deploy.yarn.DtClient.initSecurity(DtClient.scala:401)
at org.apache.spark.deploy.yarn.DtClient.prepareLocalResources(DtClient.scala:419)
at org.apache.spark.deploy.yarn.DtClient.createContainerLaunchContext(DtClient.scala:920)
at org.apache.spark.deploy.yarn.DtClient.submitApplication(DtClient.scala:178)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSparkSqlJobForBatch(SparkYarnClient.java:391)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSqlJob(SparkYarnClient.java:514)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.lambda$processSubmitJobWithType$0(SparkYarnClient.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at com.dtstack.engine.base.util.KerberosUtils.loginKerberosWithCallBack(KerberosUtils.java:149)
at com.dtstack.engine.base.util.KerberosUtils.login(KerberosUtils.java:134)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.processSubmitJobWithType(SparkYarnClient.java:156)
at com.dtstack.engine.common.client.AbstractClient.submitJob(AbstractClient.java:76)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:81)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:77)
at com.dtstack.engine.common.callback.ClassLoaderCallBackMethod.callbackAndReset(ClassLoaderCallBackMethod.java:12)
at com.dtstack.engine.common.client.ClientProxy.lambda$submitJob$1(ClientProxy.java:77)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: javax.security.auth.login.LoginException: Checksum failed
at com.sun.security.auth.module.Krb5LoginModule.attemptAuthentication(Krb5LoginModule.java:804)
at com.sun.security.auth.module.Krb5LoginModule.login(Krb5LoginModule.java:617)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at javax.security.auth.login.LoginContext.invoke(LoginContext.java:755)
at javax.security.auth.login.LoginContext.access$000(LoginContext.java:195)
at javax.security.auth.login.LoginContext$4.run(LoginContext.java:682)
at javax.security.auth.login.LoginContext$4.run(LoginContext.java:680)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.login.LoginContext.invokePriv(LoginContext.java:680)
at javax.security.auth.login.LoginContext.login(LoginContext.java:587)
at org.apache.hadoop.security.UserGroupInformation.loginUserFromKeytab(UserGroupInformation.java:976)
... 22 more
Caused by: KrbException: Checksum failed
at sun.security.krb5.internal.crypto.ArcFourHmacEType.decrypt(ArcFourHmacEType.java:102)
at sun.security.krb5.internal.crypto.ArcFourHmacEType.decrypt(ArcFourHmacEType.java:94)
at sun.security.krb5.EncryptedData.decrypt(EncryptedData.java:175)
at sun.security.krb5.KrbAsRep.decrypt(KrbAsRep.java:149)
at sun.security.krb5.KrbAsRep.decryptUsingKeyTab(KrbAsRep.java:121)
at sun.security.krb5.KrbAsReqBuilder.resolve(KrbAsReqBuilder.java:285)
at sun.security.krb5.KrbAsReqBuilder.action(KrbAsReqBuilder.java:361)
at com.sun.security.auth.module.Krb5LoginModule.attemptAuthentication(Krb5LoginModule.java:776)
... 35 more
Caused by: java.security.GeneralSecurityException: Checksum failed
at sun.security.krb5.internal.crypto.dk.ArcFourCrypto.decrypt(ArcFourCrypto.java:408)
at sun.security.krb5.internal.crypto.ArcFourHmac.decrypt(ArcFourHmac.java:91)
at sun.security.krb5.internal.crypto.ArcFourHmacEType.decrypt(ArcFourHmacEType.java:100)
... 42 more
原因:
- principal不可用或者密码错误,导致认证失败。
- 另外一种情况出现在多kdc认证时:一个keytab文件中存在多个principal,而认证时使用的principal跟所认证的服务对不上。
解决办法:
- 重新生成有效的keytab文件
- 选择正确的principal去认证对应的服务。
Caused by GSSException: No valid credentials provided (Mechanism level: Integrity check on decrypted field failed (31) - PROCESS_TGS)
具体报错如下:
2020-12-10 19:03:11:submit job get unknown error
java.io.IOException: Failed on local exception: java.io.IOException: javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Integrity check on decrypted field failed (31) - PROCESS_TGS)]; Host Details : local host is: "dp-kf-app1/172.25.4.174"; destination host is: "cdh-kf-cm01":8032;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:785)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1558)
at org.apache.hadoop.ipc.Client.call(Client.java:1498)
at org.apache.hadoop.ipc.Client.call(Client.java:1398)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at com.sun.proxy.$Proxy163.getClusterMetrics(Unknown Source)
at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getClusterMetrics(ApplicationClientProtocolPBClientImpl.java:206)
at sun.reflect.GeneratedMethodAccessor1054.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:291)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:203)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:185)
at com.sun.proxy.$Proxy164.getClusterMetrics(Unknown Source)
at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getYarnClusterMetrics(YarnClientImpl.java:524)
at org.apache.spark.deploy.yarn.DtClient$$anonfun$submitApplication$1.apply(DtClient.scala:159)
at org.apache.spark.deploy.yarn.DtClient$$anonfun$submitApplication$1.apply(DtClient.scala:159)
at org.apache.spark.internal.Logging$class.logInfo(Logging.scala:54)
at org.apache.spark.deploy.yarn.DtClient.logInfo(DtClient.scala:60)
at org.apache.spark.deploy.yarn.DtClient.submitApplication(DtClient.scala:158)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSparkSqlJobForBatch(SparkYarnClient.java:391)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.submitSqlJob(SparkYarnClient.java:514)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.lambda$processSubmitJobWithType$0(SparkYarnClient.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at com.dtstack.engine.base.util.KerberosUtils.loginKerberosWithCallBack(KerberosUtils.java:144)
at com.dtstack.engine.base.util.KerberosUtils.login(KerberosUtils.java:129)
at com.dtstack.engine.sparkyarn.sparkyarn.SparkYarnClient.processSubmitJobWithType(SparkYarnClient.java:156)
at com.dtstack.engine.common.client.AbstractClient.submitJob(AbstractClient.java:76)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:81)
at com.dtstack.engine.common.client.ClientProxy$2.execute(ClientProxy.java:77)
at com.dtstack.engine.common.callback.ClassLoaderCallBackMethod.callbackAndReset(ClassLoaderCallBackMethod.java:12)
at com.dtstack.engine.common.client.ClientProxy.lambda$submitJob$1(ClientProxy.java:77)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Integrity check on decrypted field failed (31) - PROCESS_TGS)]
at org.apache.hadoop.ipc.Client$Connection$1.run(Client.java:720)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at org.apache.hadoop.ipc.Client$Connection.handleSaslConnectionFailure(Client.java:683)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:770)
at org.apache.hadoop.ipc.Client$Connection.access$3200(Client.java:397)
at org.apache.hadoop.ipc.Client.getConnection(Client.java:1620)
at org.apache.hadoop.ipc.Client.call(Client.java:1451)
... 35 more
Caused by: javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Integrity check on decrypted field failed (31) - PROCESS_TGS)]
at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:211)
at org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:414)
at org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:595)
at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:397)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:762)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:758)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:758)
... 38 more
Caused by: GSSException: No valid credentials provided (Mechanism level: Integrity check on decrypted field failed (31) - PROCESS_TGS)
at sun.security.jgss.krb5.Krb5Context.initSecContext(Krb5Context.java:770)
at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:248)
at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:179)
at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:192)
... 47 more
Caused by: KrbException: Integrity check on decrypted field failed (31) - PROCESS_TGS
at sun.security.krb5.KrbTgsRep.<init>(KrbTgsRep.java:73)
at sun.security.krb5.KrbTgsReq.getReply(KrbTgsReq.java:251)
at sun.security.krb5.KrbTgsReq.sendAndGetCreds(KrbTgsReq.java:262)
at sun.security.krb5.internal.CredentialsUtil.serviceCreds(CredentialsUtil.java:308)
at sun.security.krb5.internal.CredentialsUtil.acquireServiceCreds(CredentialsUtil.java:126)
at sun.security.krb5.Credentials.acquireServiceCreds(Credentials.java:458)
at sun.security.jgss.krb5.Krb5Context.initSecContext(Krb5Context.java:693)
... 50 more
Caused by: KrbException: Identifier doesn't match expected value (906)
at sun.security.krb5.internal.KDCRep.init(KDCRep.java:140)
at sun.security.krb5.internal.TGSRep.init(TGSRep.java:65)
at sun.security.krb5.internal.TGSRep.<init>(TGSRep.java:60)
at sun.security.krb5.KrbTgsRep.<init>(KrbTgsRep.java:55)
... 56 more
原因:
这意味着keytab中存储的key与kdc中存储的key匹配不上。
解决办法:
检查服务器密码是否被修改,以及keytab文件是否过期。除此之外,还需要检查当前服务器与kdc服务器的时间是否有偏差。