概述
某客户项目在登录VCSA的时候,提示
503 Service Unavailable (Failed to connect to endpoint: [N7Vmacore4Http20NamedPipeServiceSpecE:0x0000557445f8baf0] _serverNamespace = / action = Allow _pipeName =/var/run/vmware/vpxd-webserver-pipe)
错误,如下图所示:
本文记录并分享该case解决方案。
解决过程
在此之前,建议对VC进行至少一次内存快照!
证书检查
通过一下命令查看证书是否过期
for i in $(/usr/lib/vmware-vmafd/bin/vecs-cli store list); do echo STORE $i; /usr/lib/vmware-vmafd/bin/vecs-cli entry list --store $i --text | egrep "Alias|Not After"; done
或者使用脚本检查,将下面脚本创建为 checksts.py ,并赋予其可执行权限 chmod +x checksts.py
#!/opt/vmware/bin/python
"""
Copyright 2020-2022 VMware, Inc. All rights reserved. -- VMware Confidential
Author: Keenan Matheny (keenanm@vmware.com)
"""
##### BEGIN IMPORTS #####
import os
import sys
import json
import subprocess
import re
import pprint
import ssl
from datetime import datetime, timedelta
import textwrap
from codecs import encode, decode
import subprocess
from time import sleep
try:
# Python 3 hack.
import urllib.request as urllib2
import urllib.parse as urlparse
except ImportError:
import urllib2
import urlparse
sys.path.append(os.environ['VMWARE_PYTHON_PATH'])
from cis.defaults import def_by_os
sys.path.append(os.path.join(os.environ['VMWARE_CIS_HOME'],
def_by_os('vmware-vmafd/lib64', 'vmafdd')))
import vmafd
from OpenSSL.crypto import (load_certificate, dump_privatekey, dump_certificate, X509, X509Name, PKey)
from OpenSSL.crypto import (TYPE_DSA, TYPE_RSA, FILETYPE_PEM, FILETYPE_ASN1 )
today = datetime.now()
today = today.strftime("%d-%m-%Y")
vcsa_kblink = "https://kb.vmware.com/s/article/76719"
win_kblink = "https://kb.vmware.com/s/article/79263"
##### END IMPORTS #####
class parseCert( object ):
# Certificate parsing
def format_subject_issuer(self, x509name):
items = []
for item in x509name.get_components():
items.append('%s=%s' % (decode(item[0],'utf-8'), decode(item[1],'utf-8')))
return ", ".join(items)
def format_asn1_date(self, d):
return datetime.strptime(decode(d,'utf-8'), '%Y%m%d%H%M%SZ').strftime("%Y-%m-%d %H:%M:%S GMT")
def merge_cert(self, extensions, certificate):
z = certificate.copy()
z.update(extensions)
return z
def __init__(self, certdata):
built_cert = certdata
self.x509 = load_certificate(FILETYPE_PEM, built_cert)
keytype = self.x509.get_pubkey().type()
keytype_list = {TYPE_RSA:'rsaEncryption', TYPE_DSA:'dsaEncryption', 408:'id-ecPublicKey'}
extension_list = ["extendedKeyUsage",
"keyUsage",
"subjectAltName",
"subjectKeyIdentifier",
"authorityKeyIdentifier"]
key_type_str = keytype_list[keytype] if keytype in keytype_list else 'other'
certificate = {}
extension = {}
for i in range(self.x509.get_extension_count()):
critical = 'critical' if self.x509.get_extension(i).get_critical() else ''
if decode(self.x509.get_extension(i).get_short_name(),'utf-8') in extension_list:
extension[decode(self.x509.get_extension(i).get_short_name(),'utf-8')] = self.x509.get_extension(i).__str__()
certificate = {'Thumbprint': decode(self.x509.digest('sha1'),'utf-8'), 'Version': self.x509.get_version(),
'SignatureAlg' : decode(self.x509.get_signature_algorithm(),'utf-8'), 'Issuer' :self.format_subject_issuer(self.x509.get_issuer()),
'Valid From' : self.format_asn1_date(self.x509.get_notBefore()), 'Valid Until' : self.format_asn1_date(self.x509.get_notAfter()),
'Subject' : self.format_subject_issuer(self.x509.get_subject())}
combined = self.merge_cert(extension,certificate)
cert_output = json.dumps(combined)
self.subjectAltName = combined.get('subjectAltName')
self.subject = combined.get('Subject')
self.validfrom = combined.get('Valid From')
self.validuntil = combined.get('Valid Until')
self.thumbprint = combined.get('Thumbprint')
self.subjectkey = combined.get('subjectKeyIdentifier')
self.authkey = combined.get('authorityKeyIdentifier')
self.combined = combined
class parseSts( object ):
def __init__(self):
self.processed = []
self.results = {}
self.results['expired'] = {}
self.results['expired']['root'] = []
self.results['expired']['leaf'] = []
self.results['valid'] = {}
self.results['valid']['root'] = []
self.results['valid']['leaf'] = []
def get_certs(self,force_refresh):
urllib2.getproxies = lambda: {}
vmafd_client = vmafd.client('localhost')
domain_name = vmafd_client.GetDomainName()
dc_name = vmafd_client.GetAffinitizedDC(domain_name, force_refresh)
if vmafd_client.GetPNID() == dc_name:
url = (
'http://localhost:7080/idm/tenant/%s/certificates?scope=TENANT'
% domain_name)
else:
url = (
'https://%s/idm/tenant/%s/certificates?scope=TENANT'
% (dc_name,domain_name))
return json.loads(urllib2.urlopen(url).read().decode('utf-8'))
def check_cert(self,certificate):
cert = parseCert(certificate)
certdetail = cert.combined
# Attempt to identify what type of certificate it is
if cert.authkey:
cert_type = "leaf"
else:
cert_type = "root"
# Try to only process a cert once
if cert.thumbprint not in self.processed:
# Date conversion
self.processed.append(cert.thumbprint)
exp = cert.validuntil.split()[0]
conv_exp = datetime.strptime(exp, '%Y-%m-%d')
exp = datetime.strftime(conv_exp, '%d-%m-%Y')
now = datetime.strptime(today, '%d-%m-%Y')
exp_date = datetime.strptime(exp, '%d-%m-%Y')
# Get number of days until it expires
diff = exp_date - now
certdetail['daysUntil'] = diff.days
# Sort expired certs into leafs and roots, put the rest in goodcerts.
if exp_date <= now:
self.results['expired'][cert_type].append(certdetail)
else:
self.results['valid'][cert_type].append(certdetail)
def execute(self):
json = self.get_certs(force_refresh=False)
for item in json:
for certificate in item['certificates']:
self.check_cert(certificate['encoded'])
return self.results
def main():
warning = False
warningmsg = '''
WARNING!
You have expired STS certificates. Please follow the KB corresponding to your OS:
VCSA: %s
Windows: %s
''' % (vcsa_kblink, win_kblink)
parse_sts = parseSts()
results = parse_sts.execute()
valid_count = len(results['valid']['leaf']) + len(results['valid']['root'])
expired_count = len(results['expired']['leaf']) + len(results['expired']['root'])
#### Display Valid ####
print("\n%s VALID CERTS\n================" % valid_count)
print("\n\tLEAF CERTS:\n")
if len(results['valid']['leaf']) > 0:
for cert in results['valid']['leaf']:
print("\t[] Certificate %s will expire in %s days (%s years)." % (cert['Thumbprint'], cert['daysUntil'], round(cert['daysUntil']/365)))
else:
print("\tNone")
print("\n\tROOT CERTS:\n")
if len(results['valid']['root']) > 0:
for cert in results['valid']['root']:
print("\t[] Certificate %s will expire in %s days (%s years)." % (cert['Thumbprint'], cert['daysUntil'], round(cert['daysUntil']/365)))
else:
print("\tNone")
#### Display expired ####
print("\n%s EXPIRED CERTS\n================" % expired_count)
print("\n\tLEAF CERTS:\n")
if len(results['expired']['leaf']) > 0:
for cert in results['expired']['leaf']:
print("\t[] Certificate: %s expired on %s!" % (cert.get('Thumbprint'),cert.get('Valid Until')))
continue
else:
print("\tNone")
print("\n\tROOT CERTS:\n")
if len(results['expired']['root']) > 0:
for cert in results['expired']['root']:
print("\t[] Certificate: %s expired on %s!" % (cert.get('Thumbprint'),cert.get('Valid Until')))
continue
else:
print("\tNone")
if expired_count > 0:
print(warningmsg)
if __name__ == '__main__':
exit(main())
发现其确实存在过期的签名证书
修复证书
将下面脚本创建为 vcsa_fixsts.sh ,并赋予其可执行权限 chmod +x vcsa_fixsts.sh
#!/bin/bash
# Copyright (c) 2020-2021 VMware, Inc. All rights reserved.
# VMware Confidential
#
# Run this from the affected PSC/VC
#
# NOTE: This works on external and embedded PSCs
# This script will do the following
# 1: Regenerate STS certificate
#
# What is needed?
# 1: Offline snapshots of VCs/PSCs
# 2: SSO Admin Password
NODETYPE=$(cat /etc/vmware/deployment.node.type)
if [ "$NODETYPE" = "management" ]; then
echo "Detected this node is a vCenter server with external PSC."
echo "Please run this script from a vCenter with embedded PSC, or an external PSC"
exit 1
fi
if [ "$NODETYPE" = "embedded" ] && [ ! -f /usr/lib/vmware-vmdir/sbin/vmdird ]; then
echo "Detected this node is a vCenter gateway"
echo "Please run this script from a vCenter with embedded PSC, or an external PSC"
exit 1
fi
echo "NOTE: This works on external and embedded PSCs"
echo "This script will do the following"
echo "1: Regenerate STS certificate"
echo "What is needed?"
echo "1: Offline snapshots of VCs/PSCs"
echo "2: SSO Admin Password"
echo "IMPORTANT: This script should only be run on a single PSC per SSO domain"
mkdir -p /tmp/vmware-fixsts
SCRIPTPATH="/tmp/vmware-fixsts"
LOGFILE="$SCRIPTPATH/fix_sts_cert.log"
echo "==================================" | tee -a $LOGFILE
echo "Resetting STS certificate for $HOSTNAME started on $(date)" | tee -a $LOGFILE
echo ""| tee -a $LOGFILE
echo ""
DN=$(/opt/likewise/bin/lwregshell list_values '[HKEY_THIS_MACHINE\Services\vmdir]' | grep dcAccountDN | awk '{$1=$2=$3="";print $0}'|tr -d '"'|sed -e 's/^[ \t]*//')
echo "Detected DN: $DN" | tee -a $LOGFILE
PNID=$(/opt/likewise/bin/lwregshell list_values '[HKEY_THIS_MACHINE\Services\vmafd\Parameters]' | grep PNID | awk '{print $4}'|tr -d '"')
echo "Detected PNID: $PNID" | tee -a $LOGFILE
PSC=$(/opt/likewise/bin/lwregshell list_values '[HKEY_THIS_MACHINE\Services\vmafd\Parameters]' | grep DCName | awk '{print $4}'|tr -d '"')
echo "Detected PSC: $PSC" | tee -a $LOGFILE
DOMAIN=$(/opt/likewise/bin/lwregshell list_values '[HKEY_THIS_MACHINE\Services\vmafd\Parameters]' | grep DomainName | awk '{print $4}'|tr -d '"')
echo "Detected SSO domain name: $DOMAIN" | tee -a $LOGFILE
SITE=$(/opt/likewise/bin/lwregshell list_values '[HKEY_THIS_MACHINE\Services\vmafd\Parameters]' | grep SiteName | awk '{print $4}'|tr -d '"')
MACHINEID=$(/usr/lib/vmware-vmafd/bin/vmafd-cli get-machine-id --server-name localhost)
echo "Detected Machine ID: $MACHINEID" | tee -a $LOGFILE
IPADDRESS=$(ifconfig | grep eth0 -A1 | grep "inet addr" | awk -F ':' '{print $2}' | awk -F ' ' '{print $1}')
echo "Detected IP Address: $IPADDRESS" | tee -a $LOGFILE
DOMAINCN="dc=$(echo "$DOMAIN" | sed 's/\./,dc=/g')"
echo "Domain CN: $DOMAINCN"
ADMIN="cn=administrator,cn=users,$DOMAINCN"
USERNAME="administrator@${DOMAIN^^}"
ROOTCERTDATE=$(openssl x509 -in /var/lib/vmware/vmca/root.cer -text | grep "Not After" | awk -F ' ' '{print $7,$4,$5}')
TODAYSDATE=$(date +"%Y %b %d")
echo "#" > $SCRIPTPATH/certool.cfg
echo "# Template file for a CSR request" >> $SCRIPTPATH/certool.cfg
echo "#" >> certool.cfg
echo "# Country is needed and has to be 2 characters" >> $SCRIPTPATH/certool.cfg
echo "Country = DS" >> $SCRIPTPATH/certool.cfg
echo "Name = $PNID" >> $SCRIPTPATH/certool.cfg
echo "Organization = VMware" >> $SCRIPTPATH/certool.cfg
echo "OrgUnit = VMware" >> $SCRIPTPATH/certool.cfg
echo "State = VMware" >> $SCRIPTPATH/certool.cfg
echo "Locality = VMware" >> $SCRIPTPATH/certool.cfg
echo "IPAddress = $IPADDRESS" >> $SCRIPTPATH/certool.cfg
echo "Email = email@acme.com" >> $SCRIPTPATH/certool.cfg
echo "Hostname = $PNID" >> $SCRIPTPATH/certool.cfg
echo "==================================" | tee -a $LOGFILE
echo "==================================" | tee -a $LOGFILE
echo ""
echo "Detected Root's certificate expiration date: $ROOTCERTDATE" | tee -a $LOGFILE
echo "Detected today's date: $TODAYSDATE" | tee -a $LOGFILE
echo "==================================" | tee -a $LOGFILE
flag=0
if [[ $TODAYSDATE > $ROOTCERTDATE ]];
then
echo "IMPORTANT: Root certificate is expired, so it will be replaced" | tee -a $LOGFILE
flag=1
mkdir /certs && cd /certs
cp $SCRIPTPATH/certool.cfg /certs/vmca.cfg
/usr/lib/vmware-vmca/bin/certool --genselfcacert --outprivkey /certs/vmcacert.key --outcert /certs/vmcacert.crt --config /certs/vmca.cfg
/usr/lib/vmware-vmca/bin/certool --rootca --cert /certs/vmcacert.crt --privkey /certs/vmcacert.key
fi
echo "#" > $SCRIPTPATH/certool.cfg
echo "# Template file for a CSR request" >> $SCRIPTPATH/certool.cfg
echo "#" >> $SCRIPTPATH/certool.cfg
echo "# Country is needed and has to be 2 characters" >> $SCRIPTPATH/certool.cfg
echo "Country = DS" >> $SCRIPTPATH/certool.cfg
echo "Name = STS" >> $SCRIPTPATH/certool.cfg
echo "Organization = VMware" >> $SCRIPTPATH/certool.cfg
echo "OrgUnit = VMware" >> $SCRIPTPATH/certool.cfg
echo "State = VMware" >> $SCRIPTPATH/certool.cfg
echo "Locality = VMware" >> $SCRIPTPATH/certool.cfg
echo "IPAddress = $IPADDRESS" >> $SCRIPTPATH/certool.cfg
echo "Email = email@acme.com" >> $SCRIPTPATH/certool.cfg
echo "Hostname = $PNID" >> $SCRIPTPATH/certool.cfg
echo ""
echo "Exporting and generating STS certificate" | tee -a $LOGFILE
echo ""
cd $SCRIPTPATH
/usr/lib/vmware-vmca/bin/certool --server localhost --genkey --privkey=sts.key --pubkey=sts.pub
/usr/lib/vmware-vmca/bin/certool --gencert --cert=sts.cer --privkey=sts.key --config=$SCRIPTPATH/certool.cfg
openssl x509 -outform der -in sts.cer -out sts.der
CERTS=$(csplit -f root /var/lib/vmware/vmca/root.cer '/-----BEGIN CERTIFICATE-----/' '{*}' | wc -l)
openssl pkcs8 -topk8 -inform pem -outform der -in sts.key -out sts.key.der -nocrypt
i=1
until [ $i -eq $CERTS ]
do
openssl x509 -outform der -in root0$i -out vmca0$i.der
((i++))
done
echo ""
echo ""
read -s -p "Enter password for administrator@$DOMAIN: " DOMAINPASSWORD
echo ""
# Find the highest tenant credentials index
MAXCREDINDEX=1
while read -r line
do
INDEX=$(echo "$line" | tr -dc '0-9')
if [ $INDEX -gt $MAXCREDINDEX ]
then
MAXCREDINDEX=$INDEX
fi
done < <(/opt/likewise/bin/ldapsearch -h localhost -p 389 -b "cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" -D "cn=administrator,cn=users,$DOMAINCN" -w "$DOMAINPASSWORD" "(objectclass=vmwSTSTenantCredential)" cn | grep cn:)
# Sequentially search for tenant credentials up to max index and delete if found
echo "Highest tenant credentials index : $MAXCREDINDEX" | tee -a $LOGFILE
i=1
if [ ! -z $MAXCREDINDEX ]
then
until [ $i -gt $MAXCREDINDEX ]
do
echo "Exporting tenant $i to $SCRIPTPATH" | tee -a $LOGFILE
echo ""
ldapsearch -h localhost -D "cn=administrator,cn=users,$DOMAINCN" -w "$DOMAINPASSWORD" -b "cn=TenantCredential-$i,cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" > $SCRIPTPATH/tenantcredential-$i.ldif
if [ $? -eq 0 ]
then
echo "Deleting tenant $i" | tee -a $LOGFILE
ldapdelete -h localhost -D "cn=administrator,cn=users,$DOMAINCN" -w "$DOMAINPASSWORD" "cn=TenantCredential-$i,cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" | tee -a $LOGFILE
else
echo "Tenant $i not found" | tee -a $LOGFILE
echo ""
fi
((i++))
done
fi
echo ""
# Find the highest trusted cert chains index
MAXCERTCHAINSINDEX=1
while read -r line
do
INDEX=$(echo "$line" | tr -dc '0-9')
if [ $INDEX -gt $MAXCERTCHAINSINDEX ]
then
MAXCERTCHAINSINDEX=$INDEX
fi
done < <(/opt/likewise/bin/ldapsearch -h localhost -p 389 -b "cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" -D "cn=administrator,cn=users,$DOMAINCN" -w "$DOMAINPASSWORD" "(objectclass=vmwSTSTenantTrustedCertificateChain)" cn | grep cn:)
# Sequentially search for trusted cert chains up to max index and delete if found
echo "Highest trusted cert chains index: $MAXCERTCHAINSINDEX" | tee -a $LOGFILE
i=1
if [ ! -z $MAXCERTCHAINSINDEX ]
then
until [ $i -gt $MAXCERTCHAINSINDEX ]
do
echo "Exporting trustedcertchain $i to $SCRIPTPATH" | tee -a $LOGFILE
echo ""
ldapsearch -h localhost -D "cn=administrator,cn=users,$DOMAINCN" -w "$DOMAINPASSWORD" -b "cn=TrustedCertChain-$i,cn=TrustedCertificateChains,cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" > $SCRIPTPATH/trustedcertchain-$i.ldif
if [ $? -eq 0 ]
then
echo "Deleting trustedcertchain $i" | tee -a $LOGFILE
ldapdelete -h localhost -D "cn=administrator,cn=users,$DOMAINCN" -w "$DOMAINPASSWORD" "cn=TrustedCertChain-$i,cn=TrustedCertificateChains,cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" | tee -a $LOGFILE
else
echo "Trusted cert chain $i not found" | tee -a $LOGFILE
fi
echo ""
((i++))
done
fi
echo ""
i=1
echo "dn: cn=TenantCredential-1,cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" > sso-sts.ldif
echo "changetype: add" >> sso-sts.ldif
echo "objectClass: vmwSTSTenantCredential" >> sso-sts.ldif
echo "objectClass: top" >> sso-sts.ldif
echo "cn: TenantCredential-1" >> sso-sts.ldif
echo "userCertificate:< file:sts.der" >> sso-sts.ldif
until [ $i -eq $CERTS ]
do
echo "userCertificate:< file:vmca0$i.der" >> sso-sts.ldif
((i++))
done
echo "vmwSTSPrivateKey:< file:sts.key.der" >> sso-sts.ldif
echo "" >> sso-sts.ldif
echo "dn: cn=TrustedCertChain-1,cn=TrustedCertificateChains,cn=$DOMAIN,cn=Tenants,cn=IdentityManager,cn=Services,$DOMAINCN" >> sso-sts.ldif
echo "changetype: add" >> sso-sts.ldif
echo "objectClass: vmwSTSTenantTrustedCertificateChain" >> sso-sts.ldif
echo "objectClass: top" >> sso-sts.ldif
echo "cn: TrustedCertChain-1" >> sso-sts.ldif
echo "userCertificate:< file:sts.der" >> sso-sts.ldif
i=1
until [ $i -eq $CERTS ]
do
echo "userCertificate:< file:vmca0$i.der" >> sso-sts.ldif
((i++))
done
echo ""
echo "Applying newly generated STS certificate to SSO domain" | tee -a $LOGFILE
/opt/likewise/bin/ldapmodify -x -h localhost -p 389 -D "cn=administrator,cn=users,$DOMAINCN" -w "$DOMAINPASSWORD" -f sso-sts.ldif | tee -a $LOGFILE
echo ""
echo "Replacement finished - Please restart services on all vCenters and PSCs in your SSO domain" | tee -a $LOGFILE
echo "==================================" | tee -a $LOGFILE
echo "IMPORTANT: In case you're using HLM (Hybrid Linked Mode) without a gateway, you would need to re-sync the certs from Cloud to On-Prem after following this procedure" | tee -a $LOGFILE
echo "==================================" | tee -a $LOGFILE
echo "==================================" | tee -a $LOGFILE
if [ $flag == 1 ]
then
echo "Since your Root certificate was expired and was replaced, you will need to replace your MachineSSL and Solution User certificates" | tee -a $LOGFILE
echo "You can do so following this KB: https://kb.vmware.com/s/article/2097936" | tee -a $LOGFILE
fi
脚本输入如下
输入 administrator@vsphere.local 密码并按回车执行。
重置SSO密码
如果SSO密码忘记密码了呢?
进入目录
cd /usr/lib/vmware-vmdir/bin/
然后输入
./vdcadmintool
在菜单中选择 3 Reset account password
完整重置administrator@vsphere.local 密码过程如下图所示
完成修复
重置密码后,可以继续使用修复脚本进行修复。
根据提示可见,已完成修复,需要重启相关服务。
service-control --stop --all && service-control --start --all
可见服务重启完成
可以再看下服务的状态
service-control --status --all
服务都起来了。
登录验证
访问Web Client 正常
使用刚才重置后的密码进行登录
正常访问,恢复正常。
至此,已完成修复工作。