xiaoxue85 发表于 2019-1-31 12:05:14

部署k8s ssl集群实践4:部署etcd集群

  参考文档:
https://github.com/opsnull/follow-me-install-kubernetes-cluster
感谢作者的无私分享。
集群环境已搭建成功跑起来。
文章是部署过程中遇到的错误和详细操作步骤记录。如有需要对比参考,请按照顺序阅读和测试。
  4.1
下载和分发二进制安装包

# wget https://github.com/coreos/etcd/releases/download/v3.3.7/etcd-v3.3.7-linux-amd64.tar.gz
# ls
etcd-v3.3.7-linux-amd64.tar.gz  kubernetes  kubernetes-client-linux-amd64.tar.gz  kubernetes-src.tar.gz
#
# tar zxvf etcd-v3.3.7-linux-amd64.tar.gz
# ls
etcd-v3.3.7-linux-amd64 
  分发到所有节点

# cp etcd-v3.3.7-linux-amd64/etcd* /opt/k8s/bin
# scp etcd-v3.3.7-linux-amd64/etcd* root@k8s-node1:/opt/k8s/bin
etcd                                                                                  100%   18MB  91.6MB/s   00:00   
etcdctl                                                                               100%   15MB  96.1MB/s   00:00   
# scp etcd-v3.3.7-linux-amd64/etcd* root@k8s-node2:/opt/k8s/bin
etcd                                                                                  100%   18MB  92.2MB/s   00:00   
etcdctl                                                                               100%   15MB  92.3MB/s   00:00   
#
  4.2
创建etcd证书和私钥
  创建证书签名请求

# cat etcd-csr.json
{
"CN": "etcd",
"hosts": [
"127.0.0.1",
"192.168.1.92",
"192.168.1.93",
"192.168.1.95"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "SZ",
"L": "SZ",
"O": "k8s",
"OU": "4Paradigm"
}
]
}
#
  hosts 字段指定授权使用该证书的 etcd 节点 IP 或域名列表,这里将 etcd 集群的三个节点 IP 都列在其中
  生成证书和私钥

# cfssl gencert -ca=/etc/kubernetes/cert/ca.pem -ca-key=/etc/kubernetes/cert/ca-key.pem -config=/etc/kubernetes/cert/ca-config.json -profile=kubernetes etcd-csr.json | cfssljson -bare etcd
# ls
etcd.csr  etcd-csr.json  etcd-key.pem  etcd.pem
#
  分发证书和私钥到节点

# cp etcd* /etc/etcd/cert/
# scp etcd* root@k8s-node1:/etc/etcd/cert/
etcd.csr                                                                              100% 1054     1.5MB/s   00:00   
etcd-csr.json                                                                         100%  213   350.8KB/s   00:00   
etcd-key.pem                                                                          100% 1679     2.5MB/s   00:00   
etcd.pem                                                                              100% 1415     2.3MB/s   00:00   
# scp etcd* root@k8s-node2:/etc/etcd/cert/
etcd.csr                                                                              100% 1054     1.2MB/s   00:00   
etcd-csr.json                                                                         100%  213   296.9KB/s   00:00   
etcd-key.pem                                                                          100% 1679     2.6MB/s   00:00   
etcd.pem                                                                              100% 1415     2.5MB/s   00:00   
#
  4.3
创建etcd的systemd unit模板文件
注意: 模板里行尾的 \\ 这个符号,在实际的 unit 文件中需改成 \ (单个反斜杠)

# cat etcd.service.template
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos

[Service]
User=k8s
Type=notify
WorkingDirectory=/var/lib/etcd/
ExecStart=/opt/k8s/bin/etcd \\
--data-dir=/var/lib/etcd \\
--name=##NODE_NAME## \\
--cert-file=/etc/etcd/cert/etcd.pem \\
--key-file=/etc/etcd/cert/etcd-key.pem \\
--trusted-ca-file=/etc/kubernetes/cert/ca.pem \\
--peer-cert-file=/etc/etcd/cert/etcd.pem \\
--peer-key-file=/etc/etcd/cert/etcd-key.pem \\
--peer-trusted-ca-file=/etc/kubernetes/cert/ca.pem \\
--peer-client-cert-auth \\
--client-cert-auth \\
--listen-peer-urls=https://##NODE_IP##:2380 \\
--initial-advertise-peer-urls=https://##NODE_IP##:2380 \\
--listen-client-urls=https://##NODE_IP##:2379,http://127.0.0.1:2379 \\
--advertise-client-urls=https://##NODE_IP##:2379 \\
--initial-cluster-token=etcd-cluster-0 \\
--initial-cluster=${ETCD_NODES} \\
--initial-cluster-state=new
Restart=on-failure
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
#
  User :指定以 k8s 账户运行;
WorkingDirectory 、 --data-dir :指定工作目录和数据目录为 /var/lib/etcd ,需在启动服务前创建这个目录;
--name :指定节点名称,当 --initial-cluster-state 值为 new 时, --name 的参数值必须位于 --initial-cluster 列表中;
--cert-file 、 --key-file :etcd server 与 client 通信时使用的证书和私钥;
--trusted-ca-file :签名 client 证书的 CA 证书,用于验证 client 证书;
--peer-cert-file 、 --peer-key-file :etcd 与 peer 通信使用的证书和私钥;
--peer-trusted-ca-file :签名 peer 证书的 CA 证书,用于验证 peer 证书;
  分发生成的 systemd unit 文件,并修改好各节点配置文件里的##NODE_NAME##和##NODE_IP##

# cp etcd.service.template /etc/systemd/system/etcd.service
# scp etcd.service.template root@k8s-node1:/etc/systemd/system/etcd.service
etcd.service.template                                                                 100% 1038     1.1MB/s   00:00   
# scp etcd.service.template root@k8s-node2:/etc/systemd/system/etcd.service
etcd.service.template                                                                 100% 1038     1.2MB/s   00:00   
#
  ## 各个节点需分别把 ##NODE_NAME## 和 ##NODE_IP## 改成本节点的名称和IP,并把 ${ETCD_NODES} 换成实际的集群成员列表
  4.4
启动etcd

# systemctl daemon-reload && systemctl enable etcd && systemctl restart etcd
  启动报错

Aug 20 16:40:29 k8s-master systemd: etcd.service holdoff time over, scheduling restart.
Aug 20 16:40:29 k8s-master systemd: Starting Etcd Server...
Aug 20 16:40:29 k8s-master etcd: etcd Version: 3.3.7
Aug 20 16:40:29 k8s-master etcd: Git SHA: 56536de55
Aug 20 16:40:29 k8s-master etcd: Go Version: go1.9.6
Aug 20 16:40:29 k8s-master etcd: Go OS/Arch: linux/amd64
Aug 20 16:40:29 k8s-master etcd: setting maximum number of CPUs to 1, total number of available CPUs is 1
Aug 20 16:40:29 k8s-master etcd: peerTLS: cert = /etc/etcd/cert/etcd.pem, key = /etc/etcd/cert/etcd-key.pem, ca = , trusted-ca = /etc/kubernetes/cert/ca.pem, client-cert-auth = true, crl-file =
Aug 20 16:40:29 k8s-master etcd: open /etc/etcd/cert/etcd-key.pem: permission denied
Aug 20 16:40:29 k8s-master systemd: etcd.service: main process exited, code=exited, status=1/FAILURE
Aug 20 16:40:29 k8s-master systemd: Failed to start Etcd Server.
Aug 20 16:40:29 k8s-master systemd: Unit etcd.service entered failed state.
Aug 20 16:40:29 k8s-master systemd: etcd.service failed.
#
  明显  /etc/etcd/cert/etcd-key.pem: permission denied  没有权限

# pwd
/etc/etcd/cert
# ll
总用量 16
-rw-r--r-- 1 root root 1054 8月  20 15:39 etcd.csr
-rw-r--r-- 1 root root  213 8月  20 15:39 etcd-csr.json
-rw------- 1 root root 1679 8月  20 15:39 etcd-key.pem
-rw-r--r-- 1 root root 1415 8月  20 15:39 etcd.pem
#
  我们启动etcd使用的用户是k8s,而这些文件的属主是root(etcd-key.pem 权限为600,k8s用户无法读取),而且这里没有x的权限。
修改权限设置

# chown -R k8s /etc/etcd/cert/
# chmod +x -R /etc/etcd/cert/
# ll
总用量 16
-rwxr-xr-x 1 k8s root 1054 8月  20 15:39 etcd.csr
-rwxr-xr-x 1 k8s root  213 8月  20 15:39 etcd-csr.json
-rwx--x--x 1 k8s root 1679 8月  20 15:39 etcd-key.pem
-rwxr-xr-x 1 k8s root 1415 8月  20 15:39 etcd.pem
  /etc/kubernetes/cert/ 权限也不对

# cd /etc/kubernetes/cert/
# ll
总用量 20
-rw-r--r-- 1 root root  292 8月  16 16:05 ca-config.json
-rw-r--r-- 1 root root  993 8月  16 16:05 ca.csr
-rw-r--r-- 1 root root  201 8月  16 16:05 ca-csr.json
-rw------- 1 root root 1675 8月  16 16:05 ca-key.pem
-rw-r--r-- 1 root root 1338 8月  16 16:05 ca.pem
# chown -R k8s /etc/kubernetes/cert/
# chmod -R +x /etc/kubernetes/cert
  正常启动的配置文件,见下:

# cat /etc/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos

[Service]
User=k8s
Type=notify
WorkingDirectory=/var/lib/etcd/
ExecStart=/opt/k8s/bin/etcd \
--data-dir=/var/lib/etcd \
--name=k8s-master \
--cert-file=/etc/etcd/cert/etcd.pem \
--key-file=/etc/etcd/cert/etcd-key.pem \
--trusted-ca-file=/etc/kubernetes/cert/ca.pem \
--peer-cert-file=/etc/etcd/cert/etcd.pem \
--peer-key-file=/etc/etcd/cert/etcd-key.pem \
--peer-trusted-ca-file=/etc/kubernetes/cert/ca.pem \
--peer-client-cert-auth \
--client-cert-auth \
--listen-peer-urls=https://192.168.1.92:2380 \
--initial-advertise-peer-urls=https://192.168.1.92:2380 \
--listen-client-urls=https://192.168.1.92:2379,http://127.0.0.1:2379 \
--advertise-client-urls=https://192.168.1.92:2379 \
--initial-cluster-token=etcd-cluster-0 \
--initial-cluster=k8s-master=https://192.168.1.92:2380,k8s-node1=https://192.168.1.93:2380,k8s-node2=https://192.168.1.95:2380 \
--initial-cluster-state=new
Restart=on-failure
RestartSec=5
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
#
  4.5
验证etcd集群
报错:

# etcdctl cluster-health
failed to check the health of member 64fe8a986fbba907 on https://192.168.1.95:2379: Get https://192.168.1.95:2379/health: dial tcp 192.168.1.95:2379: getsockopt: no route to host
member 64fe8a986fbba907 is unreachable: [https://192.168.1.95:2379] are all unreachable
failed to check the health of member 9eddf87b04c89943 on https://192.168.1.93:2379: Get https://192.168.1.93:2379/health: dial tcp 192.168.1.93:2379: getsockopt: no route to host
member 9eddf87b04c89943 is unreachable: [https://192.168.1.93:2379] are all unreachable
failed to check the health of member d71352a6aad35c57 on https://192.168.1.92:2379: Get https://192.168.1.92:2379/health: x509: certificate signed by unknown authority
member d71352a6aad35c57 is unreachable: [https://192.168.1.92:2379] are all unreachable
cluster is unavailable
#
# etcdctl member list
client: etcd cluster is unavailable or misconfigured; error #0: client: endpoint https://192.168.1.95:2379 exceeded header timeout
; error #1: client: endpoint https://192.168.1.93:2379 exceeded header timeout
; error #2: x509: certificate signed by unknown authority
#
  logs

# cat /var/log/messages
Aug 20 18:06:36 k8s-master etcd: health check for peer 64fe8a986fbba907 could not connect: dial tcp 192.168.1.95:2380: getsockopt: no route to host
Aug 20 18:06:36 k8s-master etcd: health check for peer 9eddf87b04c89943 could not connect: dial tcp 192.168.1.93:2380: getsockopt: no route to host
Aug 20 18:06:36 k8s-master etcd: failed to reach the peerURL(https://192.168.1.95:2380) of member 64fe8a986fbba907 (Get https://192.168.1.95:2380/version: dial tcp 192.168.1.95:2380: getsockopt: no route to host)
Aug 20 18:06:36 k8s-master etcd: cannot get the version of member 64fe8a986fbba907 (Get https://192.168.1.95:2380/version: dial tcp 192.168.1.95:2380: getsockopt: no route to host)
Aug 20 18:06:36 k8s-master etcd: failed to reach the peerURL(https://192.168.1.93:2380) of member 9eddf87b04c89943 (Get https://192.168.1.93:2380/version: dial tcp 192.168.1.93:2380: getsockopt: no route to host)
Aug 20 18:06:36 k8s-master etcd: cannot get the version of member 9eddf87b04c89943 (Get https://192.168.1.93:2380/version: dial tcp 192.168.1.93:2380: getsockopt: no route to host)
Aug 20 18:06:39 k8s-master etcd: rejected connection from "192.168.1.92:50868" (error "remote error: tls: bad certificate", ServerName "")
Aug 20 18:06:40 k8s-master etcd: failed to reach the peerURL(https://192.168.1.95:2380) of member 64fe8a986fbba907 (Get https://192.168.1.95:2380/version: dial tcp 192.168.1.95:2380: getsockopt: no route to host)
Aug 20 18:06:40 k8s-master etcd: cannot get the version of member 64fe8a986fbba907 (Get https://192.168.1.95:2380/version: dial tcp 192.168.1.95:2380: getsockopt: no route to host)
Aug 20 18:06:40 k8s-master etcd: failed to reach the peerURL(https://192.168.1.93:2380) of member 9eddf87b04c89943 (Get https://192.168.1.93:2380/version: dial tcp 192.168.1.93:2380: getsockopt: no route to host)
Aug 20 18:06:40 k8s-master etcd: cannot get the version of member 9eddf87b04c89943 (Get https://192.168.1.93:2380/version: dial tcp 192.168.1.93:2380: getsockopt: no route to host)
Aug 20 18:06:41 k8s-master etcd: health check for peer 64fe8a986fbba907 could not connect: dial tcp 192.168.1.95:2380: getsockopt: no route to host
Aug 20 18:06:41 k8s-master etcd: health check for peer 9eddf87b04c89943 could not connect: dial tcp 192.168.1.93:2380: getsockopt: no route to host
Aug 20 18:06:42 k8s-master etcd: rejected connection from "192.168.1.92:50902" (error "remote error: tls: bad certificate", ServerName "")
Aug 20 18:06:44 k8s-master etcd: failed to reach the peerURL(https://192.168.1.95:2380) of member 64fe8a986fbba907 (Get https://192.168.1.95:2380/version: dial tcp 192.168.1.95:2380: getsockopt: no route to host)
Aug 20 18:06:44 k8s-master etcd: cannot get the version of member 64fe8a986fbba907 (Get https://192.168.1.95:2380/version: dial tcp 192.168.1.95:2380: getsockopt: no route to host)
#
  分析思路:
出问题的可能性:
配置文件配置出错
证书
网络
防火墙屏蔽了端口
  一个个来测试
  用telnet检查发现2379和2380端口不通,原因是防火墙没有关闭。
关闭防火墙再测试,还是报错:

Aug 21 09:04:02 k8s-node1 etcd: rejected connection from "192.168.1.92:36138" (error "remote error: tls: bad certificate", ServerName "")
Aug 21 09:04:19 k8s-node1 etcd: rejected connection from "192.168.1.93:51698" (error "remote error: tls: bad certificate", ServerName "")
# etcdctl cluster-health
failed to check the health of member 64fe8a986fbba907 on https://192.168.1.95:2379: Get https://192.168.1.95:2379/health: x509: certificate signed by unknown authority
member 64fe8a986fbba907 is unreachable: [https://192.168.1.95:2379] are all unreachable
failed to check the health of member 9eddf87b04c89943 on https://192.168.1.93:2379: Get https://192.168.1.93:2379/health: x509: certificate signed by unknown authority
member 9eddf87b04c89943 is unreachable: [https://192.168.1.93:2379] are all unreachable
failed to check the health of member d71352a6aad35c57 on https://192.168.1.92:2379: Get https://192.168.1.92:2379/health: x509: certificate signed by unknown authority
member d71352a6aad35c57 is unreachable: [https://192.168.1.92:2379] are all unreachable
cluster is unavailable
  这个报错应该是证书的问题了
找资料发现,如果不带证书测试就是报这个错误,带证书后,测试正常,见下:

# etcdctl --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem --endpoints=https://192.168.1.92:2379,https://192.168.1.93:2379,https://192.168.1.95:2379 cluster-health
member 64fe8a986fbba907 is healthy: got healthy result from https://192.168.1.95:2379
member 9eddf87b04c89943 is healthy: got healthy result from https://192.168.1.93:2379
member d71352a6aad35c57 is healthy: got healthy result from https://192.168.1.92:2379
cluster is healthy
#
# etcdctl  --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem --endpoints=https://192.168.1.92:2379,https://192.168.1.93:2379,https://192.168.1.95:2379  member list
64fe8a986fbba907: name=k8s-node2 peerURLs=https://192.168.1.95:2380 clientURLs=https://192.168.1.95:2379 isLeader=true
9eddf87b04c89943: name=k8s-node1 peerURLs=https://192.168.1.93:2380 clientURLs=https://192.168.1.93:2379 isLeader=false
d71352a6aad35c57: name=k8s-master peerURLs=https://192.168.1.92:2380 clientURLs=https://192.168.1.92:2379 isLeader=false
#
  执行命令看看
  master创建

# etcdctl --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem mkdir test
# etcdctl --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem mkdir ls
# etcdctl --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem  ls
/test
/ls
  node2检索

# etcdctl --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem mkdir test
# etcdctl --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem mkdir ls
# etcdctl --ca-file=/etc/kubernetes/cert/ca.pem --cert-file=/etc/etcd/cert/etcd.pem --key-file=/etc/etcd/cert/etcd-key.pem  ls
/test
/ls
  数据同步了
  4.6
注意事项:请仔细对比检查执行文件(以及证书文件)的属主,以及是否具有 x(执行)权限。



页: [1]
查看完整版本: 部署k8s ssl集群实践4:部署etcd集群