#!/usr/bin/python
# this script makes assumptions about the physical environment.
# 1) each rack is its own layer 3 network with a /24 subnet, which
# could be typical where each rack has its own
# switch with uplinks to a central core router.
#
#               +-----------+
#               |core router|
#               +-----------+
#              /             \
#   +-----------+           +-----------+
#   |rack switch|           |rack switch|
#   +-----------+           +-----------+
#   | data node |           | data node |
#   +-----------+           +-----------+
#   | data node |           | data node |
#   +-----------+           +-----------+
#
# 2) topology script gets a list of IPs as input, calculates the network address of each one,
#    and prints '/network_address' for it (or '/rack-unknown' if the address cannot be calculated).
import netaddr
import sys
# Netmask shared by every rack subnet. Set this to what is being used in
# your environment; the example uses a /24.
netmask = '255.255.255.0'

# Every command-line argument after the script name is treated as a datanode
# IP. Slicing (instead of popping) leaves sys.argv itself untouched.
for ip in sys.argv[1:]:
    # netaddr needs the address in 'ip/netmask' form to derive the network.
    address = '{0}/{1}'.format(ip, netmask)
    try:
        network_address = netaddr.IPNetwork(address).network
    except Exception:
        # Best effort: exactly one line must be printed per argument, so any
        # input that cannot be parsed falls back to a catch-all value.
        # Catching Exception (rather than a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate.
        print("/rack-unknown")
    else:
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("/{0}".format(network_address))
1条答案
按热度按时间c9x0cxw01#
必须配置系统以指定如何确定机架信息。例如,这个cloudera链接告诉您如何在cloudera manager中为主机配置机架。
或者,这个apache链接解释了如何通过配置文件,用java类或外部脚本来指定这些信息。
拓扑结构通常采用/myrack/myhost的形式,不过您也可以使用更深的层次结构。文档中给出了下面的python示例,它假设每个机架都有一个/24子网,因此提取ip地址的前三个字节作为机架号。如果可以相应地设置节点ip地址,您可以采用类似的方法;也可以编写自己的脚本,根据每个节点的ip地址或其他可用信息确定机架(在您的示例中,由于节点数量相对较少,即使是主机名到机架的简单硬编码映射也适用)。