1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
   |  import optparse from pprint import pprint import logging import sys import math import ast
  ''' Reserved for OS + DN + NM, Map: Memory => Reservation ''' reservedStack = { 4:1, 8:2, 16:2, 24:4, 48:6, 64:8, 72:8, 96:12,                     128:24, 256:32, 512:64} ''' Reserved for HBase. Map: Memory => Reservation '''    reservedHBase = {4:1, 8:1, 16:2, 24:4, 48:8, 64:8, 72:8, 96:16,                     128:24, 256:32, 512:64} GB = 1024
  def getMinContainerSize(memory):   if (memory <= 4):     return 256   elif (memory <= 8):     return 512   elif (memory <= 24):     return 1024   else:     return 2048   pass
  def getReservedStackMemory(memory):   if (reservedStack.has_key(memory)):     return reservedStack[memory]   if (memory <= 4):     ret = 1   elif (memory >= 512):     ret = 64   else:     ret = 1   return ret
  def getReservedHBaseMem(memory):   if (reservedHBase.has_key(memory)):     return reservedHBase[memory]   if (memory <= 4):     ret = 1   elif (memory >= 512):     ret = 64   else:     ret = 2   return ret                      def main():   log = logging.getLogger(__name__)   out_hdlr = logging.StreamHandler(sys.stdout)   out_hdlr.setFormatter(logging.Formatter(' %(message)s'))   out_hdlr.setLevel(logging.INFO)   log.addHandler(out_hdlr)   log.setLevel(logging.INFO)   parser = optparse.OptionParser()   memory = 0   cores = 0   disks = 0   hbaseEnabled = True   parser.add_option('-c', '--cores', default = 16,                      help = 'Number of cores on each host')   parser.add_option('-m', '--memory', default = 64,                      help = 'Amount of Memory on each host in GB')   parser.add_option('-d', '--disks', default = 4,                      help = 'Number of disks on each host')   parser.add_option('-k', '--hbase', default = "True",                     help = 'True if HBase is installed, False is not')   (options, args) = parser.parse_args()      cores = int (options.cores)   memory = int (options.memory)   disks = int (options.disks)   hbaseEnabled = ast.literal_eval(options.hbase)      log.info("Using cores=" + str(cores) + " memory=" + str(memory) + "GB" +             " disks=" + str(disks) + " hbase=" + str(hbaseEnabled))   minContainerSize = getMinContainerSize(memory)   reservedStackMemory = getReservedStackMemory(memory)   reservedHBaseMemory = 0   if (hbaseEnabled):     reservedHBaseMemory = getReservedHBaseMem(memory)   reservedMem = reservedStackMemory + reservedHBaseMemory   usableMem = memory - reservedMem   memory -= (reservedMem)   if (memory < 2):     memory = 2     reservedMem = max(0, memory - reservedMem)        memory *= GB      containers = int (min(2 * cores,                          min(math.ceil(1.8 * float(disks)),                               memory/minContainerSize)))   if (containers <= 2):     containers = 3
    log.info("Profile: cores=" + str(cores) + " memory=" + str(memory) + "MB"            + " reserved=" + str(reservedMem) + "GB" + " usableMem="            + str(usableMem) + "GB" + " disks=" + str(disks))        container_ram = abs(memory/containers)   if (container_ram > GB):     container_ram = int(math.floor(container_ram / 512)) * 512   log.info("Num Container=" + str(containers))   log.info("Container Ram=" + str(container_ram) + "MB")   log.info("Used Ram=" + str(int (containers*container_ram/float(GB))) + "GB")   log.info("Unused Ram=" + str(reservedMem) + "GB")   log.info("yarn.scheduler.minimum-allocation-mb=" + str(container_ram))   log.info("yarn.scheduler.maximum-allocation-mb=" + str(containers*container_ram))   log.info("yarn.nodemanager.resource.memory-mb=" + str(containers*container_ram))   map_memory = container_ram   reduce_memory = 2*container_ram if (container_ram <= 2048) else container_ram   am_memory = max(map_memory, reduce_memory)   log.info("mapreduce.map.memory.mb=" + str(map_memory))   log.info("mapreduce.map.java.opts=-Xmx" + str(int(0.8 * map_memory)) +"m")   log.info("mapreduce.reduce.memory.mb=" + str(reduce_memory))   log.info("mapreduce.reduce.java.opts=-Xmx" + str(int(0.8 * reduce_memory)) + "m")   log.info("yarn.app.mapreduce.am.resource.mb=" + str(am_memory))   log.info("yarn.app.mapreduce.am.command-opts=-Xmx" + str(int(0.8*am_memory)) + "m")   log.info("mapreduce.task.io.sort.mb=" + str(int(0.4 * map_memory)))   pass
  if __name__ == '__main__':   try:     main()   except(KeyboardInterrupt, EOFError):     print("\nAborting ... Keyboard Interrupt.")     sys.exit(1)
 
  |