"""Pick a compute device ('cuda:N' or 'cpu') based on ``nvidia-smi`` output."""
import os

# Fields requested from ``nvidia-smi --query-gpu``; the order defines the CSV
# column order of every returned line.
_QUERY_FIELDS = ['index', 'gpu_name', 'memory.free', 'memory.total']

# Fields whose textual value (e.g. "1024 MiB") is converted to an int.
_NUMERIC_FIELDS = ('memory.free', 'memory.total')


def check_gpus():
    """Return True if the ``nvidia-smi`` tool can be invoked.

    The check runs ``nvidia-smi -h`` and looks for the banner text in its
    standard output. A message is printed when the tool is not found.
    """
    # Use the pipe as a context manager so it is closed deterministically.
    with os.popen('nvidia-smi -h') as pipe:
        help_text = pipe.read()
    if 'NVIDIA System Management' not in help_text:
        print("'nvidia-smi' tool not found.")
        return False
    return True


class GPUManager:
    """Automatically allocate a GPU.

    ``auto_choice`` returns the unallocated GPU with the most free memory,
    unless ``specified_device`` has been set, in which case that device is
    always returned. When no GPU is available it returns ``'cpu'``.

    The implementation of GPUManager is based on
    https://github.com/QuantumLiu/tf_gpu_manager
    """

    def __init__(self, gpu_available=False, specified_device=-1):
        """Query the GPUs once and mark all of them as unallocated.

        Args:
            gpu_available: whether the caller wants to use GPUs at all. When
                False, ``nvidia-smi`` is never invoked.
            specified_device: a non-negative device index forces
                ``auto_choice`` to return that device; a negative value
                enables automatic selection.
        """
        # NOTE: the attribute name is misspelled ("avaiable"); it is kept
        # as-is because external code may already read it.
        self.gpu_avaiable = gpu_available and check_gpus()
        self.specified_device = specified_device
        if self.gpu_avaiable:
            self.gpus = self._query_gpus()
            for gpu in self.gpus:
                gpu['allocated'] = False
        else:
            self.gpus = None

    def _sort_by_memory(self, gpus, by_size=False):
        """Return ``gpus`` sorted from most to least free memory.

        Args:
            gpus: list of GPU info dicts with 'memory.free' and
                'memory.total' entries.
            by_size: if True, rank by absolute free memory; otherwise rank
                by the ratio of free to total memory.
        """
        if by_size:
            return sorted(gpus, key=lambda d: d['memory.free'], reverse=True)

        def free_rate(info):
            total = info['memory.total']
            # A zero total would otherwise raise ZeroDivisionError; rank
            # such a device last.
            return float(info['memory.free']) / total if total else 0.0

        print('Sorted by free memory rate')
        return sorted(gpus, key=free_rate, reverse=True)

    def _query_gpus(self):
        """Run ``nvidia-smi`` and return one info dict per reported GPU.

        Lines that do not carry all requested CSV fields (blank lines or
        diagnostic messages) and lines whose memory values are not numeric
        are skipped, so the result may be an empty list.
        """
        cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(
            ','.join(_QUERY_FIELDS))
        with os.popen(cmd) as pipe:
            lines = pipe.readlines()

        gpus = []
        for line in lines:
            if line.count(',') < len(_QUERY_FIELDS) - 1:
                # Not a CSV record for the requested fields.
                continue
            try:
                gpus.append(self._parse(line, _QUERY_FIELDS))
            except ValueError:
                # Memory value was not a number, so this device cannot be
                # ranked; leave it out.
                continue
        return gpus

    def _parse(self, line, args):
        """Convert one CSV output line into a dict keyed by ``args``.

        Memory fields are converted to ints after stripping the unit
        suffix; every other field is kept as a stripped string.

        Raises:
            ValueError: if a memory field is not numeric.
        """
        info = {}
        for key, raw in zip(args, line.strip().split(',')):
            if key in _NUMERIC_FIELDS:
                text = raw.upper().strip().replace('MIB', '').replace('W', '')
                info[key] = int(float(text))
            else:
                info[key] = raw.strip()
        return info

    def auto_choice(self):
        """Allocate a device and return its name.

        Returns:
            ``'cpu'`` when GPUs are unavailable, ``'cuda:<specified_device>'``
            when a device was specified, otherwise ``'cuda:<index>'`` of the
            unallocated GPU with the most free memory. Once every GPU has
            been handed out, the allocation flags are reset.
        """
        if self.gpus is None:
            return 'cpu'
        if self.specified_device >= 0:
            # Allow users to specify the device.
            return 'cuda:{}'.format(self.specified_device)
        if not self.gpus:
            # nvidia-smi is present but reported no usable GPU.
            return 'cpu'

        # Refresh the memory statistics, matching records by GPU index so a
        # short or reordered query result cannot update the wrong entry.
        fresh = {info['index']: info for info in self._query_gpus()}
        for gpu in self.gpus:
            gpu.update(fresh.get(gpu['index'], {}))

        unallocated_gpus = [gpu for gpu in self.gpus if not gpu['allocated']]
        if not unallocated_gpus:
            # Reset when all GPUs have been allocated.
            unallocated_gpus = self.gpus
            for gpu in self.gpus:
                gpu['allocated'] = False

        chosen_gpu = self._sort_by_memory(unallocated_gpus, True)[0]
        chosen_gpu['allocated'] = True
        return 'cuda:{}'.format(chosen_gpu['index'])


# for testing
if __name__ == '__main__':
    gpu_manager = GPUManager(gpu_available=True, specified_device=0)
    for i in range(20):
        print(gpu_manager.auto_choice())