当前位置: 代码迷 >> 综合 >> keras--多GPU训练
  详细解决方案

keras--多GPU训练

热度:129   发布时间:2023-10-27 03:00:34.0
		from keras.callbacks import ModelCheckpoint, TensorBoard
		from keras.utils import multi_gpu_model  # Keras multi-GPU wrapper
		import tensorflow as tf  # required by tf.device() below


		class ParallelModelCheckpoints(ModelCheckpoint):
		    """ModelCheckpoint that always checkpoints a caller-supplied model.

		    Multi-GPU training involves two model objects: the template model
		    and the parallel wrapper that is actually fitted. The model to be
		    saved therefore has to be specified explicitly. This subclass
		    stores it at construction time and, in ``set_model``, registers it
		    with the base class instead of the model passed in.
		    """

		    def __init__(self,
		                 model,  # the model that should be saved
		                 filepath='./log/epoch-{epoch:02d}_loss-{loss:.4f}_acc-{val_acc:.4f}_lr-{lr:.5f}.h5',
		                 monitor='val_acc',
		                 verbose=1,
		                 save_best_only=True,
		                 save_weights_only=False,
		                 mode='auto',
		                 period=1):
		        """Remember ``model`` and configure the base ModelCheckpoint.

		        Args:
		            model: Model to save; kept as ``self.single_model``.
		            filepath: Checkpoint path template. The default references
		                ``epoch``, ``loss``, ``val_acc`` and ``lr``.
		                NOTE(review): ``lr`` must be available when the name
		                is formatted -- presumably provided by a learning-rate
		                callback; verify it is present (and that callback
		                ordering makes it available in time). TODO confirm.
		            monitor, verbose, save_best_only, save_weights_only, mode,
		            period: Forwarded unchanged to ``ModelCheckpoint``.
		        """
		        self.single_model = model
		        # Keyword arguments keep each value bound to the intended
		        # ModelCheckpoint parameter regardless of positional order.
		        super(ParallelModelCheckpoints, self).__init__(
		            filepath=filepath,
		            monitor=monitor,
		            verbose=verbose,
		            save_best_only=save_best_only,
		            save_weights_only=save_weights_only,
		            mode=mode,
		            period=period)

		    def set_model(self, model):
		        """Register the stored model; the ``model`` argument is ignored."""
		        super(ParallelModelCheckpoints, self).set_model(self.single_model)


		# NOTE: MobileNet, h5_path, opt, train_generator, validation_generator,
		# step_size_train, step_size_valid, epoch_list and stepDecayLR are not
		# defined in this snippet and must be provided by the surrounding script.

		# First create the original (template) model on the CPU.
		with tf.device('/cpu:0'):
		    model = MobileNet(...)

		# Create the multi-GPU model; 4 is the number of GPUs.
		parallel_model = multi_gpu_model(model, gpus=4)

		# When resuming training, the weights are loaded through parallel_model,
		# not through model.
		# NOTE(review): by_name=True matches weights by layer name -- confirm the
		# layer names stored in h5_path match those of parallel_model. TODO confirm.
		parallel_model.load_weights(h5_path, by_name=True)
		parallel_model.compile(optimizer=opt,
		                       loss='categorical_crossentropy',
		                       metrics=['accuracy'])

		# Select the model whose .h5 checkpoint is written.
		model_checkpoint = ParallelModelCheckpoints(model)

		print("Start training the model")
		# Training can start now.
		training_history = parallel_model.fit_generator(
		    train_generator,
		    steps_per_epoch=step_size_train,
		    validation_data=validation_generator,
		    validation_steps=step_size_valid,
		    epochs=epoch_list[-1],
		    verbose=1,
		    callbacks=[TensorBoard(log_dir='./tb'), model_checkpoint, stepDecayLR])
		print("Model training finished")
  相关解决方案