我有一个代码,有一个数据模块和一个模型,我训练我的模型与射线教练,这里是我的代码:
class CSIDataset(pl.LightningDataModule):
def __init__(self, pkl_dir):
super().__init__()
self.samples = []
for file in os.listdir(pkl_dir):
sample = pickle.load(open(pkl_dir + file, 'rb'))
if sample['x_score'].shape[0] > 0:
self.samples += [sample]
def __getitem__(self, idx):
data = self.samples[idx]
return data['x_capture'], data['x_score'], data['label']
def __len__(self):
return len(self.samples)
class CSIModel(pl.LightningModule):
def __init__(self, config):
super().__init__()
self.config = config
self.criterion = nn.BCELoss()
self.capture_rnn = nn.Sequential(
nn.Linear(config['capture_input_dim'], config['d_Wa']),
nn.Tanh(),
nn.Dropout(config['dropout']),
nn.LSTM(
input_size=config['d_Wa'],
hidden_size=config['d_lstm'],
num_layers=1,
batch_first=True)
)
self.capture_proj = nn.Sequential(
nn.Linear(config['d_lstm'], config['d_Wr']),
nn.Tanh(),
nn.Dropout(config['dropout'])
)
self.score = nn.Sequential(
nn.Linear(config['score_input_dim'], config['d_Wu']),
nn.Tanh(),
nn.Linear(config['d_Wu'], config['d_Ws']),
nn.Sigmoid()
)
self.cls = nn.Sequential(
nn.Linear(config['d_Ws'] + config['d_Wr'], 1),
nn.Sigmoid()
)
def configure_optimizers(self):
all_params = dict(self.named_parameters())
wd_name = 'score.0.weight'
wd_params = all_params[wd_name]
del all_params[wd_name]
return torch.optim.Adam(
[
{'params': wd_params, 'weight_decay': self.config['weight_decay']},
{'params': list(all_params.values())},
],
lr=self.config['lr']
)
def count_parameters(self):
return sum(p.numel() for p in self.parameters() if p.requires_grad)
def forward(self, x_capture, x_score):
hc, (_, _) = self.capture_rnn(x_capture.float())
hc = self.capture_proj(hc[:, -1])
hs = self.score(x_score.float()).mean(dim=1)
h = torch.cat([hc, hs], dim=1)
return self.cls(h)
def step(self, batch, mode='train'):
x_capture, x_score, labels = batch
labels = labels[:, None].float()
logits = self.forward(x_capture, x_score)
loss = self.criterion(logits, labels)
preds = logits.clone()
preds[preds >=0.5] = 1
preds[preds < 0.5] = 0
acc = (preds == labels).sum() / labels.shape[0]
tn, fn, fp, tp = confusion_matrix(logits, labels.int(), num_classes=2, threshold=0.5).flatten()
self.log(f'{mode}_loss', loss.item())
self.log(f'{mode}_acc', acc.item())
self.log(f'{mode}_tn', tn.item())
self.log(f'{mode}_fn', fn.item())
self.log(f'{mode}_fp', fp.item())
self.log(f'{mode}_tp', tp.item())
return {
'loss':loss,
'acc':acc,
'tn':tn,
'fn':fn,
'fp':fp,
'tp':tp
}
def training_step(self, batch, batch_idx):
return self.step(batch)
def test_step(self, batch, batch_idx):
return self.step(batch, mode='test')
def validation_step(self, batch, batch_idx):
return self.step(batch, mode='val')
def experiment(args):
dataset = 'weibo'
path = f'assets/{dataset}/'
train_set = CSIDataset(pkl_dir=path + 'train/pkls/')
val_set = CSIDataset(pkl_dir=path + 'validation/pkls/')
train_loader = DataLoader(train_set, batch_size=1, shuffle=False, num_workers=1)
val_loader = DataLoader(val_set, batch_size=1, shuffle=False, num_workers=1)
conf = {
'capture_input_dim' : 112,
'score_input_dim' : 50,
'd_Wa': 100,
'd_lstm' : 50,
'd_Wr' : 100,
'd_Wu' : 100,
'd_Ws' : 1,
'lr': args['lr'],
'dropout' : args['dropout'],
'weight_decay' : args['weight_decay']
}
model = CSIModel(conf)
name = f"dataset={dataset}-do={args['dropout']}-lr={args['lr']}-wd={args['weight_decay']}"
save_dir = f'weights/{name}/'
logger = TensorBoardLogger(save_dir='logs/', name=name)
checkpoint = ModelCheckpoint(
dirpath=save_dir,
filename='{epoch}-{val_loss:.2f}',
monitor='val_loss',
mode='min',
save_top_k=10,
every_n_epochs = 5
)
# reporter = TuneReportCallback(
# {
# "loss": "ptl/val_loss",
# "mean_accuracy": "ptl/val_acc"
# },
# on="validation_end"
# )
os.makedirs(save_dir, exist_ok=True)
json.dump(conf, open(save_dir + 'config.json', 'w'))
trainer = Trainer(
benchmark=True,
gpus=[1],
accumulate_grad_batches=64,
logger=logger,
enable_progress_bar=False,
max_epochs=10,
callbacks=[checkpoint]
)
trainer.fit(model, train_loader, val_loader)
res = trainer.validate(val_loader)[0]
tune.report(**res)
analysis = tune.run(
experiment,
num_samples=4,
resources_per_trial={"cpu": 1, "gpu": 2},
verbose=1,
config={
"weight_decay": tune.grid_search([0., 0.1, 0.01, 0.001]),
"lr": tune.loguniform(1e-5, 1e-1),
"dropout": tune.uniform(0., 0.3)
}
)
但在运行代码时,我收到了以下错误:
File "/tmp/ipykernel_2468582/2857088609.py", line 62, in experiment
File "/home/user/venv37/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 821, in validate
return self._call_and_handle_interrupt(self._validate_impl, model, dataloaders, ckpt_path, verbose, datamodule)
File "/home/user/venv37/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 685, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/home/user/venv37/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 864, in _validate_impl
results = self._run(model, ckpt_path=self.validated_ckpt_path)
File "/home/user/venv37/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1128, in _run
verify_loop_configurations(self)
File "/home/user/venv37/lib/python3.7/site-packages/pytorch_lightning/trainer/configuration_validator.py", line 40, in verify_loop_configurations
__verify_eval_loop_configuration(trainer, model, "val")
File "/home/user/venv37/lib/python3.7/site-packages/pytorch_lightning/trainer/configuration_validator.py", line 170, in __verify_eval_loop_configuration
has_step = is_overridden(step_name, model)
File "/home/user/venv37/lib/python3.7/site-packages/pytorch_lightning/utilities/model_helpers.py", line 47, in is_overridden
raise ValueError("Expected a parent")
ValueError: Expected a parent
尽管我的数据模块和模型都继承了pl.LightningModule
!我试过Python 3.8,torch==1.9.0和pytorch_lightning==1.5.5以及Python 3.7,但是错误仍然存在。任何解决方案都是值得赞赏的!我也试过this solution,但是没有成功!
1条答案
按热度按时间46scxncf1#
这可能涉及版本兼容性。
升级lightning并使用导入的lightning.pytorch作为pl
而不是将pytorch_lightning作为pl导入
对我有用